Upload folder using huggingface_hub
#288
by
DavidNguyen
- opened
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-10000.pth.json +121 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-100000.pth.json +121 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-20000.pth.json +121 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-30000.pth.json +121 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-40000.pth.json +121 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-50000.pth.json +121 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-60000.pth.json +121 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-70000.pth.json +121 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-80000.pth.json +121 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-90000.pth.json +121 -0
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-10000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 3.026036095997644,
|
| 3 |
+
"val/accuracy": 0.42390659877232145,
|
| 4 |
+
"val/perplexity": 20.615353134651023,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.8078854009971854,
|
| 8 |
+
"lambada/accuracy/total": 0.1622670807453416,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7164208074534162,
|
| 10 |
+
"lambada/perplexity": 24.909094620287416,
|
| 11 |
+
"lambada/lm_loss": 3.564238264553546,
|
| 12 |
+
"lambada/lm_perplexity": 35.312544340058516,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.29308683975883154,
|
| 16 |
+
"mean_loss": 2.9169607484974147,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.869,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.95,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.698,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.778,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.571,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.885,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.326,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.429,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.838,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.966,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.854,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.523,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.863,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.858,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.748,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.435,
|
| 33 |
+
"blimp/accuracy/transitive": 0.829,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.385,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.734,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.709,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.813,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.892,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.182,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.188,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.653,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.677,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.88,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.872,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.518,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.926,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.476,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.656,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.769,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.195,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.918,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.51,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.906,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.409,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.759,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.862,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.837,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.872,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.754,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.973,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.744,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.776,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.971,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.513,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.906,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.813,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.737,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.714,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.795,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.974,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.861,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.598,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.828,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.972,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.909,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.728,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.065,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.529,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.966,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.44,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.981,
|
| 83 |
+
"blimp/accuracy/causative": 0.648,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7195970149253731,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7195970149253731,
|
| 86 |
+
"cbt/accuracy/NE": 0.6915064102564102,
|
| 87 |
+
"cbt/accuracy/V": 0.86,
|
| 88 |
+
"cbt/accuracy/CN": 0.7328,
|
| 89 |
+
"cbt/accuracy/P": 0.8376,
|
| 90 |
+
"cbt/accuracy/group_average": 0.7804766025641026,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.7805122048819528,
|
| 92 |
+
"hellaswag/accuracy/val": 0.26926906990639315,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.26926906990639315,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.26926906990639315,
|
| 95 |
+
"piqa/accuracy/val": 0.5495103373231773,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5495103373231773,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5495103373231773,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.31331923890063423,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.21030042918454936,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.2618098340425918,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.2793201133144476,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.2580622095101895,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.2580622095101895,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.2580622095101895,
|
| 105 |
+
"openbookqa/accuracy/test": 0.256,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.256,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.256,
|
| 108 |
+
"race/accuracy/test/high": 0.2578616352201258,
|
| 109 |
+
"race/accuracy/test/middle": 0.318941504178273,
|
| 110 |
+
"race/accuracy/group_average": 0.2884015696991994,
|
| 111 |
+
"race/accuracy/seq_average": 0.2756384272395622,
|
| 112 |
+
"siqa/accuracy/dev": 0.3705220061412487,
|
| 113 |
+
"siqa/accuracy/group_average": 0.3705220061412487,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.3705220061412487,
|
| 115 |
+
"winogrande/accuracy/dev": 0.510655090765588,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.510655090765588,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.510655090765588,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.23587223587223588,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.23587223587223588,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.23587223587223588
|
| 121 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-100000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.6119626968625993,
|
| 3 |
+
"val/accuracy": 0.479095943390377,
|
| 4 |
+
"val/perplexity": 13.625767876405735,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.546885993910132,
|
| 8 |
+
"lambada/accuracy/total": 0.25271739130434784,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7618400621118012,
|
| 10 |
+
"lambada/perplexity": 12.005568880054728,
|
| 11 |
+
"lambada/lm_loss": 3.1711634036775673,
|
| 12 |
+
"lambada/lm_perplexity": 23.835198189344226,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.36590666734736244,
|
| 16 |
+
"mean_loss": 2.5794243453863657,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.905,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.976,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.799,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.888,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.596,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.905,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.309,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.509,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.785,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.988,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.91,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.526,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.902,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.879,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.634,
|
| 33 |
+
"blimp/accuracy/transitive": 0.876,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.29,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.782,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.787,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.854,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.941,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.479,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.227,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.721,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.793,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.899,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.914,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.624,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.94,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.619,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.808,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.78,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.558,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.935,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.495,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.925,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.604,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.818,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.858,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.909,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.961,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.804,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.978,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.809,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.767,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.978,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.576,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.965,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.865,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.78,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.808,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.794,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.981,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.844,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.645,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.919,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.983,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.951,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.826,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.265,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.588,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.971,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.453,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974,
|
| 83 |
+
"blimp/accuracy/causative": 0.699,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7771343283582091,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.777134328358209,
|
| 86 |
+
"cbt/accuracy/NE": 0.7576121794871795,
|
| 87 |
+
"cbt/accuracy/V": 0.9008,
|
| 88 |
+
"cbt/accuracy/CN": 0.8176,
|
| 89 |
+
"cbt/accuracy/P": 0.8912,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8418030448717949,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8418367346938775,
|
| 92 |
+
"hellaswag/accuracy/val": 0.29426409081856203,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.29426409081856203,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.29426409081856203,
|
| 95 |
+
"piqa/accuracy/val": 0.5794341675734495,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5794341675734495,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5794341675734495,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.32684989429175476,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.21201716738197424,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.2694335308368645,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.28895184135977336,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.2607079013228459,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.2607079013228459,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.2607079013228459,
|
| 105 |
+
"openbookqa/accuracy/test": 0.274,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.274,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.274,
|
| 108 |
+
"race/accuracy/test/high": 0.25757575757575757,
|
| 109 |
+
"race/accuracy/test/middle": 0.3447075208913649,
|
| 110 |
+
"race/accuracy/group_average": 0.30114163923356124,
|
| 111 |
+
"race/accuracy/seq_average": 0.28293473854884477,
|
| 112 |
+
"siqa/accuracy/dev": 0.3561924257932446,
|
| 113 |
+
"siqa/accuracy/group_average": 0.3561924257932446,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.3561924257932446,
|
| 115 |
+
"winogrande/accuracy/dev": 0.510655090765588,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.510655090765588,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.510655090765588,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.24651924651924653,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.24651924651924653,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.24651924651924653
|
| 121 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-20000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.8806944347563244,
|
| 3 |
+
"val/accuracy": 0.44141012524801587,
|
| 4 |
+
"val/perplexity": 17.826648326435496,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.6214900994152757,
|
| 8 |
+
"lambada/accuracy/total": 0.1935170807453416,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7358307453416149,
|
| 10 |
+
"lambada/perplexity": 18.64794897523847,
|
| 11 |
+
"lambada/lm_loss": 3.406234809500662,
|
| 12 |
+
"lambada/lm_perplexity": 30.151504110290105,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.31746360299667875,
|
| 16 |
+
"mean_loss": 2.7510922670858,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.87,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.949,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.743,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.846,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.57,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.891,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.373,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.434,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.837,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.976,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.897,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.548,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.853,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.865,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.826,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.477,
|
| 33 |
+
"blimp/accuracy/transitive": 0.826,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.356,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.725,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.73,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.81,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.846,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.247,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.151,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.694,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.841,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.893,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.912,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.55,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.896,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.481,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.759,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.776,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.289,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.906,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.397,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.899,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.447,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.78,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.903,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.876,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.92,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.789,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.962,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.632,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.699,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.986,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.539,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.92,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.873,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.735,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.775,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.754,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.987,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.838,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.597,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.886,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.968,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.941,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.759,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.112,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.492,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.957,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.342,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.981,
|
| 83 |
+
"blimp/accuracy/causative": 0.665,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7321492537313434,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7321492537313433,
|
| 86 |
+
"cbt/accuracy/NE": 0.6991185897435898,
|
| 87 |
+
"cbt/accuracy/V": 0.8768,
|
| 88 |
+
"cbt/accuracy/CN": 0.7712,
|
| 89 |
+
"cbt/accuracy/P": 0.8552,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8005796474358974,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8006202480992397,
|
| 92 |
+
"hellaswag/accuracy/val": 0.27942640908185623,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.27942640908185623,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.27942640908185623,
|
| 95 |
+
"piqa/accuracy/val": 0.55930359085963,
|
| 96 |
+
"piqa/accuracy/group_average": 0.55930359085963,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.55930359085963,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.3150105708245243,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.2034334763948498,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.25922202360968705,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.2781869688385269,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.26363961387200574,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.26363961387200574,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.26363961387200574,
|
| 105 |
+
"openbookqa/accuracy/test": 0.276,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.276,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.276,
|
| 108 |
+
"race/accuracy/test/high": 0.25414522584333904,
|
| 109 |
+
"race/accuracy/test/middle": 0.32729805013927576,
|
| 110 |
+
"race/accuracy/group_average": 0.2907216379913074,
|
| 111 |
+
"race/accuracy/seq_average": 0.2754357519254155,
|
| 112 |
+
"siqa/accuracy/dev": 0.3623336745138178,
|
| 113 |
+
"siqa/accuracy/group_average": 0.3623336745138178,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.3623336745138178,
|
| 115 |
+
"winogrande/accuracy/dev": 0.5074980268350434,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.5074980268350434,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.5074980268350434,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.2285012285012285,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.2285012285012285,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.2285012285012285
|
| 121 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-30000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.8053150479755704,
|
| 3 |
+
"val/accuracy": 0.4509800502232143,
|
| 4 |
+
"val/perplexity": 16.532283548154435,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.7774893245341614,
|
| 8 |
+
"lambada/accuracy/total": 0.20031055900621117,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7358307453416149,
|
| 10 |
+
"lambada/perplexity": 17.452773071925026,
|
| 11 |
+
"lambada/lm_loss": 3.367414702727166,
|
| 12 |
+
"lambada/lm_perplexity": 29.003447514383748,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.32564530461471275,
|
| 16 |
+
"mean_loss": 2.791402186254866,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.872,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.96,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.771,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.828,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.604,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.883,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.361,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.486,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.847,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.97,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.881,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.562,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.89,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.884,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.849,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.526,
|
| 33 |
+
"blimp/accuracy/transitive": 0.841,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.395,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.782,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.799,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.811,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.863,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.263,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.215,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.751,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.833,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.899,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.889,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.646,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.935,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.558,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.641,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.799,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.332,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.898,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.316,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.91,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.599,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.773,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.912,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.889,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.908,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.81,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.98,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.712,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.781,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.984,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.458,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.94,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.845,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.781,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.766,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.814,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.963,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.865,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.601,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.91,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.972,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.937,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.815,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.246,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.541,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.944,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.329,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.97,
|
| 83 |
+
"blimp/accuracy/causative": 0.682,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7504029850746267,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7504029850746269,
|
| 86 |
+
"cbt/accuracy/NE": 0.7347756410256411,
|
| 87 |
+
"cbt/accuracy/V": 0.892,
|
| 88 |
+
"cbt/accuracy/CN": 0.7872,
|
| 89 |
+
"cbt/accuracy/P": 0.8632,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8192939102564103,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.819327731092437,
|
| 92 |
+
"hellaswag/accuracy/val": 0.28141804421429994,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.28141804421429994,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.28141804421429994,
|
| 95 |
+
"piqa/accuracy/val": 0.5783460282916213,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5783460282916213,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5783460282916213,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.31839323467230446,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.21630901287553647,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.26735112377392045,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.2847025495750708,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.26349660350375403,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.26349660350375403,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.26349660350375403,
|
| 105 |
+
"openbookqa/accuracy/test": 0.268,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.268,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.268,
|
| 108 |
+
"race/accuracy/test/high": 0.2641509433962264,
|
| 109 |
+
"race/accuracy/test/middle": 0.3224233983286908,
|
| 110 |
+
"race/accuracy/group_average": 0.2932871708624586,
|
| 111 |
+
"race/accuracy/seq_average": 0.28111066072152413,
|
| 112 |
+
"siqa/accuracy/dev": 0.3654042988741044,
|
| 113 |
+
"siqa/accuracy/group_average": 0.3654042988741044,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.3654042988741044,
|
| 115 |
+
"winogrande/accuracy/dev": 0.500394632991318,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.500394632991318,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.500394632991318,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.2325962325962326,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.2325962325962326,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.2325962325962326
|
| 121 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-40000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.756097460549975,
|
| 3 |
+
"val/accuracy": 0.45806109716021826,
|
| 4 |
+
"val/perplexity": 15.738303595828631,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.634979248046875,
|
| 8 |
+
"lambada/accuracy/total": 0.21506211180124224,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7439829192546584,
|
| 10 |
+
"lambada/perplexity": 15.870491689993184,
|
| 11 |
+
"lambada/lm_loss": 3.2971033813003907,
|
| 12 |
+
"lambada/lm_perplexity": 27.03421757696205,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.33656160448073025,
|
| 16 |
+
"mean_loss": 2.6955383542984253,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.873,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.98,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.803,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.842,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.584,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.885,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.28,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.47,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.827,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.987,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.9,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.582,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.91,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.879,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.884,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.613,
|
| 33 |
+
"blimp/accuracy/transitive": 0.852,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.32,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.8,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.801,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.854,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.906,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.288,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.201,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.747,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.724,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.892,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.877,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.639,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.939,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.553,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.806,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.758,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.331,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.922,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.32,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.935,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.541,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.786,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.9,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.894,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.927,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.861,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.991,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.767,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.716,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.993,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.515,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.956,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.878,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.775,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.765,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.793,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.982,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.875,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.536,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.899,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.979,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.944,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.839,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.181,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.499,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.963,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.442,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974,
|
| 83 |
+
"blimp/accuracy/causative": 0.712,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7559253731343284,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7559253731343284,
|
| 86 |
+
"cbt/accuracy/NE": 0.7391826923076923,
|
| 87 |
+
"cbt/accuracy/V": 0.8924,
|
| 88 |
+
"cbt/accuracy/CN": 0.79,
|
| 89 |
+
"cbt/accuracy/P": 0.8656,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8217956730769231,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.821828731492597,
|
| 92 |
+
"hellaswag/accuracy/val": 0.2842063333997212,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.2842063333997212,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.2842063333997212,
|
| 95 |
+
"piqa/accuracy/val": 0.5663764961915125,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5663764961915125,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5663764961915125,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.320507399577167,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.21030042918454936,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.2654039143808582,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.2841359773371105,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.2598498391133357,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.2598498391133357,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.2598498391133357,
|
| 105 |
+
"openbookqa/accuracy/test": 0.272,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.272,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.272,
|
| 108 |
+
"race/accuracy/test/high": 0.2684391080617496,
|
| 109 |
+
"race/accuracy/test/middle": 0.33565459610027853,
|
| 110 |
+
"race/accuracy/group_average": 0.30204685208101406,
|
| 111 |
+
"race/accuracy/seq_average": 0.2880016214025132,
|
| 112 |
+
"siqa/accuracy/dev": 0.36284544524053225,
|
| 113 |
+
"siqa/accuracy/group_average": 0.36284544524053225,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.36284544524053225,
|
| 115 |
+
"winogrande/accuracy/dev": 0.5090765588003157,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.5090765588003157,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.5090765588003157,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.25307125307125306,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.25307125307125306,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.25307125307125306
|
| 121 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-50000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.7115478515625,
|
| 3 |
+
"val/accuracy": 0.4647148980034722,
|
| 4 |
+
"val/perplexity": 15.0525566156603,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.704208468798525,
|
| 8 |
+
"lambada/accuracy/total": 0.23718944099378883,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7591226708074534,
|
| 10 |
+
"lambada/perplexity": 13.7247949506867,
|
| 11 |
+
"lambada/lm_loss": 3.279500997334853,
|
| 12 |
+
"lambada/lm_perplexity": 26.562514626650426,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.3509521694986305,
|
| 16 |
+
"mean_loss": 2.7078781601805124,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.877,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.979,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.805,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.864,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.554,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.913,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.329,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.467,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.842,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.985,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.903,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.547,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.918,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.882,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.889,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.561,
|
| 33 |
+
"blimp/accuracy/transitive": 0.862,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.361,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.771,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.799,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.841,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.745,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.339,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.177,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.635,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.698,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.899,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.902,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.616,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.927,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.568,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.726,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.775,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.362,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.918,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.415,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.936,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.582,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.8,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.89,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.953,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.843,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.987,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.706,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.807,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.987,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.499,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.959,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.878,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.756,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.816,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.801,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.982,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.913,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.639,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.913,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.987,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.943,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.829,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.162,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.61,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.966,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.384,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.981,
|
| 83 |
+
"blimp/accuracy/causative": 0.711,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7578208955223882,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7578208955223881,
|
| 86 |
+
"cbt/accuracy/NE": 0.7423878205128205,
|
| 87 |
+
"cbt/accuracy/V": 0.898,
|
| 88 |
+
"cbt/accuracy/CN": 0.788,
|
| 89 |
+
"cbt/accuracy/P": 0.876,
|
| 90 |
+
"cbt/accuracy/group_average": 0.826096955128205,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8261304521808723,
|
| 92 |
+
"hellaswag/accuracy/val": 0.2861979685321649,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.2861979685321649,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.2861979685321649,
|
| 95 |
+
"piqa/accuracy/val": 0.5680087051142546,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5680087051142546,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5680087051142546,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.32473572938689216,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.2094420600858369,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.26708889473636455,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.28668555240793203,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.2604933857704684,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.2604933857704684,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.2604933857704684,
|
| 105 |
+
"openbookqa/accuracy/test": 0.284,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.284,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.284,
|
| 108 |
+
"race/accuracy/test/high": 0.25900514579759865,
|
| 109 |
+
"race/accuracy/test/middle": 0.33913649025069637,
|
| 110 |
+
"race/accuracy/group_average": 0.2990708180241475,
|
| 111 |
+
"race/accuracy/seq_average": 0.2823267126064045,
|
| 112 |
+
"siqa/accuracy/dev": 0.36591606960081885,
|
| 113 |
+
"siqa/accuracy/group_average": 0.36591606960081885,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.36591606960081885,
|
| 115 |
+
"winogrande/accuracy/dev": 0.5122336227308603,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.5122336227308603,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.5122336227308603,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.24488124488124488,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.24488124488124488,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.24488124488124488
|
| 121 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-60000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.6786939832899304,
|
| 3 |
+
"val/accuracy": 0.4689989846850198,
|
| 4 |
+
"val/perplexity": 14.566057353653534,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.5141347565265915,
|
| 8 |
+
"lambada/accuracy/total": 0.23388975155279504,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7542701863354038,
|
| 10 |
+
"lambada/perplexity": 13.526006134511944,
|
| 11 |
+
"lambada/lm_loss": 3.219389834308319,
|
| 12 |
+
"lambada/lm_perplexity": 25.012853539140185,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.35144436811890745,
|
| 16 |
+
"mean_loss": 2.5964143699082607,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.896,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.983,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.814,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.884,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.549,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.896,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.303,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.478,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.795,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.988,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.899,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.527,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.939,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.9,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.866,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.666,
|
| 33 |
+
"blimp/accuracy/transitive": 0.856,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.337,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.791,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.771,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.87,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.98,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.38,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 0.999,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.201,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.746,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.734,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.877,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.92,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.633,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.94,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.643,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.696,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.769,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.465,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.923,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.377,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.924,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.615,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.809,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.86,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.886,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.96,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.819,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.994,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.811,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.804,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.983,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.605,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.957,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.871,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.771,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.76,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.786,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.992,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.885,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.602,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.906,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.985,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.945,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.787,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.252,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.565,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.966,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.447,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.978,
|
| 83 |
+
"blimp/accuracy/causative": 0.725,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7692686567164178,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7692686567164179,
|
| 86 |
+
"cbt/accuracy/NE": 0.7347756410256411,
|
| 87 |
+
"cbt/accuracy/V": 0.8944,
|
| 88 |
+
"cbt/accuracy/CN": 0.8092,
|
| 89 |
+
"cbt/accuracy/P": 0.8812,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8298939102564102,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8299319727891157,
|
| 92 |
+
"hellaswag/accuracy/val": 0.2901812387970524,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.2901812387970524,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.2901812387970524,
|
| 95 |
+
"piqa/accuracy/val": 0.5767138193688792,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5767138193688792,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5767138193688792,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.3192389006342495,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.2,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.25961945031712474,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.2798866855524079,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.261136932427601,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.261136932427601,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.261136932427601,
|
| 105 |
+
"openbookqa/accuracy/test": 0.27,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.27,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.27,
|
| 108 |
+
"race/accuracy/test/high": 0.26043453401943967,
|
| 109 |
+
"race/accuracy/test/middle": 0.33774373259052926,
|
| 110 |
+
"race/accuracy/group_average": 0.29908913330498443,
|
| 111 |
+
"race/accuracy/seq_average": 0.28293473854884477,
|
| 112 |
+
"siqa/accuracy/dev": 0.3556806550665302,
|
| 113 |
+
"siqa/accuracy/group_average": 0.3556806550665302,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.3556806550665302,
|
| 115 |
+
"winogrande/accuracy/dev": 0.5059194948697711,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.5059194948697711,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.5059194948697711,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.24733824733824733,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.24733824733824733,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.24733824733824733
|
| 121 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-70000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.65325685531374,
|
| 3 |
+
"val/accuracy": 0.47258068266369047,
|
| 4 |
+
"val/perplexity": 14.20021144960697,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.5280458438470497,
|
| 8 |
+
"lambada/accuracy/total": 0.24592391304347827,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7552406832298136,
|
| 10 |
+
"lambada/perplexity": 12.934206584598552,
|
| 11 |
+
"lambada/lm_loss": 3.2085472225481486,
|
| 12 |
+
"lambada/lm_perplexity": 24.743113863087796,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.35925229785358437,
|
| 16 |
+
"mean_loss": 2.590651349580395,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.881,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.98,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.81,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.863,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.553,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.909,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.323,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.473,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.76,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.985,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.904,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.522,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.919,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.902,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.861,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.664,
|
| 33 |
+
"blimp/accuracy/transitive": 0.862,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.3,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.76,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.76,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.859,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.958,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.367,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 0.999,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.229,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.693,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.62,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.888,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.891,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.612,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.95,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.632,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.785,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.761,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.465,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.92,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.434,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.921,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.596,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.827,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.896,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.905,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.954,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.833,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.992,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.749,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.799,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.986,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.569,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.846,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.75,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.809,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.779,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.988,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.577,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.914,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.988,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.941,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.81,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.226,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.553,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.972,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.445,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.972,
|
| 83 |
+
"blimp/accuracy/causative": 0.697,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7642537313432837,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7642537313432836,
|
| 86 |
+
"cbt/accuracy/NE": 0.7371794871794872,
|
| 87 |
+
"cbt/accuracy/V": 0.8976,
|
| 88 |
+
"cbt/accuracy/CN": 0.8128,
|
| 89 |
+
"cbt/accuracy/P": 0.886,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8333948717948718,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8334333733493398,
|
| 92 |
+
"hellaswag/accuracy/val": 0.28719378609838675,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.28719378609838675,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.28719378609838675,
|
| 95 |
+
"piqa/accuracy/val": 0.5772578890097932,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5772578890097932,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5772578890097932,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.33403805496828753,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.20429184549356222,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.2691649502309249,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.29121813031161475,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.2609939220593493,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.2609939220593493,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.2609939220593493,
|
| 105 |
+
"openbookqa/accuracy/test": 0.268,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.268,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.268,
|
| 108 |
+
"race/accuracy/test/high": 0.2612921669525443,
|
| 109 |
+
"race/accuracy/test/middle": 0.34192200557103064,
|
| 110 |
+
"race/accuracy/group_average": 0.30160708626178745,
|
| 111 |
+
"race/accuracy/seq_average": 0.2847588163761654,
|
| 112 |
+
"siqa/accuracy/dev": 0.3592630501535312,
|
| 113 |
+
"siqa/accuracy/group_average": 0.3592630501535312,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.3592630501535312,
|
| 115 |
+
"winogrande/accuracy/dev": 0.5035516969218626,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.5035516969218626,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.5035516969218626,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.257985257985258,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.257985257985258,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.257985257985258
|
| 121 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-80000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.630677238343254,
|
| 3 |
+
"val/accuracy": 0.4760829380580357,
|
| 4 |
+
"val/perplexity": 13.883168933406056,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.499687431761937,
|
| 8 |
+
"lambada/accuracy/total": 0.24941770186335405,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7608695652173914,
|
| 10 |
+
"lambada/perplexity": 12.631674255586239,
|
| 11 |
+
"lambada/lm_loss": 3.1846876623812,
|
| 12 |
+
"lambada/lm_perplexity": 24.15974123122619,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.36275031996069484,
|
| 16 |
+
"mean_loss": 2.5651823350525955,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.898,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.98,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.818,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.871,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.603,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.911,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.27,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.498,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.791,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.989,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.9,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.538,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.935,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.896,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.889,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.658,
|
| 33 |
+
"blimp/accuracy/transitive": 0.882,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.299,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.788,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.792,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.855,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.934,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.355,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 0.999,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.233,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.712,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.757,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.901,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.896,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.644,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.94,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.643,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.866,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.766,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.464,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.912,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.395,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.927,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.663,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.802,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.859,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.9,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.954,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.832,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.987,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.772,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.772,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.973,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.56,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.968,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.852,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.776,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.785,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.787,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.991,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.872,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.641,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.927,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.986,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.951,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.831,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.285,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.621,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.962,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.413,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.966,
|
| 83 |
+
"blimp/accuracy/causative": 0.705,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7731044776119402,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7731044776119403,
|
| 86 |
+
"cbt/accuracy/NE": 0.7524038461538461,
|
| 87 |
+
"cbt/accuracy/V": 0.8992,
|
| 88 |
+
"cbt/accuracy/CN": 0.8164,
|
| 89 |
+
"cbt/accuracy/P": 0.8848,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8382009615384616,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8382352941176471,
|
| 92 |
+
"hellaswag/accuracy/val": 0.29267078271260705,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.29267078271260705,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.29267078271260705,
|
| 95 |
+
"piqa/accuracy/val": 0.5701849836779108,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5701849836779108,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5701849836779108,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.33192389006342493,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.20772532188841203,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.2698246059759185,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.29093484419263455,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.2615659635323561,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.2615659635323561,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.2615659635323561,
|
| 105 |
+
"openbookqa/accuracy/test": 0.294,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.294,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.294,
|
| 108 |
+
"race/accuracy/test/high": 0.2670097198399085,
|
| 109 |
+
"race/accuracy/test/middle": 0.346100278551532,
|
| 110 |
+
"race/accuracy/group_average": 0.30655499919572027,
|
| 111 |
+
"race/accuracy/seq_average": 0.29002837454398056,
|
| 112 |
+
"siqa/accuracy/dev": 0.3587512794268168,
|
| 113 |
+
"siqa/accuracy/group_average": 0.3587512794268168,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.3587512794268168,
|
| 115 |
+
"winogrande/accuracy/dev": 0.5090765588003157,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.5090765588003157,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.5090765588003157,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.2497952497952498,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.2497952497952498,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.2497952497952498
|
| 121 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/export/result-model-90000.pth.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.6191844637431796,
|
| 3 |
+
"val/accuracy": 0.4788682725694444,
|
| 4 |
+
"val/perplexity": 13.724526171049808,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.505079944681677,
|
| 8 |
+
"lambada/accuracy/total": 0.24631211180124224,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7569875776397516,
|
| 10 |
+
"lambada/perplexity": 12.18903683885384,
|
| 11 |
+
"lambada/lm_loss": 3.1642302502556197,
|
| 12 |
+
"lambada/lm_perplexity": 23.67051664433095,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.36259019218534333,
|
| 16 |
+
"mean_loss": 2.5621322042124284,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.89,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.976,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.797,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.886,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.589,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.914,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.322,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.5,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.783,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.986,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.904,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.535,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.928,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.895,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.896,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.679,
|
| 33 |
+
"blimp/accuracy/transitive": 0.879,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.292,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.775,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.767,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.846,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.922,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.402,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.236,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.713,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.635,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.885,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.913,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.625,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.958,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.657,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.848,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.788,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.504,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.923,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.466,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.928,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.614,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.815,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.85,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.923,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.957,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.833,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.989,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.821,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.767,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.976,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.55,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.962,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.888,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.774,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.806,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.793,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.991,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.871,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.66,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.927,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.992,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.945,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.83,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.273,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.626,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.969,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.47,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.97,
|
| 83 |
+
"blimp/accuracy/causative": 0.71,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7760298507462686,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7760298507462686,
|
| 86 |
+
"cbt/accuracy/NE": 0.7528044871794872,
|
| 87 |
+
"cbt/accuracy/V": 0.9004,
|
| 88 |
+
"cbt/accuracy/CN": 0.8188,
|
| 89 |
+
"cbt/accuracy/P": 0.8924,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8411011217948717,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8411364545818327,
|
| 92 |
+
"hellaswag/accuracy/val": 0.29336785500896234,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.29336785500896234,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.29336785500896234,
|
| 95 |
+
"piqa/accuracy/val": 0.5805223068552775,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5805223068552775,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5805223068552775,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.3281183932346723,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.20772532188841203,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.2679218575615422,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.288385269121813,
|
| 102 |
+
"mmlu/accuracy/MMLU": 0.25927779764032893,
|
| 103 |
+
"mmlu/accuracy/group_average": 0.25927779764032893,
|
| 104 |
+
"mmlu/accuracy/seq_average": 0.25927779764032893,
|
| 105 |
+
"openbookqa/accuracy/test": 0.28,
|
| 106 |
+
"openbookqa/accuracy/group_average": 0.28,
|
| 107 |
+
"openbookqa/accuracy/seq_average": 0.28,
|
| 108 |
+
"race/accuracy/test/high": 0.2641509433962264,
|
| 109 |
+
"race/accuracy/test/middle": 0.3447075208913649,
|
| 110 |
+
"race/accuracy/group_average": 0.3044292321437957,
|
| 111 |
+
"race/accuracy/seq_average": 0.2875962707742197,
|
| 112 |
+
"siqa/accuracy/dev": 0.3592630501535312,
|
| 113 |
+
"siqa/accuracy/group_average": 0.3592630501535312,
|
| 114 |
+
"siqa/accuracy/seq_average": 0.3592630501535312,
|
| 115 |
+
"winogrande/accuracy/dev": 0.5074980268350434,
|
| 116 |
+
"winogrande/accuracy/group_average": 0.5074980268350434,
|
| 117 |
+
"winogrande/accuracy/seq_average": 0.5074980268350434,
|
| 118 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.2538902538902539,
|
| 119 |
+
"commonsenseqa/accuracy/group_average": 0.2538902538902539,
|
| 120 |
+
"commonsenseqa/accuracy/seq_average": 0.2538902538902539
|
| 121 |
+
}
|