955f52b95d392e1c8958188a562a2e3c9c1f364cfc31d8a6c7900f9ef58f0e02
Browse files- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-10000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-100000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-20000.pth.json +112 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-30000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-40000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-50000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-60000.pth.json +112 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-70000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-80000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-90000.pth.json +1 -0
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-10000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 3.0193529885912698, "val/accuracy": 0.42433093843005953, "val/perplexity": 20.47803787283959, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.7976781240901594, "lambada/accuracy/total": 0.1626552795031056, "lambada/accuracy/openai_last_token": 0.7119565217391305, "lambada/perplexity": 25.716909525850493, "lambada/lm_loss": 3.569134057728088, "lambada/lm_perplexity": 35.4858511445869, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.29349310896658254, "mean_loss": 2.9085155563407143, "blimp/accuracy/passive_2": 0.87, "blimp/accuracy/determiner_noun_agreement_2": 0.965, "blimp/accuracy/ellipsis_n_bar_1": 0.709, "blimp/accuracy/tough_vs_raising_2": 0.786, "blimp/accuracy/tough_vs_raising_1": 0.632, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.884, "blimp/accuracy/principle_A_reconstruction": 0.549, "blimp/accuracy/wh_vs_that_with_gap": 0.452, "blimp/accuracy/principle_A_domain_2": 0.827, "blimp/accuracy/determiner_noun_agreement_1": 0.983, "blimp/accuracy/ellipsis_n_bar_2": 0.892, "blimp/accuracy/principle_A_domain_3": 0.565, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.875, "blimp/accuracy/animate_subject_trans": 0.884, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.752, "blimp/accuracy/distractor_agreement_relative_clause": 0.462, "blimp/accuracy/transitive": 0.793, "blimp/accuracy/sentential_subject_island": 0.304, "blimp/accuracy/adjunct_island": 0.82, "blimp/accuracy/intransitive": 0.7, "blimp/accuracy/existential_there_subject_raising": 0.82, "blimp/accuracy/irregular_past_participle_adjectives": 0.906, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.286, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.202, "blimp/accuracy/only_npi_scope": 0.554, "blimp/accuracy/superlative_quantifiers_2": 0.617, "blimp/accuracy/passive_1": 0.877, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.874, "blimp/accuracy/inchoative": 0.526, "blimp/accuracy/anaphor_gender_agreement": 0.928, "blimp/accuracy/principle_A_c_command": 0.439, "blimp/accuracy/only_npi_licensor_present": 0.276, "blimp/accuracy/expletive_it_object_raising": 0.775, "blimp/accuracy/left_branch_island_simple_question": 0.319, "blimp/accuracy/wh_questions_subject_gap": 0.908, "blimp/accuracy/existential_there_quantifiers_2": 0.407, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.884, "blimp/accuracy/sentential_negation_npi_scope": 0.419, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.805, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.882, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.801, "blimp/accuracy/principle_A_case_2": 0.934, "blimp/accuracy/distractor_agreement_relational_noun": 0.734, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993, "blimp/accuracy/superlative_quantifiers_1": 0.729, "blimp/accuracy/wh_island": 0.862, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.638, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.943, "blimp/accuracy/irregular_past_participle_verbs": 0.758, "blimp/accuracy/drop_argument": 0.71, "blimp/accuracy/wh_questions_object_gap": 0.78, "blimp/accuracy/animate_subject_passive": 0.736, "blimp/accuracy/existential_there_quantifiers_1": 0.96, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.849, "blimp/accuracy/npi_present_2": 0.535, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.847, "blimp/accuracy/anaphor_number_agreement": 0.979, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.926, "blimp/accuracy/existential_there_object_raising": 0.786, "blimp/accuracy/matrix_question_npi_licensor_present": 0.061, "blimp/accuracy/npi_present_1": 0.503, "blimp/accuracy/wh_vs_that_no_gap": 0.962, "blimp/accuracy/left_branch_island_echo_question": 0.509, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.962, "blimp/accuracy/causative": 0.608, "blimp/accuracy/group_average": 0.7239552238805971, "blimp/accuracy/seq_average": 0.723955223880597, "cbt/accuracy/NE": 0.6854967948717948, "cbt/accuracy/V": 0.862, "cbt/accuracy/CN": 0.7308, "cbt/accuracy/P": 0.8364, "cbt/accuracy/group_average": 0.7786741987179486, "cbt/accuracy/seq_average": 0.7787114845938375, "hellaswag/accuracy/val": 0.2722565226050587, "hellaswag/accuracy/group_average": 0.2722565226050587, "hellaswag/accuracy/seq_average": 0.2722565226050587, "piqa/accuracy/val": 0.5467899891186072, "piqa/accuracy/group_average": 0.5467899891186072, "piqa/accuracy/seq_average": 0.5467899891186072, "ai2arc/accuracy/ARC-Easy": 0.3150105708245243, "ai2arc/accuracy/ARC-Challenge": 0.2034334763948498, "ai2arc/accuracy/group_average": 0.25922202360968705, "ai2arc/accuracy/seq_average": 0.2781869688385269, "race/accuracy/test/high": 0.2535734705546026, "race/accuracy/test/middle": 0.32729805013927576, "race/accuracy/group_average": 0.2904357603469392, "race/accuracy/seq_average": 0.27503040129712203, "siqa/accuracy/dev": 0.35670419651995905, "siqa/accuracy/group_average": 0.35670419651995905, "siqa/accuracy/seq_average": 0.35670419651995905, "commonsenseqa/accuracy/dev_rand_split": 0.23587223587223588, "commonsenseqa/accuracy/group_average": 0.23587223587223588, "commonsenseqa/accuracy/seq_average": 0.23587223587223588}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-100000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.609346662248884, "val/accuracy": 0.4792683919270833, "val/perplexity": 13.590168980277008, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.648307468580163, "lambada/accuracy/total": 0.2511645962732919, "lambada/accuracy/openai_last_token": 0.7624223602484472, "lambada/perplexity": 12.533536360437177, "lambada/lm_loss": 3.1833163269582125, "lambada/lm_perplexity": 24.126632828816643, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.36521649410018764, "mean_loss": 2.628827065414524, "blimp/accuracy/passive_2": 0.911, "blimp/accuracy/determiner_noun_agreement_2": 0.99, "blimp/accuracy/ellipsis_n_bar_1": 0.815, "blimp/accuracy/tough_vs_raising_2": 0.867, "blimp/accuracy/tough_vs_raising_1": 0.607, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.916, "blimp/accuracy/principle_A_reconstruction": 0.247, "blimp/accuracy/wh_vs_that_with_gap": 0.535, "blimp/accuracy/principle_A_domain_2": 0.832, "blimp/accuracy/determiner_noun_agreement_1": 0.994, "blimp/accuracy/ellipsis_n_bar_2": 0.907, "blimp/accuracy/principle_A_domain_3": 0.577, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.917, "blimp/accuracy/animate_subject_trans": 0.892, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.88, "blimp/accuracy/distractor_agreement_relative_clause": 0.681, "blimp/accuracy/transitive": 0.874, "blimp/accuracy/sentential_subject_island": 0.286, "blimp/accuracy/adjunct_island": 0.839, "blimp/accuracy/intransitive": 0.809, "blimp/accuracy/existential_there_subject_raising": 0.897, "blimp/accuracy/irregular_past_participle_adjectives": 0.911, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.567, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.298, "blimp/accuracy/only_npi_scope": 0.57, "blimp/accuracy/superlative_quantifiers_2": 0.834, "blimp/accuracy/passive_1": 0.9, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.908, "blimp/accuracy/inchoative": 0.633, "blimp/accuracy/anaphor_gender_agreement": 0.958, "blimp/accuracy/principle_A_c_command": 0.561, "blimp/accuracy/only_npi_licensor_present": 0.37, "blimp/accuracy/expletive_it_object_raising": 0.768, "blimp/accuracy/left_branch_island_simple_question": 0.597, "blimp/accuracy/wh_questions_subject_gap": 0.92, "blimp/accuracy/existential_there_quantifiers_2": 0.591, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.943, "blimp/accuracy/sentential_negation_npi_scope": 0.713, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.8, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.837, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.894, "blimp/accuracy/principle_A_case_2": 0.939, "blimp/accuracy/distractor_agreement_relational_noun": 0.785, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989, "blimp/accuracy/superlative_quantifiers_1": 0.503, "blimp/accuracy/wh_island": 0.752, "blimp/accuracy/principle_A_domain_1": 0.98, "blimp/accuracy/complex_NP_island": 0.605, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.979, "blimp/accuracy/irregular_past_participle_verbs": 0.878, "blimp/accuracy/drop_argument": 0.747, "blimp/accuracy/wh_questions_object_gap": 0.774, "blimp/accuracy/animate_subject_passive": 0.792, "blimp/accuracy/existential_there_quantifiers_1": 0.984, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.867, "blimp/accuracy/npi_present_2": 0.591, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.946, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.945, "blimp/accuracy/existential_there_object_raising": 0.861, "blimp/accuracy/matrix_question_npi_licensor_present": 0.165, "blimp/accuracy/npi_present_1": 0.554, "blimp/accuracy/wh_vs_that_no_gap": 0.957, "blimp/accuracy/left_branch_island_echo_question": 0.522, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.967, "blimp/accuracy/causative": 0.708, "blimp/accuracy/group_average": 0.7704925373134327, "blimp/accuracy/seq_average": 0.7704925373134328, "cbt/accuracy/NE": 0.7632211538461539, "cbt/accuracy/V": 0.9064, "cbt/accuracy/CN": 0.8232, "cbt/accuracy/P": 0.8828, "cbt/accuracy/group_average": 0.8439052884615384, "cbt/accuracy/seq_average": 0.843937575030012, "hellaswag/accuracy/val": 0.29117705636327423, "hellaswag/accuracy/group_average": 0.29117705636327423, "hellaswag/accuracy/seq_average": 0.29117705636327423, "piqa/accuracy/val": 0.5810663764961915, "piqa/accuracy/group_average": 0.5810663764961915, "piqa/accuracy/seq_average": 0.5810663764961915, "ai2arc/accuracy/ARC-Easy": 0.33276955602537, "ai2arc/accuracy/ARC-Challenge": 0.20600858369098712, "ai2arc/accuracy/group_average": 0.26938906985817856, "ai2arc/accuracy/seq_average": 0.29093484419263455, "race/accuracy/test/high": 0.2687249857061178, "race/accuracy/test/middle": 0.3384401114206128, "race/accuracy/group_average": 0.3035825485633653, "race/accuracy/seq_average": 0.28901499797324687, "siqa/accuracy/dev": 0.36489252814739, "siqa/accuracy/group_average": 0.36489252814739, "siqa/accuracy/seq_average": 0.36489252814739, "commonsenseqa/accuracy/dev_rand_split": 0.25143325143325146, "commonsenseqa/accuracy/group_average": 0.25143325143325146, "commonsenseqa/accuracy/seq_average": 0.25143325143325146}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-20000.pth.json
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.8779788547092013,
|
| 3 |
+
"val/accuracy": 0.44091796875,
|
| 4 |
+
"val/perplexity": 17.778304306669796,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.6696091172117624,
|
| 8 |
+
"lambada/accuracy/total": 0.1906055900621118,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7303959627329193,
|
| 10 |
+
"lambada/perplexity": 22.08173634475889,
|
| 11 |
+
"lambada/lm_loss": 3.4162537295294926,
|
| 12 |
+
"lambada/lm_perplexity": 30.455107970507406,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.3157617794060559,
|
| 16 |
+
"mean_loss": 2.7737939859604817,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.87,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.96,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.778,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.831,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.614,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.882,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.678,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.467,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.806,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.979,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.89,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.567,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.877,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.875,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.811,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.552,
|
| 33 |
+
"blimp/accuracy/transitive": 0.837,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.284,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.805,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.721,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.846,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.935,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.408,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.17,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.593,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.648,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.874,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.866,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.538,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.912,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.459,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.249,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.772,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.468,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.892,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.49,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.894,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.475,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.79,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.889,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.84,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.898,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.74,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.987,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.475,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.896,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.986,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.567,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.941,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.836,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.733,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.815,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.777,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.982,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.857,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.583,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.897,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.973,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.923,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.784,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.077,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.551,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.982,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.433,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.979,
|
| 83 |
+
"blimp/accuracy/causative": 0.672,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7378507462686564,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7378507462686568,
|
| 86 |
+
"cbt/accuracy/NE": 0.6999198717948718,
|
| 87 |
+
"cbt/accuracy/V": 0.8776,
|
| 88 |
+
"cbt/accuracy/CN": 0.76,
|
| 89 |
+
"cbt/accuracy/P": 0.8632,
|
| 90 |
+
"cbt/accuracy/group_average": 0.800179967948718,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.800220088035214,
|
| 92 |
+
"hellaswag/accuracy/val": 0.28201553475403307,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.28201553475403307,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.28201553475403307,
|
| 95 |
+
"piqa/accuracy/val": 0.5527747551686616,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5527747551686616,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5527747551686616,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.30613107822410146,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.20772532188841203,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.2569282000562567,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.27365439093484417,
|
| 102 |
+
"race/accuracy/test/high": 0.252715837621498,
|
| 103 |
+
"race/accuracy/test/middle": 0.3328690807799443,
|
| 104 |
+
"race/accuracy/group_average": 0.29279245920072117,
|
| 105 |
+
"race/accuracy/seq_average": 0.2760437778678557,
|
| 106 |
+
"siqa/accuracy/dev": 0.3633572159672467,
|
| 107 |
+
"siqa/accuracy/group_average": 0.3633572159672467,
|
| 108 |
+
"siqa/accuracy/seq_average": 0.3633572159672467,
|
| 109 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.24406224406224405,
|
| 110 |
+
"commonsenseqa/accuracy/group_average": 0.24406224406224405,
|
| 111 |
+
"commonsenseqa/accuracy/seq_average": 0.24406224406224405
|
| 112 |
+
}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-30000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.8066788930741566, "val/accuracy": 0.4506312779017857, "val/perplexity": 16.554846404663273, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.621950706339771, "lambada/accuracy/total": 0.21195652173913043, "lambada/accuracy/openai_last_token": 0.7371894409937888, "lambada/perplexity": 16.762477855495113, "lambada/lm_loss": 3.3654079853835968, "lambada/lm_perplexity": 28.945304151392328, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3312938998204581, "mean_loss": 2.7143147997069637, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.786, "blimp/accuracy/tough_vs_raising_2": 0.87, "blimp/accuracy/tough_vs_raising_1": 0.646, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.88, "blimp/accuracy/principle_A_reconstruction": 0.482, "blimp/accuracy/wh_vs_that_with_gap": 0.554, "blimp/accuracy/principle_A_domain_2": 0.772, "blimp/accuracy/determiner_noun_agreement_1": 0.984, "blimp/accuracy/ellipsis_n_bar_2": 0.87, "blimp/accuracy/principle_A_domain_3": 0.546, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.907, "blimp/accuracy/animate_subject_trans": 0.867, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.812, "blimp/accuracy/distractor_agreement_relative_clause": 0.633, "blimp/accuracy/transitive": 0.825, "blimp/accuracy/sentential_subject_island": 0.281, "blimp/accuracy/adjunct_island": 0.852, "blimp/accuracy/intransitive": 0.752, "blimp/accuracy/existential_there_subject_raising": 0.867, "blimp/accuracy/irregular_past_participle_adjectives": 0.871, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.527, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.277, "blimp/accuracy/only_npi_scope": 0.706, "blimp/accuracy/superlative_quantifiers_2": 0.719, "blimp/accuracy/passive_1": 0.873, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.873, "blimp/accuracy/inchoative": 0.597, "blimp/accuracy/anaphor_gender_agreement": 0.952, "blimp/accuracy/principle_A_c_command": 0.529, "blimp/accuracy/only_npi_licensor_present": 0.509, "blimp/accuracy/expletive_it_object_raising": 0.799, "blimp/accuracy/left_branch_island_simple_question": 0.537, "blimp/accuracy/wh_questions_subject_gap": 0.884, "blimp/accuracy/existential_there_quantifiers_2": 0.477, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.911, "blimp/accuracy/sentential_negation_npi_scope": 0.638, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.792, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.85, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.847, "blimp/accuracy/principle_A_case_2": 0.925, "blimp/accuracy/distractor_agreement_relational_noun": 0.775, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.992, "blimp/accuracy/superlative_quantifiers_1": 0.674, "blimp/accuracy/wh_island": 0.754, "blimp/accuracy/principle_A_domain_1": 0.988, "blimp/accuracy/complex_NP_island": 0.577, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.964, "blimp/accuracy/irregular_past_participle_verbs": 0.883, "blimp/accuracy/drop_argument": 0.75, "blimp/accuracy/wh_questions_object_gap": 0.757, "blimp/accuracy/animate_subject_passive": 0.818, "blimp/accuracy/existential_there_quantifiers_1": 0.985, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.853, "blimp/accuracy/npi_present_2": 0.555, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.889, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.934, "blimp/accuracy/existential_there_object_raising": 0.784, "blimp/accuracy/matrix_question_npi_licensor_present": 0.102, "blimp/accuracy/npi_present_1": 0.525, "blimp/accuracy/wh_vs_that_no_gap": 0.961, "blimp/accuracy/left_branch_island_echo_question": 0.422, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.955, "blimp/accuracy/causative": 0.66, "blimp/accuracy/group_average": 0.7566567164179103, "blimp/accuracy/seq_average": 0.7566567164179104, "cbt/accuracy/NE": 0.7375801282051282, "cbt/accuracy/V": 0.8892, "cbt/accuracy/CN": 0.7824, "cbt/accuracy/P": 0.8632, "cbt/accuracy/group_average": 0.8180950320512821, "cbt/accuracy/seq_average": 0.8181272509003601, "hellaswag/accuracy/val": 0.2804222266480781, "hellaswag/accuracy/group_average": 0.2804222266480781, "hellaswag/accuracy/seq_average": 0.2804222266480781, "piqa/accuracy/val": 0.5690968443960827, "piqa/accuracy/group_average": 0.5690968443960827, "piqa/accuracy/seq_average": 0.5690968443960827, "ai2arc/accuracy/ARC-Easy": 0.31881606765327697, "ai2arc/accuracy/ARC-Challenge": 0.1982832618025751, "ai2arc/accuracy/group_average": 0.25854966472792607, "ai2arc/accuracy/seq_average": 0.2790368271954674, "race/accuracy/test/high": 0.26329331046312177, "race/accuracy/test/middle": 0.3328690807799443, "race/accuracy/group_average": 0.298081195621533, "race/accuracy/seq_average": 0.28354276449128496, "siqa/accuracy/dev": 0.3623336745138178, "siqa/accuracy/group_average": 0.3623336745138178, "siqa/accuracy/seq_average": 0.3623336745138178, "commonsenseqa/accuracy/dev_rand_split": 0.2375102375102375, "commonsenseqa/accuracy/group_average": 0.2375102375102375, "commonsenseqa/accuracy/seq_average": 0.2375102375102375}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-40000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.754640367296007, "val/accuracy": 0.45817638578869047, "val/perplexity": 15.71538811887805, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.529570395902077, "lambada/accuracy/total": 0.21195652173913043, "lambada/accuracy/openai_last_token": 0.7402950310559007, "lambada/perplexity": 16.018655883617395, "lambada/lm_loss": 3.3054434892472218, "lambada/lm_perplexity": 27.260628704052912, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3350664537639104, "mean_loss": 2.642105381599042, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.818, "blimp/accuracy/tough_vs_raising_2": 0.875, "blimp/accuracy/tough_vs_raising_1": 0.587, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.906, "blimp/accuracy/principle_A_reconstruction": 0.325, "blimp/accuracy/wh_vs_that_with_gap": 0.505, "blimp/accuracy/principle_A_domain_2": 0.797, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.895, "blimp/accuracy/principle_A_domain_3": 0.568, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.924, "blimp/accuracy/animate_subject_trans": 0.889, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.856, "blimp/accuracy/distractor_agreement_relative_clause": 0.694, "blimp/accuracy/transitive": 0.845, "blimp/accuracy/sentential_subject_island": 0.248, "blimp/accuracy/adjunct_island": 0.829, "blimp/accuracy/intransitive": 0.766, "blimp/accuracy/existential_there_subject_raising": 0.856, "blimp/accuracy/irregular_past_participle_adjectives": 0.859, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.52, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.269, "blimp/accuracy/only_npi_scope": 0.552, "blimp/accuracy/superlative_quantifiers_2": 0.614, "blimp/accuracy/passive_1": 0.893, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.888, "blimp/accuracy/inchoative": 0.589, "blimp/accuracy/anaphor_gender_agreement": 0.963, "blimp/accuracy/principle_A_c_command": 0.527, "blimp/accuracy/only_npi_licensor_present": 0.474, "blimp/accuracy/expletive_it_object_raising": 0.809, "blimp/accuracy/left_branch_island_simple_question": 0.54, "blimp/accuracy/wh_questions_subject_gap": 0.926, "blimp/accuracy/existential_there_quantifiers_2": 0.426, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.944, "blimp/accuracy/sentential_negation_npi_scope": 0.647, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.802, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.857, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.876, "blimp/accuracy/principle_A_case_2": 0.943, "blimp/accuracy/distractor_agreement_relational_noun": 0.812, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.488, "blimp/accuracy/wh_island": 0.744, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.568, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.974, "blimp/accuracy/irregular_past_participle_verbs": 0.867, "blimp/accuracy/drop_argument": 0.748, "blimp/accuracy/wh_questions_object_gap": 0.766, "blimp/accuracy/animate_subject_passive": 0.782, "blimp/accuracy/existential_there_quantifiers_1": 0.987, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.856, "blimp/accuracy/npi_present_2": 0.502, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.92, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.952, "blimp/accuracy/existential_there_object_raising": 0.821, "blimp/accuracy/matrix_question_npi_licensor_present": 0.093, "blimp/accuracy/npi_present_1": 0.47, "blimp/accuracy/wh_vs_that_no_gap": 0.97, "blimp/accuracy/left_branch_island_echo_question": 0.474, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.97, "blimp/accuracy/causative": 0.682, "blimp/accuracy/group_average": 0.7519253731343282, "blimp/accuracy/seq_average": 0.7519253731343284, "cbt/accuracy/NE": 0.7271634615384616, "cbt/accuracy/V": 0.8932, "cbt/accuracy/CN": 0.7948, "cbt/accuracy/P": 0.868, "cbt/accuracy/group_average": 0.8207908653846153, "cbt/accuracy/seq_average": 0.820828331332533, "hellaswag/accuracy/val": 0.27723561043616807, "hellaswag/accuracy/group_average": 0.27723561043616807, "hellaswag/accuracy/seq_average": 0.27723561043616807, "piqa/accuracy/val": 0.5680087051142546, "piqa/accuracy/group_average": 0.5680087051142546, "piqa/accuracy/seq_average": 0.5680087051142546, "ai2arc/accuracy/ARC-Easy": 0.31839323467230446, "ai2arc/accuracy/ARC-Challenge": 0.20686695278969958, "ai2arc/accuracy/group_average": 0.262630093731002, "ai2arc/accuracy/seq_average": 0.28158640226628895, "race/accuracy/test/high": 0.2652944539736993, "race/accuracy/test/middle": 0.32172701949860727, "race/accuracy/group_average": 0.29351073673615324, "race/accuracy/seq_average": 0.2817186866639643, "siqa/accuracy/dev": 0.35209825997952915, "siqa/accuracy/group_average": 0.35209825997952915, "siqa/accuracy/seq_average": 0.35209825997952915, "commonsenseqa/accuracy/dev_rand_split": 0.25143325143325146, "commonsenseqa/accuracy/group_average": 0.25143325143325146, "commonsenseqa/accuracy/seq_average": 0.25143325143325146}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-50000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.711526295495412, "val/accuracy": 0.4646044534350198, "val/perplexity": 15.05223214523721, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.675644015673525, "lambada/accuracy/total": 0.23699534161490685, "lambada/accuracy/openai_last_token": 0.751358695652174, "lambada/perplexity": 14.025293823640384, "lambada/lm_loss": 3.283100098057292, "lambada/lm_perplexity": 26.65828803792627, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3507998975249633, "mean_loss": 2.6935851555844685, "blimp/accuracy/passive_2": 0.901, "blimp/accuracy/determiner_noun_agreement_2": 0.993, "blimp/accuracy/ellipsis_n_bar_1": 0.809, "blimp/accuracy/tough_vs_raising_2": 0.872, "blimp/accuracy/tough_vs_raising_1": 0.57, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.908, "blimp/accuracy/principle_A_reconstruction": 0.372, "blimp/accuracy/wh_vs_that_with_gap": 0.506, "blimp/accuracy/principle_A_domain_2": 0.824, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.894, "blimp/accuracy/principle_A_domain_3": 0.554, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.91, "blimp/accuracy/animate_subject_trans": 0.899, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.859, "blimp/accuracy/distractor_agreement_relative_clause": 0.686, "blimp/accuracy/transitive": 0.855, "blimp/accuracy/sentential_subject_island": 0.266, "blimp/accuracy/adjunct_island": 0.826, "blimp/accuracy/intransitive": 0.764, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.854, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.556, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.264, "blimp/accuracy/only_npi_scope": 0.59, "blimp/accuracy/superlative_quantifiers_2": 0.612, "blimp/accuracy/passive_1": 0.886, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.889, "blimp/accuracy/inchoative": 0.584, "blimp/accuracy/anaphor_gender_agreement": 0.968, "blimp/accuracy/principle_A_c_command": 0.574, "blimp/accuracy/only_npi_licensor_present": 0.352, "blimp/accuracy/expletive_it_object_raising": 0.815, "blimp/accuracy/left_branch_island_simple_question": 0.575, "blimp/accuracy/wh_questions_subject_gap": 0.936, "blimp/accuracy/existential_there_quantifiers_2": 0.561, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.927, "blimp/accuracy/sentential_negation_npi_scope": 0.641, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.824, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.874, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.873, "blimp/accuracy/principle_A_case_2": 0.925, "blimp/accuracy/distractor_agreement_relational_noun": 0.798, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.985, "blimp/accuracy/superlative_quantifiers_1": 0.583, "blimp/accuracy/wh_island": 0.799, "blimp/accuracy/principle_A_domain_1": 0.988, "blimp/accuracy/complex_NP_island": 0.609, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.965, "blimp/accuracy/irregular_past_participle_verbs": 0.841, "blimp/accuracy/drop_argument": 0.728, "blimp/accuracy/wh_questions_object_gap": 0.823, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.988, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.883, "blimp/accuracy/npi_present_2": 0.522, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.919, "blimp/accuracy/anaphor_number_agreement": 0.985, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.94, "blimp/accuracy/existential_there_object_raising": 0.838, "blimp/accuracy/matrix_question_npi_licensor_present": 0.151, "blimp/accuracy/npi_present_1": 0.452, "blimp/accuracy/wh_vs_that_no_gap": 0.97, "blimp/accuracy/left_branch_island_echo_question": 0.472, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.971, "blimp/accuracy/causative": 0.706, "blimp/accuracy/group_average": 0.7601641791044776, "blimp/accuracy/seq_average": 0.7601641791044776, "cbt/accuracy/NE": 0.7347756410256411, "cbt/accuracy/V": 0.898, "cbt/accuracy/CN": 0.7916, "cbt/accuracy/P": 0.8748, "cbt/accuracy/group_average": 0.8247939102564102, "cbt/accuracy/seq_average": 0.8248299319727891, "hellaswag/accuracy/val": 0.2877912766381199, "hellaswag/accuracy/group_average": 0.2877912766381199, "hellaswag/accuracy/seq_average": 0.2877912766381199, "piqa/accuracy/val": 0.5772578890097932, "piqa/accuracy/group_average": 0.5772578890097932, "piqa/accuracy/seq_average": 0.5772578890097932, "ai2arc/accuracy/ARC-Easy": 0.3217758985200846, "ai2arc/accuracy/ARC-Challenge": 0.21545064377682405, "ai2arc/accuracy/group_average": 0.26861327114845435, "ai2arc/accuracy/seq_average": 0.28668555240793203, "race/accuracy/test/high": 0.2610062893081761, "race/accuracy/test/middle": 0.3328690807799443, "race/accuracy/group_average": 0.2969376850440602, "race/accuracy/seq_average": 0.2819213619781111, "siqa/accuracy/dev": 0.35516888433981575, "siqa/accuracy/group_average": 0.35516888433981575, "siqa/accuracy/seq_average": 0.35516888433981575, "commonsenseqa/accuracy/dev_rand_split": 0.24242424242424243, "commonsenseqa/accuracy/group_average": 0.24242424242424243, "commonsenseqa/accuracy/seq_average": 0.24242424242424243}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-60000.pth.json
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.6765243288070435,
|
| 3 |
+
"val/accuracy": 0.4695347377232143,
|
| 4 |
+
"val/perplexity": 14.534488301370109,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"lambada/loss": 2.63063949679736,
|
| 8 |
+
"lambada/accuracy/total": 0.21777950310559005,
|
| 9 |
+
"lambada/accuracy/openai_last_token": 0.7476708074534162,
|
| 10 |
+
"lambada/perplexity": 14.60334217178118,
|
| 11 |
+
"lambada/lm_loss": 3.237729481302067,
|
| 12 |
+
"lambada/lm_perplexity": 25.475812721337494,
|
| 13 |
+
"lambada/time_since_best_loss": 0,
|
| 14 |
+
"lambada/time_since_best_accuracy": 0,
|
| 15 |
+
"mean_accuracy": 0.34365712041440216,
|
| 16 |
+
"mean_loss": 2.653581912802202,
|
| 17 |
+
"blimp/accuracy/passive_2": 0.9,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.988,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.814,
|
| 20 |
+
"blimp/accuracy/tough_vs_raising_2": 0.877,
|
| 21 |
+
"blimp/accuracy/tough_vs_raising_1": 0.597,
|
| 22 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.905,
|
| 23 |
+
"blimp/accuracy/principle_A_reconstruction": 0.336,
|
| 24 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.52,
|
| 25 |
+
"blimp/accuracy/principle_A_domain_2": 0.799,
|
| 26 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.989,
|
| 27 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.889,
|
| 28 |
+
"blimp/accuracy/principle_A_domain_3": 0.564,
|
| 29 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.915,
|
| 30 |
+
"blimp/accuracy/animate_subject_trans": 0.896,
|
| 31 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.868,
|
| 32 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.707,
|
| 33 |
+
"blimp/accuracy/transitive": 0.854,
|
| 34 |
+
"blimp/accuracy/sentential_subject_island": 0.297,
|
| 35 |
+
"blimp/accuracy/adjunct_island": 0.838,
|
| 36 |
+
"blimp/accuracy/intransitive": 0.77,
|
| 37 |
+
"blimp/accuracy/existential_there_subject_raising": 0.873,
|
| 38 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.872,
|
| 39 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.445,
|
| 40 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 41 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.305,
|
| 42 |
+
"blimp/accuracy/only_npi_scope": 0.62,
|
| 43 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.814,
|
| 44 |
+
"blimp/accuracy/passive_1": 0.872,
|
| 45 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.907,
|
| 46 |
+
"blimp/accuracy/inchoative": 0.62,
|
| 47 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.97,
|
| 48 |
+
"blimp/accuracy/principle_A_c_command": 0.559,
|
| 49 |
+
"blimp/accuracy/only_npi_licensor_present": 0.257,
|
| 50 |
+
"blimp/accuracy/expletive_it_object_raising": 0.783,
|
| 51 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.49,
|
| 52 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.935,
|
| 53 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.369,
|
| 54 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.935,
|
| 55 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.686,
|
| 56 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.806,
|
| 57 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.866,
|
| 58 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.877,
|
| 59 |
+
"blimp/accuracy/principle_A_case_2": 0.934,
|
| 60 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.773,
|
| 61 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.996,
|
| 62 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.592,
|
| 63 |
+
"blimp/accuracy/wh_island": 0.84,
|
| 64 |
+
"blimp/accuracy/principle_A_domain_1": 0.988,
|
| 65 |
+
"blimp/accuracy/complex_NP_island": 0.612,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.971,
|
| 67 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.886,
|
| 68 |
+
"blimp/accuracy/drop_argument": 0.76,
|
| 69 |
+
"blimp/accuracy/wh_questions_object_gap": 0.821,
|
| 70 |
+
"blimp/accuracy/animate_subject_passive": 0.787,
|
| 71 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.98,
|
| 72 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.892,
|
| 73 |
+
"blimp/accuracy/npi_present_2": 0.567,
|
| 74 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.913,
|
| 75 |
+
"blimp/accuracy/anaphor_number_agreement": 0.985,
|
| 76 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.945,
|
| 77 |
+
"blimp/accuracy/existential_there_object_raising": 0.798,
|
| 78 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.178,
|
| 79 |
+
"blimp/accuracy/npi_present_1": 0.53,
|
| 80 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.97,
|
| 81 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.434,
|
| 82 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.968,
|
| 83 |
+
"blimp/accuracy/causative": 0.724,
|
| 84 |
+
"blimp/accuracy/group_average": 0.7616119402985074,
|
| 85 |
+
"blimp/accuracy/seq_average": 0.7616119402985074,
|
| 86 |
+
"cbt/accuracy/NE": 0.7447916666666666,
|
| 87 |
+
"cbt/accuracy/V": 0.9008,
|
| 88 |
+
"cbt/accuracy/CN": 0.812,
|
| 89 |
+
"cbt/accuracy/P": 0.8788,
|
| 90 |
+
"cbt/accuracy/group_average": 0.8340979166666667,
|
| 91 |
+
"cbt/accuracy/seq_average": 0.8341336534613846,
|
| 92 |
+
"hellaswag/accuracy/val": 0.2863971320454093,
|
| 93 |
+
"hellaswag/accuracy/group_average": 0.2863971320454093,
|
| 94 |
+
"hellaswag/accuracy/seq_average": 0.2863971320454093,
|
| 95 |
+
"piqa/accuracy/val": 0.5685527747551686,
|
| 96 |
+
"piqa/accuracy/group_average": 0.5685527747551686,
|
| 97 |
+
"piqa/accuracy/seq_average": 0.5685527747551686,
|
| 98 |
+
"ai2arc/accuracy/ARC-Easy": 0.333615221987315,
|
| 99 |
+
"ai2arc/accuracy/ARC-Challenge": 0.20686695278969958,
|
| 100 |
+
"ai2arc/accuracy/group_average": 0.2702410873885073,
|
| 101 |
+
"ai2arc/accuracy/seq_average": 0.29178470254957506,
|
| 102 |
+
"race/accuracy/test/high": 0.2644368210405946,
|
| 103 |
+
"race/accuracy/test/middle": 0.3447075208913649,
|
| 104 |
+
"race/accuracy/group_average": 0.30457217096597977,
|
| 105 |
+
"race/accuracy/seq_average": 0.2877989460883664,
|
| 106 |
+
"siqa/accuracy/dev": 0.3546571136131013,
|
| 107 |
+
"siqa/accuracy/group_average": 0.3546571136131013,
|
| 108 |
+
"siqa/accuracy/seq_average": 0.3546571136131013,
|
| 109 |
+
"commonsenseqa/accuracy/dev_rand_split": 0.2457002457002457,
|
| 110 |
+
"commonsenseqa/accuracy/group_average": 0.2457002457002457,
|
| 111 |
+
"commonsenseqa/accuracy/seq_average": 0.2457002457002457
|
| 112 |
+
}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-70000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.650483873155382, "val/accuracy": 0.47308737134176587, "val/perplexity": 14.160889061949224, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6131271457079777, "lambada/accuracy/total": 0.23796583850931677, "lambada/accuracy/openai_last_token": 0.74902950310559, "lambada/perplexity": 13.266013478759401, "lambada/lm_loss": 3.225909439481051, "lambada/lm_perplexity": 25.176460214410444, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.35552660492554133, "mean_loss": 2.63180550943168, "blimp/accuracy/passive_2": 0.917, "blimp/accuracy/determiner_noun_agreement_2": 0.989, "blimp/accuracy/ellipsis_n_bar_1": 0.818, "blimp/accuracy/tough_vs_raising_2": 0.853, "blimp/accuracy/tough_vs_raising_1": 0.607, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.82, "blimp/accuracy/principle_A_reconstruction": 0.268, "blimp/accuracy/wh_vs_that_with_gap": 0.508, "blimp/accuracy/principle_A_domain_2": 0.807, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.894, "blimp/accuracy/principle_A_domain_3": 0.571, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.918, "blimp/accuracy/animate_subject_trans": 0.9, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.888, "blimp/accuracy/distractor_agreement_relative_clause": 0.698, "blimp/accuracy/transitive": 0.86, "blimp/accuracy/sentential_subject_island": 0.312, "blimp/accuracy/adjunct_island": 0.827, "blimp/accuracy/intransitive": 0.78, "blimp/accuracy/existential_there_subject_raising": 0.881, "blimp/accuracy/irregular_past_participle_adjectives": 0.935, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.457, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.29, "blimp/accuracy/only_npi_scope": 0.611, "blimp/accuracy/superlative_quantifiers_2": 0.718, "blimp/accuracy/passive_1": 0.902, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.896, "blimp/accuracy/inchoative": 0.603, "blimp/accuracy/anaphor_gender_agreement": 0.976, "blimp/accuracy/principle_A_c_command": 0.549, "blimp/accuracy/only_npi_licensor_present": 0.368, "blimp/accuracy/expletive_it_object_raising": 0.773, "blimp/accuracy/left_branch_island_simple_question": 0.489, "blimp/accuracy/wh_questions_subject_gap": 0.921, "blimp/accuracy/existential_there_quantifiers_2": 0.544, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.936, "blimp/accuracy/sentential_negation_npi_scope": 0.726, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.803, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.851, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.896, "blimp/accuracy/principle_A_case_2": 0.938, "blimp/accuracy/distractor_agreement_relational_noun": 0.792, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.997, "blimp/accuracy/superlative_quantifiers_1": 0.608, "blimp/accuracy/wh_island": 0.753, "blimp/accuracy/principle_A_domain_1": 0.985, "blimp/accuracy/complex_NP_island": 0.578, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.979, "blimp/accuracy/irregular_past_participle_verbs": 0.892, "blimp/accuracy/drop_argument": 0.762, "blimp/accuracy/wh_questions_object_gap": 0.8, "blimp/accuracy/animate_subject_passive": 0.784, "blimp/accuracy/existential_there_quantifiers_1": 0.993, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.864, "blimp/accuracy/npi_present_2": 0.544, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.926, "blimp/accuracy/anaphor_number_agreement": 0.989, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.95, "blimp/accuracy/existential_there_object_raising": 0.821, "blimp/accuracy/matrix_question_npi_licensor_present": 0.183, "blimp/accuracy/npi_present_1": 0.511, "blimp/accuracy/wh_vs_that_no_gap": 0.961, "blimp/accuracy/left_branch_island_echo_question": 0.487, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.967, "blimp/accuracy/causative": 0.727, "blimp/accuracy/group_average": 0.7633432835820895, "blimp/accuracy/seq_average": 0.7633432835820896, "cbt/accuracy/NE": 0.7504006410256411, "cbt/accuracy/V": 0.9032, "cbt/accuracy/CN": 0.8168, "cbt/accuracy/P": 0.878, "cbt/accuracy/group_average": 0.8371001602564103, "cbt/accuracy/seq_average": 0.8371348539415766, "hellaswag/accuracy/val": 0.28719378609838675, "hellaswag/accuracy/group_average": 0.28719378609838675, "hellaswag/accuracy/seq_average": 0.28719378609838675, "piqa/accuracy/val": 0.5810663764961915, "piqa/accuracy/group_average": 0.5810663764961915, "piqa/accuracy/seq_average": 0.5810663764961915, "ai2arc/accuracy/ARC-Easy": 0.3302325581395349, "ai2arc/accuracy/ARC-Challenge": 0.2111587982832618, "ai2arc/accuracy/group_average": 0.27069567821139834, "ai2arc/accuracy/seq_average": 0.29093484419263455, "race/accuracy/test/high": 0.259576901086335, "race/accuracy/test/middle": 0.3321727019498607, "race/accuracy/group_average": 0.29587480151809786, "race/accuracy/seq_average": 0.28070531009323063, "siqa/accuracy/dev": 0.35670419651995905, "siqa/accuracy/group_average": 0.35670419651995905, "siqa/accuracy/seq_average": 0.35670419651995905, "commonsenseqa/accuracy/dev_rand_split": 0.25634725634725636, "commonsenseqa/accuracy/group_average": 0.25634725634725636, "commonsenseqa/accuracy/seq_average": 0.25634725634725636}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-80000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.6302894713386658, "val/accuracy": 0.4754396468874008, "val/perplexity": 13.877786542198857, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.568061591675563, "lambada/accuracy/total": 0.2453416149068323, "lambada/accuracy/openai_last_token": 0.7560170807453416, "lambada/perplexity": 12.916188960854681, "lambada/lm_loss": 3.1977878966291757, "lambada/lm_perplexity": 24.47832168388822, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.36039063089711654, "mean_loss": 2.5991755315071146, "blimp/accuracy/passive_2": 0.914, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.817, "blimp/accuracy/tough_vs_raising_2": 0.868, "blimp/accuracy/tough_vs_raising_1": 0.637, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.92, "blimp/accuracy/principle_A_reconstruction": 0.289, "blimp/accuracy/wh_vs_that_with_gap": 0.481, "blimp/accuracy/principle_A_domain_2": 0.823, "blimp/accuracy/determiner_noun_agreement_1": 0.995, "blimp/accuracy/ellipsis_n_bar_2": 0.895, "blimp/accuracy/principle_A_domain_3": 0.605, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.918, "blimp/accuracy/animate_subject_trans": 0.906, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.888, "blimp/accuracy/distractor_agreement_relative_clause": 0.716, "blimp/accuracy/transitive": 0.866, "blimp/accuracy/sentential_subject_island": 0.31, "blimp/accuracy/adjunct_island": 0.82, "blimp/accuracy/intransitive": 0.785, "blimp/accuracy/existential_there_subject_raising": 0.896, "blimp/accuracy/irregular_past_participle_adjectives": 0.901, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.509, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.271, "blimp/accuracy/only_npi_scope": 0.604, "blimp/accuracy/superlative_quantifiers_2": 0.786, "blimp/accuracy/passive_1": 0.91, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.906, "blimp/accuracy/inchoative": 0.625, "blimp/accuracy/anaphor_gender_agreement": 0.967, "blimp/accuracy/principle_A_c_command": 0.578, "blimp/accuracy/only_npi_licensor_present": 0.325, "blimp/accuracy/expletive_it_object_raising": 0.773, "blimp/accuracy/left_branch_island_simple_question": 0.52, "blimp/accuracy/wh_questions_subject_gap": 0.932, "blimp/accuracy/existential_there_quantifiers_2": 0.443, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939, "blimp/accuracy/sentential_negation_npi_scope": 0.72, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.819, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.868, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.89, "blimp/accuracy/principle_A_case_2": 0.938, "blimp/accuracy/distractor_agreement_relational_noun": 0.836, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991, "blimp/accuracy/superlative_quantifiers_1": 0.611, "blimp/accuracy/wh_island": 0.771, "blimp/accuracy/principle_A_domain_1": 0.973, "blimp/accuracy/complex_NP_island": 0.598, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.98, "blimp/accuracy/irregular_past_participle_verbs": 0.849, "blimp/accuracy/drop_argument": 0.763, "blimp/accuracy/wh_questions_object_gap": 0.796, "blimp/accuracy/animate_subject_passive": 0.803, "blimp/accuracy/existential_there_quantifiers_1": 0.988, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.866, "blimp/accuracy/npi_present_2": 0.596, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.945, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.958, "blimp/accuracy/existential_there_object_raising": 0.837, "blimp/accuracy/matrix_question_npi_licensor_present": 0.2, "blimp/accuracy/npi_present_1": 0.54, "blimp/accuracy/wh_vs_that_no_gap": 0.967, "blimp/accuracy/left_branch_island_echo_question": 0.487, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.965, "blimp/accuracy/causative": 0.731, "blimp/accuracy/group_average": 0.7701343283582087, "blimp/accuracy/seq_average": 0.7701343283582089, "cbt/accuracy/NE": 0.7568108974358975, "cbt/accuracy/V": 0.9104, "cbt/accuracy/CN": 0.8212, "cbt/accuracy/P": 0.8848, "cbt/accuracy/group_average": 0.8433027243589744, "cbt/accuracy/seq_average": 0.8433373349339736, "hellaswag/accuracy/val": 0.2905795658235411, "hellaswag/accuracy/group_average": 0.2905795658235411, "hellaswag/accuracy/seq_average": 0.2905795658235411, "piqa/accuracy/val": 0.5865070729053319, "piqa/accuracy/group_average": 0.5865070729053319, "piqa/accuracy/seq_average": 0.5865070729053319, "ai2arc/accuracy/ARC-Easy": 0.3391120507399577, "ai2arc/accuracy/ARC-Challenge": 0.21373390557939914, "ai2arc/accuracy/group_average": 0.2764229781596784, "ai2arc/accuracy/seq_average": 0.29773371104815866, "race/accuracy/test/high": 0.26300743281875355, "race/accuracy/test/middle": 0.3426183844011142, "race/accuracy/group_average": 0.3028129086099339, "race/accuracy/seq_average": 0.28617754357519254, "siqa/accuracy/dev": 0.3587512794268168, "siqa/accuracy/group_average": 0.3587512794268168, "siqa/accuracy/seq_average": 0.3587512794268168, "commonsenseqa/accuracy/dev_rand_split": 0.25225225225225223, "commonsenseqa/accuracy/group_average": 0.25225225225225223, "commonsenseqa/accuracy/seq_average": 0.25225225225225223}
|
Pretrain_language_model/save/slimpajama_moe_no_attmoe_154M_standard_lb_33_experts/export/result-model-90000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.617249019562252, "val/accuracy": 0.4780321878100198, "val/perplexity": 13.697988805814578, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5965621663916925, "lambada/accuracy/total": 0.24786490683229814, "lambada/accuracy/openai_last_token": 0.7567934782608695, "lambada/perplexity": 12.60696785379666, "lambada/lm_loss": 3.174572361253122, "lambada/lm_perplexity": 23.916590020605387, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.362948547321159, "mean_loss": 2.606905592976972, "blimp/accuracy/passive_2": 0.909, "blimp/accuracy/determiner_noun_agreement_2": 0.994, "blimp/accuracy/ellipsis_n_bar_1": 0.802, "blimp/accuracy/tough_vs_raising_2": 0.869, "blimp/accuracy/tough_vs_raising_1": 0.613, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.917, "blimp/accuracy/principle_A_reconstruction": 0.264, "blimp/accuracy/wh_vs_that_with_gap": 0.492, "blimp/accuracy/principle_A_domain_2": 0.806, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.912, "blimp/accuracy/principle_A_domain_3": 0.584, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.925, "blimp/accuracy/animate_subject_trans": 0.898, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.884, "blimp/accuracy/distractor_agreement_relative_clause": 0.694, "blimp/accuracy/transitive": 0.87, "blimp/accuracy/sentential_subject_island": 0.303, "blimp/accuracy/adjunct_island": 0.827, "blimp/accuracy/intransitive": 0.789, "blimp/accuracy/existential_there_subject_raising": 0.907, "blimp/accuracy/irregular_past_participle_adjectives": 0.896, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.519, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.278, "blimp/accuracy/only_npi_scope": 0.598, "blimp/accuracy/superlative_quantifiers_2": 0.706, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.907, "blimp/accuracy/inchoative": 0.644, "blimp/accuracy/anaphor_gender_agreement": 0.974, "blimp/accuracy/principle_A_c_command": 0.583, "blimp/accuracy/only_npi_licensor_present": 0.437, "blimp/accuracy/expletive_it_object_raising": 0.797, "blimp/accuracy/left_branch_island_simple_question": 0.553, "blimp/accuracy/wh_questions_subject_gap": 0.939, "blimp/accuracy/existential_there_quantifiers_2": 0.467, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.7, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.803, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.878, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.895, "blimp/accuracy/principle_A_case_2": 0.926, "blimp/accuracy/distractor_agreement_relational_noun": 0.839, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993, "blimp/accuracy/superlative_quantifiers_1": 0.693, "blimp/accuracy/wh_island": 0.778, "blimp/accuracy/principle_A_domain_1": 0.978, "blimp/accuracy/complex_NP_island": 0.588, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.977, "blimp/accuracy/irregular_past_participle_verbs": 0.886, "blimp/accuracy/drop_argument": 0.762, "blimp/accuracy/wh_questions_object_gap": 0.796, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.994, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.869, "blimp/accuracy/npi_present_2": 0.597, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.941, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.954, "blimp/accuracy/existential_there_object_raising": 0.856, "blimp/accuracy/matrix_question_npi_licensor_present": 0.203, "blimp/accuracy/npi_present_1": 0.563, "blimp/accuracy/wh_vs_that_no_gap": 0.966, "blimp/accuracy/left_branch_island_echo_question": 0.467, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.971, "blimp/accuracy/causative": 0.724, "blimp/accuracy/group_average": 0.7726567164179103, "blimp/accuracy/seq_average": 0.7726567164179104, "cbt/accuracy/NE": 0.7548076923076923, "cbt/accuracy/V": 0.91, "cbt/accuracy/CN": 0.82, "cbt/accuracy/P": 0.8868, "cbt/accuracy/group_average": 0.842901923076923, "cbt/accuracy/seq_average": 0.842937174869948, "hellaswag/accuracy/val": 0.29067914758016333, "hellaswag/accuracy/group_average": 0.29067914758016333, "hellaswag/accuracy/seq_average": 0.29067914758016333, "piqa/accuracy/val": 0.5745375408052231, "piqa/accuracy/group_average": 0.5745375408052231, "piqa/accuracy/seq_average": 0.5745375408052231, "ai2arc/accuracy/ARC-Easy": 0.33488372093023255, "ai2arc/accuracy/ARC-Challenge": 0.21802575107296138, "ai2arc/accuracy/group_average": 0.276454736001597, "ai2arc/accuracy/seq_average": 0.2963172804532578, "race/accuracy/test/high": 0.2655803316180675, "race/accuracy/test/middle": 0.33913649025069637, "race/accuracy/group_average": 0.3023584109343819, "race/accuracy/seq_average": 0.2869882448317795, "siqa/accuracy/dev": 0.3607983623336745, "siqa/accuracy/group_average": 0.3607983623336745, "siqa/accuracy/seq_average": 0.3607983623336745, "commonsenseqa/accuracy/dev_rand_split": 0.25061425061425063, "commonsenseqa/accuracy/group_average": 0.25061425061425063, "commonsenseqa/accuracy/seq_average": 0.25061425061425063}
|