Upload folder using huggingface_hub
#261
by
DavidNguyen
- opened
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-100000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-120000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-140000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-160000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-180000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-20000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-200000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-220000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-240000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-260000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-280000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-300000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-320000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-340000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-360000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-380000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-40000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-400000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-60000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-80000.pth.json +1 -0
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-100000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.4443790496341764, "val/accuracy": 0.4993257068452381, "val/perplexity": 11.523391922358208, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.501361159804445, "lambada/accuracy/total": 0.3159937888198758, "lambada/accuracy/openai_last_token": 0.7845496894409938, "lambada/perplexity": 8.75451786621247, "lambada/lm_loss": 3.027924854849423, "lambada/lm_perplexity": 20.65432736023701, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.40765974783255693, "mean_loss": 2.472870104719311, "blimp/accuracy/passive_2": 0.885, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.849, "blimp/accuracy/tough_vs_raising_2": 0.881, "blimp/accuracy/tough_vs_raising_1": 0.65, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.919, "blimp/accuracy/principle_A_reconstruction": 0.415, "blimp/accuracy/wh_vs_that_with_gap": 0.532, "blimp/accuracy/principle_A_domain_2": 0.884, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.91, "blimp/accuracy/principle_A_domain_3": 0.681, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.948, "blimp/accuracy/animate_subject_trans": 0.914, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.917, "blimp/accuracy/distractor_agreement_relative_clause": 0.643, "blimp/accuracy/transitive": 0.895, "blimp/accuracy/sentential_subject_island": 0.252, "blimp/accuracy/adjunct_island": 0.86, "blimp/accuracy/intransitive": 0.798, "blimp/accuracy/existential_there_subject_raising": 0.863, "blimp/accuracy/irregular_past_participle_adjectives": 0.886, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.558, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.336, "blimp/accuracy/only_npi_scope": 0.649, "blimp/accuracy/superlative_quantifiers_2": 0.845, "blimp/accuracy/passive_1": 0.898, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.898, "blimp/accuracy/inchoative": 0.653, "blimp/accuracy/anaphor_gender_agreement": 0.975, "blimp/accuracy/principle_A_c_command": 0.72, "blimp/accuracy/only_npi_licensor_present": 0.539, "blimp/accuracy/expletive_it_object_raising": 0.805, "blimp/accuracy/left_branch_island_simple_question": 0.632, "blimp/accuracy/wh_questions_subject_gap": 0.923, "blimp/accuracy/existential_there_quantifiers_2": 0.504, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.951, "blimp/accuracy/sentential_negation_npi_scope": 0.754, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.82, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.903, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.92, "blimp/accuracy/principle_A_case_2": 0.937, "blimp/accuracy/distractor_agreement_relational_noun": 0.887, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.984, "blimp/accuracy/superlative_quantifiers_1": 0.625, "blimp/accuracy/wh_island": 0.731, "blimp/accuracy/principle_A_domain_1": 0.97, "blimp/accuracy/complex_NP_island": 0.569, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.972, "blimp/accuracy/irregular_past_participle_verbs": 0.918, "blimp/accuracy/drop_argument": 0.753, "blimp/accuracy/wh_questions_object_gap": 0.848, "blimp/accuracy/animate_subject_passive": 0.805, "blimp/accuracy/existential_there_quantifiers_1": 0.97, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.904, "blimp/accuracy/npi_present_2": 0.665, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.959, "blimp/accuracy/anaphor_number_agreement": 0.99, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.957, "blimp/accuracy/existential_there_object_raising": 0.855, "blimp/accuracy/matrix_question_npi_licensor_present": 0.361, "blimp/accuracy/npi_present_1": 0.645, "blimp/accuracy/wh_vs_that_no_gap": 0.973, "blimp/accuracy/left_branch_island_echo_question": 0.534, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.964, "blimp/accuracy/causative": 0.752, "blimp/accuracy/group_average": 0.7965223880597015, "blimp/accuracy/seq_average": 0.7965223880597015, "cbt/accuracy/NE": 0.7760416666666666, "cbt/accuracy/V": 0.9288, "cbt/accuracy/CN": 0.8656, "cbt/accuracy/P": 0.9072, "cbt/accuracy/group_average": 0.8694104166666666, "cbt/accuracy/seq_average": 0.8694477791116446, "hellaswag/accuracy/val": 0.3253335988846843, "hellaswag/accuracy/group_average": 0.3253335988846843, "hellaswag/accuracy/seq_average": 0.3253335988846843, "piqa/accuracy/val": 0.6196953210010882, "piqa/accuracy/group_average": 0.6196953210010882, "piqa/accuracy/seq_average": 0.6196953210010882, "ai2arc/accuracy/ARC-Easy": 0.3572938689217759, "ai2arc/accuracy/ARC-Challenge": 0.22489270386266094, "ai2arc/accuracy/group_average": 0.29109328639221843, "ai2arc/accuracy/seq_average": 0.31359773371104815, "mmlu/accuracy/MMLU": 0.25956381837683234, "mmlu/accuracy/group_average": 0.25956381837683234, "mmlu/accuracy/seq_average": 0.25956381837683234, "openbookqa/accuracy/test": 0.286, "openbookqa/accuracy/group_average": 0.286, "openbookqa/accuracy/seq_average": 0.286, "race/accuracy/test/high": 0.27930245854774155, "race/accuracy/test/middle": 0.3544568245125348, "race/accuracy/group_average": 0.31687964153013815, "race/accuracy/seq_average": 0.30117551682205107, "siqa/accuracy/dev": 0.36284544524053225, "siqa/accuracy/group_average": 0.36284544524053225, "siqa/accuracy/seq_average": 0.36284544524053225, "winogrande/accuracy/dev": 0.5090765588003157, "winogrande/accuracy/group_average": 0.5090765588003157, "winogrande/accuracy/seq_average": 0.5090765588003157, "commonsenseqa/accuracy/dev_rand_split": 0.2628992628992629, "commonsenseqa/accuracy/group_average": 0.2628992628992629, "commonsenseqa/accuracy/seq_average": 0.2628992628992629}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-120000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.4189845493861606, "val/accuracy": 0.5035739474826388, "val/perplexity": 11.23444549620276, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.487743496154406, "lambada/accuracy/total": 0.3361801242236025, "lambada/accuracy/openai_last_token": 0.7884316770186336, "lambada/perplexity": 7.7680072478895825, "lambada/lm_loss": 3.002447026063198, "lambada/lm_perplexity": 20.13474694007344, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.41987703585312064, "mean_loss": 2.453364022770283, "blimp/accuracy/passive_2": 0.917, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.836, "blimp/accuracy/tough_vs_raising_2": 0.906, "blimp/accuracy/tough_vs_raising_1": 0.611, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.918, "blimp/accuracy/principle_A_reconstruction": 0.406, "blimp/accuracy/wh_vs_that_with_gap": 0.507, "blimp/accuracy/principle_A_domain_2": 0.867, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.921, "blimp/accuracy/principle_A_domain_3": 0.634, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.948, "blimp/accuracy/animate_subject_trans": 0.899, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.906, "blimp/accuracy/distractor_agreement_relative_clause": 0.662, "blimp/accuracy/transitive": 0.89, "blimp/accuracy/sentential_subject_island": 0.3, "blimp/accuracy/adjunct_island": 0.884, "blimp/accuracy/intransitive": 0.778, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.928, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.557, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.289, "blimp/accuracy/only_npi_scope": 0.594, "blimp/accuracy/superlative_quantifiers_2": 0.718, "blimp/accuracy/passive_1": 0.885, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.932, "blimp/accuracy/inchoative": 0.636, "blimp/accuracy/anaphor_gender_agreement": 0.969, "blimp/accuracy/principle_A_c_command": 0.726, "blimp/accuracy/only_npi_licensor_present": 0.66, "blimp/accuracy/expletive_it_object_raising": 0.809, "blimp/accuracy/left_branch_island_simple_question": 0.64, "blimp/accuracy/wh_questions_subject_gap": 0.939, "blimp/accuracy/existential_there_quantifiers_2": 0.538, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.953, "blimp/accuracy/sentential_negation_npi_scope": 0.776, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.812, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.94, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.921, "blimp/accuracy/principle_A_case_2": 0.942, "blimp/accuracy/distractor_agreement_relational_noun": 0.901, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.978, "blimp/accuracy/superlative_quantifiers_1": 0.78, "blimp/accuracy/wh_island": 0.749, "blimp/accuracy/principle_A_domain_1": 0.984, "blimp/accuracy/complex_NP_island": 0.59, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.978, "blimp/accuracy/irregular_past_participle_verbs": 0.896, "blimp/accuracy/drop_argument": 0.722, "blimp/accuracy/wh_questions_object_gap": 0.847, "blimp/accuracy/animate_subject_passive": 0.825, "blimp/accuracy/existential_there_quantifiers_1": 0.978, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/npi_present_2": 0.671, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.955, "blimp/accuracy/anaphor_number_agreement": 0.989, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.968, "blimp/accuracy/existential_there_object_raising": 0.877, "blimp/accuracy/matrix_question_npi_licensor_present": 0.342, "blimp/accuracy/npi_present_1": 0.605, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.505, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.978, "blimp/accuracy/causative": 0.74, "blimp/accuracy/group_average": 0.7993731343283588, "blimp/accuracy/seq_average": 0.7993731343283582, "cbt/accuracy/NE": 0.7880608974358975, "cbt/accuracy/V": 0.9328, "cbt/accuracy/CN": 0.8676, "cbt/accuracy/P": 0.9108, "cbt/accuracy/group_average": 0.8748152243589744, "cbt/accuracy/seq_average": 0.8748499399759904, "hellaswag/accuracy/val": 0.3314080860386377, "hellaswag/accuracy/group_average": 0.3314080860386377, "hellaswag/accuracy/seq_average": 0.3314080860386377, "piqa/accuracy/val": 0.6224156692056583, "piqa/accuracy/group_average": 0.6224156692056583, "piqa/accuracy/seq_average": 0.6224156692056583, "ai2arc/accuracy/ARC-Easy": 0.3585623678646934, "ai2arc/accuracy/ARC-Challenge": 0.23004291845493563, "ai2arc/accuracy/group_average": 0.2943026431598145, "ai2arc/accuracy/seq_average": 0.3161473087818697, "mmlu/accuracy/MMLU": 0.2612799427958527, "mmlu/accuracy/group_average": 0.2612799427958527, "mmlu/accuracy/seq_average": 0.2612799427958527, "openbookqa/accuracy/test": 0.294, "openbookqa/accuracy/group_average": 0.294, "openbookqa/accuracy/seq_average": 0.294, "race/accuracy/test/high": 0.28130360205831906, "race/accuracy/test/middle": 0.3565459610027855, "race/accuracy/group_average": 0.31892478153055226, "race/accuracy/seq_average": 0.30320226996351846, "siqa/accuracy/dev": 0.35977482088024565, "siqa/accuracy/group_average": 0.35977482088024565, "siqa/accuracy/seq_average": 0.35977482088024565, "winogrande/accuracy/dev": 0.5122336227308603, "winogrande/accuracy/group_average": 0.5122336227308603, "winogrande/accuracy/seq_average": 0.5122336227308603, "commonsenseqa/accuracy/dev_rand_split": 0.266994266994267, "commonsenseqa/accuracy/group_average": 0.266994266994267, "commonsenseqa/accuracy/seq_average": 0.266994266994267}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-140000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.3981781005859375, "val/accuracy": 0.5059291294642857, "val/perplexity": 11.003111545658282, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.473374645162073, "lambada/accuracy/total": 0.3179347826086957, "lambada/accuracy/openai_last_token": 0.7872670807453416, "lambada/perplexity": 8.228297446824557, "lambada/lm_loss": 2.998701187814685, "lambada/lm_perplexity": 20.059466517028714, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4119319560364907, "mean_loss": 2.435776372874005, "blimp/accuracy/passive_2": 0.908, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.87, "blimp/accuracy/tough_vs_raising_2": 0.894, "blimp/accuracy/tough_vs_raising_1": 0.619, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.908, "blimp/accuracy/principle_A_reconstruction": 0.457, "blimp/accuracy/wh_vs_that_with_gap": 0.544, "blimp/accuracy/principle_A_domain_2": 0.87, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.91, "blimp/accuracy/principle_A_domain_3": 0.672, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.941, "blimp/accuracy/animate_subject_trans": 0.917, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.881, "blimp/accuracy/distractor_agreement_relative_clause": 0.648, "blimp/accuracy/transitive": 0.897, "blimp/accuracy/sentential_subject_island": 0.252, "blimp/accuracy/adjunct_island": 0.899, "blimp/accuracy/intransitive": 0.767, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.886, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.681, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.337, "blimp/accuracy/only_npi_scope": 0.684, "blimp/accuracy/superlative_quantifiers_2": 0.772, "blimp/accuracy/passive_1": 0.893, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.927, "blimp/accuracy/inchoative": 0.655, "blimp/accuracy/anaphor_gender_agreement": 0.976, "blimp/accuracy/principle_A_c_command": 0.686, "blimp/accuracy/only_npi_licensor_present": 0.678, "blimp/accuracy/expletive_it_object_raising": 0.811, "blimp/accuracy/left_branch_island_simple_question": 0.749, "blimp/accuracy/wh_questions_subject_gap": 0.946, "blimp/accuracy/existential_there_quantifiers_2": 0.444, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.95, "blimp/accuracy/sentential_negation_npi_scope": 0.698, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.811, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.937, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.909, "blimp/accuracy/principle_A_case_2": 0.939, "blimp/accuracy/distractor_agreement_relational_noun": 0.93, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.983, "blimp/accuracy/superlative_quantifiers_1": 0.778, "blimp/accuracy/wh_island": 0.709, "blimp/accuracy/principle_A_domain_1": 0.985, "blimp/accuracy/complex_NP_island": 0.604, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.971, "blimp/accuracy/irregular_past_participle_verbs": 0.919, "blimp/accuracy/drop_argument": 0.752, "blimp/accuracy/wh_questions_object_gap": 0.893, "blimp/accuracy/animate_subject_passive": 0.8, "blimp/accuracy/existential_there_quantifiers_1": 0.974, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.905, "blimp/accuracy/npi_present_2": 0.586, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.958, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.963, "blimp/accuracy/existential_there_object_raising": 0.848, "blimp/accuracy/matrix_question_npi_licensor_present": 0.389, "blimp/accuracy/npi_present_1": 0.483, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.563, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.971, "blimp/accuracy/causative": 0.742, "blimp/accuracy/group_average": 0.8022985074626863, "blimp/accuracy/seq_average": 0.8022985074626866, "cbt/accuracy/NE": 0.8036858974358975, "cbt/accuracy/V": 0.9368, "cbt/accuracy/CN": 0.8712, "cbt/accuracy/P": 0.9152, "cbt/accuracy/group_average": 0.8817214743589744, "cbt/accuracy/seq_average": 0.8817527010804321, "hellaswag/accuracy/val": 0.33499302927703645, "hellaswag/accuracy/group_average": 0.33499302927703645, "hellaswag/accuracy/seq_average": 0.33499302927703645, "piqa/accuracy/val": 0.6207834602829162, "piqa/accuracy/group_average": 0.6207834602829162, "piqa/accuracy/seq_average": 0.6207834602829162, "ai2arc/accuracy/ARC-Easy": 0.36363636363636365, "ai2arc/accuracy/ARC-Challenge": 0.22746781115879827, "ai2arc/accuracy/group_average": 0.295552087397581, "ai2arc/accuracy/seq_average": 0.31869688385269124, "mmlu/accuracy/MMLU": 0.26385412942438324, "mmlu/accuracy/group_average": 0.26385412942438324, "mmlu/accuracy/seq_average": 0.26385412942438324, "openbookqa/accuracy/test": 0.306, "openbookqa/accuracy/group_average": 0.306, "openbookqa/accuracy/seq_average": 0.306, "race/accuracy/test/high": 0.28444825614636937, "race/accuracy/test/middle": 0.3628133704735376, "race/accuracy/group_average": 0.32363081330995347, "race/accuracy/seq_average": 0.30725577624645317, "siqa/accuracy/dev": 0.3587512794268168, "siqa/accuracy/group_average": 0.3587512794268168, "siqa/accuracy/seq_average": 0.3587512794268168, "winogrande/accuracy/dev": 0.5122336227308603, "winogrande/accuracy/group_average": 0.5122336227308603, "winogrande/accuracy/seq_average": 0.5122336227308603, "commonsenseqa/accuracy/dev_rand_split": 0.2719082719082719, "commonsenseqa/accuracy/group_average": 0.2719082719082719, "commonsenseqa/accuracy/seq_average": 0.2719082719082719}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-160000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.381313263423859, "val/accuracy": 0.5083734421502977, "val/perplexity": 10.819101869137201, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5544761515552215, "lambada/accuracy/total": 0.3189052795031056, "lambada/accuracy/openai_last_token": 0.7886257763975155, "lambada/perplexity": 7.849256071290196, "lambada/lm_loss": 2.9702501555745995, "lambada/lm_perplexity": 19.496796218309402, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4136393608267016, "mean_loss": 2.46789470748954, "blimp/accuracy/passive_2": 0.913, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.87, "blimp/accuracy/tough_vs_raising_2": 0.885, "blimp/accuracy/tough_vs_raising_1": 0.606, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.932, "blimp/accuracy/principle_A_reconstruction": 0.428, "blimp/accuracy/wh_vs_that_with_gap": 0.496, "blimp/accuracy/principle_A_domain_2": 0.879, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.918, "blimp/accuracy/principle_A_domain_3": 0.64, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.929, "blimp/accuracy/animate_subject_trans": 0.901, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.904, "blimp/accuracy/distractor_agreement_relative_clause": 0.678, "blimp/accuracy/transitive": 0.892, "blimp/accuracy/sentential_subject_island": 0.268, "blimp/accuracy/adjunct_island": 0.915, "blimp/accuracy/intransitive": 0.779, "blimp/accuracy/existential_there_subject_raising": 0.853, "blimp/accuracy/irregular_past_participle_adjectives": 0.94, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.682, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.319, "blimp/accuracy/only_npi_scope": 0.668, "blimp/accuracy/superlative_quantifiers_2": 0.834, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.93, "blimp/accuracy/inchoative": 0.648, "blimp/accuracy/anaphor_gender_agreement": 0.976, "blimp/accuracy/principle_A_c_command": 0.728, "blimp/accuracy/only_npi_licensor_present": 0.611, "blimp/accuracy/expletive_it_object_raising": 0.799, "blimp/accuracy/left_branch_island_simple_question": 0.778, "blimp/accuracy/wh_questions_subject_gap": 0.93, "blimp/accuracy/existential_there_quantifiers_2": 0.469, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.724, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.84, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.935, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/principle_A_case_2": 0.96, "blimp/accuracy/distractor_agreement_relational_noun": 0.929, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.804, "blimp/accuracy/wh_island": 0.787, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.628, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.976, "blimp/accuracy/irregular_past_participle_verbs": 0.921, "blimp/accuracy/drop_argument": 0.718, "blimp/accuracy/wh_questions_object_gap": 0.856, "blimp/accuracy/animate_subject_passive": 0.799, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.917, "blimp/accuracy/npi_present_2": 0.589, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.961, "blimp/accuracy/anaphor_number_agreement": 0.994, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.97, "blimp/accuracy/existential_there_object_raising": 0.864, "blimp/accuracy/matrix_question_npi_licensor_present": 0.349, "blimp/accuracy/npi_present_1": 0.506, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.434, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.964, "blimp/accuracy/causative": 0.783, "blimp/accuracy/group_average": 0.8039253731343283, "blimp/accuracy/seq_average": 0.8039253731343283, "cbt/accuracy/NE": 0.8028846153846154, "cbt/accuracy/V": 0.9296, "cbt/accuracy/CN": 0.878, "cbt/accuracy/P": 0.9148, "cbt/accuracy/group_average": 0.8813211538461538, "cbt/accuracy/seq_average": 0.8813525410164066, "hellaswag/accuracy/val": 0.33648675562636926, "hellaswag/accuracy/group_average": 0.33648675562636926, "hellaswag/accuracy/seq_average": 0.33648675562636926, "piqa/accuracy/val": 0.6175190424374319, "piqa/accuracy/group_average": 0.6175190424374319, "piqa/accuracy/seq_average": 0.6175190424374319, "ai2arc/accuracy/ARC-Easy": 0.37420718816067655, "ai2arc/accuracy/ARC-Challenge": 0.22918454935622318, "ai2arc/accuracy/group_average": 0.3016958687584499, "ai2arc/accuracy/seq_average": 0.3263456090651558, "mmlu/accuracy/MMLU": 0.26514122273864854, "mmlu/accuracy/group_average": 0.26514122273864854, "mmlu/accuracy/seq_average": 0.26514122273864854, "openbookqa/accuracy/test": 0.282, "openbookqa/accuracy/group_average": 0.282, "openbookqa/accuracy/seq_average": 0.282, "race/accuracy/test/high": 0.2858776443682104, "race/accuracy/test/middle": 0.362116991643454, "race/accuracy/group_average": 0.3239973180058322, "race/accuracy/seq_average": 0.3080664775030401, "siqa/accuracy/dev": 0.3679631525076766, "siqa/accuracy/group_average": 0.3679631525076766, "siqa/accuracy/seq_average": 0.3679631525076766, "winogrande/accuracy/dev": 0.5209155485398579, "winogrande/accuracy/group_average": 0.5209155485398579, "winogrande/accuracy/seq_average": 0.5209155485398579, "commonsenseqa/accuracy/dev_rand_split": 0.266994266994267, "commonsenseqa/accuracy/group_average": 0.266994266994267, "commonsenseqa/accuracy/seq_average": 0.266994266994267}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-180000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.3600594656808034, "val/accuracy": 0.5120316762772817, "val/perplexity": 10.591581269298313, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.579767452263684, "lambada/accuracy/total": 0.35267857142857145, "lambada/accuracy/openai_last_token": 0.7954192546583851, "lambada/perplexity": 7.385408425966351, "lambada/lm_loss": 2.9573429639423128, "lambada/lm_perplexity": 19.246764408717876, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4323551238529266, "mean_loss": 2.469913458972244, "blimp/accuracy/passive_2": 0.92, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.843, "blimp/accuracy/tough_vs_raising_2": 0.87, "blimp/accuracy/tough_vs_raising_1": 0.631, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.932, "blimp/accuracy/principle_A_reconstruction": 0.366, "blimp/accuracy/wh_vs_that_with_gap": 0.509, "blimp/accuracy/principle_A_domain_2": 0.891, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.889, "blimp/accuracy/principle_A_domain_3": 0.682, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.94, "blimp/accuracy/animate_subject_trans": 0.909, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.904, "blimp/accuracy/distractor_agreement_relative_clause": 0.68, "blimp/accuracy/transitive": 0.887, "blimp/accuracy/sentential_subject_island": 0.302, "blimp/accuracy/adjunct_island": 0.883, "blimp/accuracy/intransitive": 0.777, "blimp/accuracy/existential_there_subject_raising": 0.869, "blimp/accuracy/irregular_past_participle_adjectives": 0.944, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.679, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.321, "blimp/accuracy/only_npi_scope": 0.663, "blimp/accuracy/superlative_quantifiers_2": 0.79, "blimp/accuracy/passive_1": 0.893, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.929, "blimp/accuracy/inchoative": 0.641, "blimp/accuracy/anaphor_gender_agreement": 0.977, "blimp/accuracy/principle_A_c_command": 0.745, "blimp/accuracy/only_npi_licensor_present": 0.578, "blimp/accuracy/expletive_it_object_raising": 0.811, "blimp/accuracy/left_branch_island_simple_question": 0.732, "blimp/accuracy/wh_questions_subject_gap": 0.947, "blimp/accuracy/existential_there_quantifiers_2": 0.553, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.941, "blimp/accuracy/sentential_negation_npi_scope": 0.691, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.782, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.92, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.91, "blimp/accuracy/principle_A_case_2": 0.961, "blimp/accuracy/distractor_agreement_relational_noun": 0.895, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.986, "blimp/accuracy/superlative_quantifiers_1": 0.743, "blimp/accuracy/wh_island": 0.725, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.566, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.981, "blimp/accuracy/irregular_past_participle_verbs": 0.906, "blimp/accuracy/drop_argument": 0.726, "blimp/accuracy/wh_questions_object_gap": 0.859, "blimp/accuracy/animate_subject_passive": 0.789, "blimp/accuracy/existential_there_quantifiers_1": 0.984, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.919, "blimp/accuracy/npi_present_2": 0.538, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.93, "blimp/accuracy/anaphor_number_agreement": 0.989, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.956, "blimp/accuracy/existential_there_object_raising": 0.873, "blimp/accuracy/matrix_question_npi_licensor_present": 0.376, "blimp/accuracy/npi_present_1": 0.528, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.489, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.966, "blimp/accuracy/causative": 0.749, "blimp/accuracy/group_average": 0.7987164179104476, "blimp/accuracy/seq_average": 0.7987164179104478, "cbt/accuracy/NE": 0.8141025641025641, "cbt/accuracy/V": 0.93, "cbt/accuracy/CN": 0.8748, "cbt/accuracy/P": 0.92, "cbt/accuracy/group_average": 0.884725641025641, "cbt/accuracy/seq_average": 0.8847539015606243, "hellaswag/accuracy/val": 0.3461461860187214, "hellaswag/accuracy/group_average": 0.3461461860187214, "hellaswag/accuracy/seq_average": 0.3461461860187214, "piqa/accuracy/val": 0.6294885745375408, "piqa/accuracy/group_average": 0.6294885745375408, "piqa/accuracy/seq_average": 0.6294885745375408, "ai2arc/accuracy/ARC-Easy": 0.3682875264270613, "ai2arc/accuracy/ARC-Challenge": 0.2369098712446352, "ai2arc/accuracy/group_average": 0.3025986988358482, "ai2arc/accuracy/seq_average": 0.32492917847025493, "mmlu/accuracy/MMLU": 0.26092241687522344, "mmlu/accuracy/group_average": 0.26092241687522344, "mmlu/accuracy/seq_average": 0.26092241687522344, "openbookqa/accuracy/test": 0.29, "openbookqa/accuracy/group_average": 0.29, "openbookqa/accuracy/seq_average": 0.29, "race/accuracy/test/high": 0.28959405374499714, "race/accuracy/test/middle": 0.3516713091922006, "race/accuracy/group_average": 0.32063268146859886, "race/accuracy/seq_average": 0.30766112687474667, "siqa/accuracy/dev": 0.3694984646878199, "siqa/accuracy/group_average": 0.3694984646878199, "siqa/accuracy/seq_average": 0.3694984646878199, "winogrande/accuracy/dev": 0.5019731649565904, "winogrande/accuracy/group_average": 0.5019731649565904, "winogrande/accuracy/seq_average": 0.5019731649565904, "commonsenseqa/accuracy/dev_rand_split": 0.266994266994267, "commonsenseqa/accuracy/group_average": 0.266994266994267, "commonsenseqa/accuracy/seq_average": 0.266994266994267}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-20000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.721920679485987, "val/accuracy": 0.46118939112103174, "val/perplexity": 15.20950679659343, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.737494616775039, "lambada/accuracy/total": 0.21836180124223603, "lambada/accuracy/openai_last_token": 0.7441770186335404, "lambada/perplexity": 14.687933517614002, "lambada/lm_loss": 3.2756014503533692, "lambada/lm_perplexity": 26.45913455165093, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3397755961816339, "mean_loss": 2.729707648130513, "blimp/accuracy/passive_2": 0.88, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.756, "blimp/accuracy/tough_vs_raising_2": 0.895, "blimp/accuracy/tough_vs_raising_1": 0.571, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.879, "blimp/accuracy/principle_A_reconstruction": 0.315, "blimp/accuracy/wh_vs_that_with_gap": 0.526, "blimp/accuracy/principle_A_domain_2": 0.838, "blimp/accuracy/determiner_noun_agreement_1": 0.98, "blimp/accuracy/ellipsis_n_bar_2": 0.906, "blimp/accuracy/principle_A_domain_3": 0.612, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.919, "blimp/accuracy/animate_subject_trans": 0.903, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.88, "blimp/accuracy/distractor_agreement_relative_clause": 0.499, "blimp/accuracy/transitive": 0.864, "blimp/accuracy/sentential_subject_island": 0.279, "blimp/accuracy/adjunct_island": 0.788, "blimp/accuracy/intransitive": 0.789, "blimp/accuracy/existential_there_subject_raising": 0.872, "blimp/accuracy/irregular_past_participle_adjectives": 0.965, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.199, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.244, "blimp/accuracy/only_npi_scope": 0.683, "blimp/accuracy/superlative_quantifiers_2": 0.66, "blimp/accuracy/passive_1": 0.889, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.892, "blimp/accuracy/inchoative": 0.605, "blimp/accuracy/anaphor_gender_agreement": 0.961, "blimp/accuracy/principle_A_c_command": 0.586, "blimp/accuracy/only_npi_licensor_present": 0.596, "blimp/accuracy/expletive_it_object_raising": 0.784, "blimp/accuracy/left_branch_island_simple_question": 0.27, "blimp/accuracy/wh_questions_subject_gap": 0.927, "blimp/accuracy/existential_there_quantifiers_2": 0.23, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.655, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.816, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.906, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.843, "blimp/accuracy/principle_A_case_2": 0.932, "blimp/accuracy/distractor_agreement_relational_noun": 0.844, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.997, "blimp/accuracy/superlative_quantifiers_1": 0.563, "blimp/accuracy/wh_island": 0.729, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.475, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.976, "blimp/accuracy/irregular_past_participle_verbs": 0.861, "blimp/accuracy/drop_argument": 0.777, "blimp/accuracy/wh_questions_object_gap": 0.802, "blimp/accuracy/animate_subject_passive": 0.804, "blimp/accuracy/existential_there_quantifiers_1": 0.969, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.863, "blimp/accuracy/npi_present_2": 0.63, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.925, "blimp/accuracy/anaphor_number_agreement": 0.984, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.951, "blimp/accuracy/existential_there_object_raising": 0.818, "blimp/accuracy/matrix_question_npi_licensor_present": 0.2, "blimp/accuracy/npi_present_1": 0.527, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.355, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.985, "blimp/accuracy/causative": 0.69, "blimp/accuracy/group_average": 0.7493582089552238, "blimp/accuracy/seq_average": 0.7493582089552239, "cbt/accuracy/NE": 0.7427884615384616, "cbt/accuracy/V": 0.9068, "cbt/accuracy/CN": 0.8032, "cbt/accuracy/P": 0.8776, "cbt/accuracy/group_average": 0.8325971153846154, "cbt/accuracy/seq_average": 0.8326330532212886, "hellaswag/accuracy/val": 0.2891854212308305, "hellaswag/accuracy/group_average": 0.2891854212308305, "hellaswag/accuracy/seq_average": 0.2891854212308305, "piqa/accuracy/val": 0.5750816104461371, "piqa/accuracy/group_average": 0.5750816104461371, "piqa/accuracy/seq_average": 0.5750816104461371, "ai2arc/accuracy/ARC-Easy": 0.32515856236786467, "ai2arc/accuracy/ARC-Challenge": 0.2094420600858369, "ai2arc/accuracy/group_average": 0.2673003112268508, "ai2arc/accuracy/seq_average": 0.2869688385269122, "mmlu/accuracy/MMLU": 0.2609939220593493, "mmlu/accuracy/group_average": 0.2609939220593493, "mmlu/accuracy/seq_average": 0.2609939220593493, "openbookqa/accuracy/test": 0.248, "openbookqa/accuracy/group_average": 0.248, "openbookqa/accuracy/seq_average": 0.248, "race/accuracy/test/high": 0.2672955974842767, "race/accuracy/test/middle": 0.3363509749303621, "race/accuracy/group_average": 0.3018232862073194, "race/accuracy/seq_average": 0.287393595460073, "siqa/accuracy/dev": 0.3623336745138178, "siqa/accuracy/group_average": 0.3623336745138178, "siqa/accuracy/seq_average": 0.3623336745138178, "winogrande/accuracy/dev": 0.505130228887135, "winogrande/accuracy/group_average": 0.505130228887135, "winogrande/accuracy/seq_average": 0.505130228887135, "commonsenseqa/accuracy/dev_rand_split": 0.2538902538902539, "commonsenseqa/accuracy/group_average": 0.2538902538902539, "commonsenseqa/accuracy/seq_average": 0.2538902538902539}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-200000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.3456648569258434, "val/accuracy": 0.5138578869047619, "val/perplexity": 10.440211667626619, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4847658524602094, "lambada/accuracy/total": 0.33773291925465837, "lambada/accuracy/openai_last_token": 0.7927018633540373, "lambada/perplexity": 7.200324526814876, "lambada/lm_loss": 2.952690654838108, "lambada/lm_perplexity": 19.157430477092852, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4257954030797101, "mean_loss": 2.4152153546930264, "blimp/accuracy/passive_2": 0.919, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.819, "blimp/accuracy/tough_vs_raising_2": 0.88, "blimp/accuracy/tough_vs_raising_1": 0.61, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.933, "blimp/accuracy/principle_A_reconstruction": 0.535, "blimp/accuracy/wh_vs_that_with_gap": 0.48, "blimp/accuracy/principle_A_domain_2": 0.866, "blimp/accuracy/determiner_noun_agreement_1": 0.996, "blimp/accuracy/ellipsis_n_bar_2": 0.896, "blimp/accuracy/principle_A_domain_3": 0.65, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.932, "blimp/accuracy/animate_subject_trans": 0.896, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.921, "blimp/accuracy/distractor_agreement_relative_clause": 0.728, "blimp/accuracy/transitive": 0.876, "blimp/accuracy/sentential_subject_island": 0.332, "blimp/accuracy/adjunct_island": 0.861, "blimp/accuracy/intransitive": 0.753, "blimp/accuracy/existential_there_subject_raising": 0.877, "blimp/accuracy/irregular_past_participle_adjectives": 0.965, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.674, "blimp/accuracy/principle_A_case_1": 0.999, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.328, "blimp/accuracy/only_npi_scope": 0.611, "blimp/accuracy/superlative_quantifiers_2": 0.823, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/inchoative": 0.629, "blimp/accuracy/anaphor_gender_agreement": 0.977, "blimp/accuracy/principle_A_c_command": 0.757, "blimp/accuracy/only_npi_licensor_present": 0.828, "blimp/accuracy/expletive_it_object_raising": 0.819, "blimp/accuracy/left_branch_island_simple_question": 0.763, "blimp/accuracy/wh_questions_subject_gap": 0.94, "blimp/accuracy/existential_there_quantifiers_2": 0.416, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.947, "blimp/accuracy/sentential_negation_npi_scope": 0.683, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.84, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.926, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.922, "blimp/accuracy/principle_A_case_2": 0.936, "blimp/accuracy/distractor_agreement_relational_noun": 0.908, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.976, "blimp/accuracy/superlative_quantifiers_1": 0.772, "blimp/accuracy/wh_island": 0.737, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.58, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975, "blimp/accuracy/irregular_past_participle_verbs": 0.918, "blimp/accuracy/drop_argument": 0.714, "blimp/accuracy/wh_questions_object_gap": 0.867, "blimp/accuracy/animate_subject_passive": 0.782, "blimp/accuracy/existential_there_quantifiers_1": 0.973, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.893, "blimp/accuracy/npi_present_2": 0.614, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.965, "blimp/accuracy/anaphor_number_agreement": 0.99, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.974, "blimp/accuracy/existential_there_object_raising": 0.864, "blimp/accuracy/matrix_question_npi_licensor_present": 0.337, "blimp/accuracy/npi_present_1": 0.505, "blimp/accuracy/wh_vs_that_no_gap": 0.979, "blimp/accuracy/left_branch_island_echo_question": 0.555, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.953, "blimp/accuracy/causative": 0.781, "blimp/accuracy/group_average": 0.8049999999999999, "blimp/accuracy/seq_average": 0.805, "cbt/accuracy/NE": 0.811698717948718, "cbt/accuracy/V": 0.9352, "cbt/accuracy/CN": 0.8808, "cbt/accuracy/P": 0.9212, "cbt/accuracy/group_average": 0.8872246794871794, "cbt/accuracy/seq_average": 0.8872549019607843, "hellaswag/accuracy/val": 0.3483369846644095, "hellaswag/accuracy/group_average": 0.3483369846644095, "hellaswag/accuracy/seq_average": 0.3483369846644095, "piqa/accuracy/val": 0.6284004352557128, "piqa/accuracy/group_average": 0.6284004352557128, "piqa/accuracy/seq_average": 0.6284004352557128, "ai2arc/accuracy/ARC-Easy": 0.3704016913319239, "ai2arc/accuracy/ARC-Challenge": 0.2446351931330472, "ai2arc/accuracy/group_average": 0.3075184422324856, "ai2arc/accuracy/seq_average": 0.32889518413597735, "mmlu/accuracy/MMLU": 0.26106542724347515, "mmlu/accuracy/group_average": 0.26106542724347515, "mmlu/accuracy/seq_average": 0.26106542724347515, "openbookqa/accuracy/test": 0.294, "openbookqa/accuracy/group_average": 0.294, "openbookqa/accuracy/seq_average": 0.294, "race/accuracy/test/high": 0.28244711263579186, "race/accuracy/test/middle": 0.35863509749303624, "race/accuracy/group_average": 0.320541105064414, "race/accuracy/seq_average": 0.3046209971625456, "siqa/accuracy/dev": 0.372057318321392, "siqa/accuracy/group_average": 0.372057318321392, "siqa/accuracy/seq_average": 0.372057318321392, "winogrande/accuracy/dev": 0.5130228887134964, "winogrande/accuracy/group_average": 0.5130228887134964, "winogrande/accuracy/seq_average": 0.5130228887134964, "commonsenseqa/accuracy/dev_rand_split": 0.27436527436527436, "commonsenseqa/accuracy/group_average": 0.27436527436527436, "commonsenseqa/accuracy/seq_average": 0.27436527436527436}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-220000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.332155257936508, "val/accuracy": 0.5160493396577381, "val/perplexity": 10.300117036484377, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4428151764484665, "lambada/accuracy/total": 0.3447204968944099, "lambada/accuracy/openai_last_token": 0.7989130434782609, "lambada/perplexity": 7.493040654335682, "lambada/lm_loss": 2.937803946670913, "lambada/lm_perplexity": 18.874351683873083, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.43038491827607406, "mean_loss": 2.387485217192487, "blimp/accuracy/passive_2": 0.922, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.882, "blimp/accuracy/tough_vs_raising_2": 0.861, "blimp/accuracy/tough_vs_raising_1": 0.646, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.926, "blimp/accuracy/principle_A_reconstruction": 0.447, "blimp/accuracy/wh_vs_that_with_gap": 0.508, "blimp/accuracy/principle_A_domain_2": 0.877, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.896, "blimp/accuracy/principle_A_domain_3": 0.67, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.924, "blimp/accuracy/animate_subject_trans": 0.911, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.909, "blimp/accuracy/distractor_agreement_relative_clause": 0.702, "blimp/accuracy/transitive": 0.874, "blimp/accuracy/sentential_subject_island": 0.288, "blimp/accuracy/adjunct_island": 0.873, "blimp/accuracy/intransitive": 0.803, "blimp/accuracy/existential_there_subject_raising": 0.876, "blimp/accuracy/irregular_past_participle_adjectives": 0.973, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.658, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.328, "blimp/accuracy/only_npi_scope": 0.612, "blimp/accuracy/superlative_quantifiers_2": 0.829, "blimp/accuracy/passive_1": 0.893, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.919, "blimp/accuracy/inchoative": 0.655, "blimp/accuracy/anaphor_gender_agreement": 0.981, "blimp/accuracy/principle_A_c_command": 0.722, "blimp/accuracy/only_npi_licensor_present": 0.656, "blimp/accuracy/expletive_it_object_raising": 0.784, "blimp/accuracy/left_branch_island_simple_question": 0.677, "blimp/accuracy/wh_questions_subject_gap": 0.932, "blimp/accuracy/existential_there_quantifiers_2": 0.474, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.952, "blimp/accuracy/sentential_negation_npi_scope": 0.688, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.806, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.919, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.906, "blimp/accuracy/principle_A_case_2": 0.949, "blimp/accuracy/distractor_agreement_relational_noun": 0.905, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.979, "blimp/accuracy/superlative_quantifiers_1": 0.799, "blimp/accuracy/wh_island": 0.753, "blimp/accuracy/principle_A_domain_1": 0.988, "blimp/accuracy/complex_NP_island": 0.618, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.973, "blimp/accuracy/irregular_past_participle_verbs": 0.898, "blimp/accuracy/drop_argument": 0.727, "blimp/accuracy/wh_questions_object_gap": 0.851, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.891, "blimp/accuracy/npi_present_2": 0.592, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.93, "blimp/accuracy/anaphor_number_agreement": 0.995, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.968, "blimp/accuracy/existential_there_object_raising": 0.848, "blimp/accuracy/matrix_question_npi_licensor_present": 0.375, "blimp/accuracy/npi_present_1": 0.565, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.492, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.958, "blimp/accuracy/causative": 0.772, "blimp/accuracy/group_average": 0.8016119402985071, "blimp/accuracy/seq_average": 0.8016119402985075, "cbt/accuracy/NE": 0.8120993589743589, "cbt/accuracy/V": 0.938, "cbt/accuracy/CN": 0.8784, "cbt/accuracy/P": 0.9208, "cbt/accuracy/group_average": 0.8873248397435897, "cbt/accuracy/seq_average": 0.8873549419767908, "hellaswag/accuracy/val": 0.3509261103365863, "hellaswag/accuracy/group_average": 0.3509261103365863, "hellaswag/accuracy/seq_average": 0.3509261103365863, "piqa/accuracy/val": 0.6322089227421109, "piqa/accuracy/group_average": 0.6322089227421109, "piqa/accuracy/seq_average": 0.6322089227421109, "ai2arc/accuracy/ARC-Easy": 0.3788583509513742, "ai2arc/accuracy/ARC-Challenge": 0.23605150214592274, "ai2arc/accuracy/group_average": 0.3074549265486485, "ai2arc/accuracy/seq_average": 0.33172804532577904, "mmlu/accuracy/MMLU": 0.2646406864497676, "mmlu/accuracy/group_average": 0.2646406864497676, "mmlu/accuracy/seq_average": 0.2646406864497676, "openbookqa/accuracy/test": 0.278, "openbookqa/accuracy/group_average": 0.278, "openbookqa/accuracy/seq_average": 0.278, "race/accuracy/test/high": 0.2935963407661521, "race/accuracy/test/middle": 0.36559888579387184, "race/accuracy/group_average": 0.32959761328001197, "race/accuracy/seq_average": 0.3145520875557357, "siqa/accuracy/dev": 0.3725690890481064, "siqa/accuracy/group_average": 0.3725690890481064, "siqa/accuracy/seq_average": 0.3725690890481064, "winogrande/accuracy/dev": 0.5090765588003157, "winogrande/accuracy/group_average": 0.5090765588003157, "winogrande/accuracy/seq_average": 0.5090765588003157, "commonsenseqa/accuracy/dev_rand_split": 0.26863226863226863, "commonsenseqa/accuracy/group_average": 0.26863226863226863, "commonsenseqa/accuracy/seq_average": 0.26863226863226863}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-240000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.317928738064236, "val/accuracy": 0.5186738513764881, "val/perplexity": 10.154619631612592, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.237897979546778, "lambada/accuracy/total": 0.35190217391304346, "lambada/accuracy/openai_last_token": 0.7975543478260869, "lambada/perplexity": 6.92858826441491, "lambada/lm_loss": 2.916927354519416, "lambada/lm_perplexity": 18.484404092593664, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4352880126447658, "mean_loss": 2.277913358805507, "blimp/accuracy/passive_2": 0.911, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.86, "blimp/accuracy/tough_vs_raising_2": 0.896, "blimp/accuracy/tough_vs_raising_1": 0.632, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.924, "blimp/accuracy/principle_A_reconstruction": 0.474, "blimp/accuracy/wh_vs_that_with_gap": 0.503, "blimp/accuracy/principle_A_domain_2": 0.887, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.884, "blimp/accuracy/principle_A_domain_3": 0.664, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.943, "blimp/accuracy/animate_subject_trans": 0.907, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.903, "blimp/accuracy/distractor_agreement_relative_clause": 0.716, "blimp/accuracy/transitive": 0.877, "blimp/accuracy/sentential_subject_island": 0.247, "blimp/accuracy/adjunct_island": 0.901, "blimp/accuracy/intransitive": 0.783, "blimp/accuracy/existential_there_subject_raising": 0.886, "blimp/accuracy/irregular_past_participle_adjectives": 0.928, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.659, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.362, "blimp/accuracy/only_npi_scope": 0.674, "blimp/accuracy/superlative_quantifiers_2": 0.772, "blimp/accuracy/passive_1": 0.886, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.925, "blimp/accuracy/inchoative": 0.639, "blimp/accuracy/anaphor_gender_agreement": 0.976, "blimp/accuracy/principle_A_c_command": 0.748, "blimp/accuracy/only_npi_licensor_present": 0.708, "blimp/accuracy/expletive_it_object_raising": 0.801, "blimp/accuracy/left_branch_island_simple_question": 0.731, "blimp/accuracy/wh_questions_subject_gap": 0.938, "blimp/accuracy/existential_there_quantifiers_2": 0.465, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.947, "blimp/accuracy/sentential_negation_npi_scope": 0.729, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.839, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.927, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.924, "blimp/accuracy/principle_A_case_2": 0.957, "blimp/accuracy/distractor_agreement_relational_noun": 0.909, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.985, "blimp/accuracy/superlative_quantifiers_1": 0.866, "blimp/accuracy/wh_island": 0.782, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.64, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.97, "blimp/accuracy/irregular_past_participle_verbs": 0.93, "blimp/accuracy/drop_argument": 0.723, "blimp/accuracy/wh_questions_object_gap": 0.854, "blimp/accuracy/animate_subject_passive": 0.786, "blimp/accuracy/existential_there_quantifiers_1": 0.98, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.916, "blimp/accuracy/npi_present_2": 0.57, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.951, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.959, "blimp/accuracy/existential_there_object_raising": 0.896, "blimp/accuracy/matrix_question_npi_licensor_present": 0.34, "blimp/accuracy/npi_present_1": 0.519, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.542, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.952, "blimp/accuracy/causative": 0.743, "blimp/accuracy/group_average": 0.8070895522388059, "blimp/accuracy/seq_average": 0.807089552238806, "cbt/accuracy/NE": 0.8181089743589743, "cbt/accuracy/V": 0.938, "cbt/accuracy/CN": 0.8868, "cbt/accuracy/P": 0.9216, "cbt/accuracy/group_average": 0.8911272435897435, "cbt/accuracy/seq_average": 0.891156462585034, "hellaswag/accuracy/val": 0.3547102170882294, "hellaswag/accuracy/group_average": 0.3547102170882294, "hellaswag/accuracy/seq_average": 0.3547102170882294, "piqa/accuracy/val": 0.6278563656147987, "piqa/accuracy/group_average": 0.6278563656147987, "piqa/accuracy/seq_average": 0.6278563656147987, "ai2arc/accuracy/ARC-Easy": 0.3780126849894292, "ai2arc/accuracy/ARC-Challenge": 0.23433476394849787, "ai2arc/accuracy/group_average": 0.3061737244689635, "ai2arc/accuracy/seq_average": 0.3305949008498584, "mmlu/accuracy/MMLU": 0.26313907758312477, "mmlu/accuracy/group_average": 0.26313907758312477, "mmlu/accuracy/seq_average": 0.26313907758312477, "openbookqa/accuracy/test": 0.284, "openbookqa/accuracy/group_average": 0.284, "openbookqa/accuracy/seq_average": 0.284, "race/accuracy/test/high": 0.2918810748999428, "race/accuracy/test/middle": 0.3683844011142061, "race/accuracy/group_average": 0.33013273800707443, "race/accuracy/seq_average": 0.3141467369274422, "siqa/accuracy/dev": 0.36745138178096215, "siqa/accuracy/group_average": 0.36745138178096215, "siqa/accuracy/seq_average": 0.36745138178096215, "winogrande/accuracy/dev": 0.5130228887134964, "winogrande/accuracy/group_average": 0.5130228887134964, "winogrande/accuracy/seq_average": 0.5130228887134964, "commonsenseqa/accuracy/dev_rand_split": 0.2678132678132678, "commonsenseqa/accuracy/group_average": 0.2678132678132678, "commonsenseqa/accuracy/seq_average": 0.2678132678132678}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-260000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.306055220346602, "val/accuracy": 0.5207199823288691, "val/perplexity": 10.034761552149444, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.36907636749078, "lambada/accuracy/total": 0.3563664596273292, "lambada/accuracy/openai_last_token": 0.7944487577639752, "lambada/perplexity": 7.16053332113276, "lambada/lm_loss": 2.892238398954036, "lambada/lm_perplexity": 18.03363091770243, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.43854322097809917, "mean_loss": 2.3375657939186913, "blimp/accuracy/passive_2": 0.908, "blimp/accuracy/determiner_noun_agreement_2": 0.988, "blimp/accuracy/ellipsis_n_bar_1": 0.82, "blimp/accuracy/tough_vs_raising_2": 0.885, "blimp/accuracy/tough_vs_raising_1": 0.658, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.94, "blimp/accuracy/principle_A_reconstruction": 0.424, "blimp/accuracy/wh_vs_that_with_gap": 0.505, "blimp/accuracy/principle_A_domain_2": 0.884, "blimp/accuracy/determiner_noun_agreement_1": 0.997, "blimp/accuracy/ellipsis_n_bar_2": 0.888, "blimp/accuracy/principle_A_domain_3": 0.67, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.945, "blimp/accuracy/animate_subject_trans": 0.907, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.921, "blimp/accuracy/distractor_agreement_relative_clause": 0.731, "blimp/accuracy/transitive": 0.896, "blimp/accuracy/sentential_subject_island": 0.29, "blimp/accuracy/adjunct_island": 0.881, "blimp/accuracy/intransitive": 0.795, "blimp/accuracy/existential_there_subject_raising": 0.876, "blimp/accuracy/irregular_past_participle_adjectives": 0.95, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.628, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.361, "blimp/accuracy/only_npi_scope": 0.71, "blimp/accuracy/superlative_quantifiers_2": 0.765, "blimp/accuracy/passive_1": 0.901, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.922, "blimp/accuracy/inchoative": 0.662, "blimp/accuracy/anaphor_gender_agreement": 0.981, "blimp/accuracy/principle_A_c_command": 0.772, "blimp/accuracy/only_npi_licensor_present": 0.686, "blimp/accuracy/expletive_it_object_raising": 0.79, "blimp/accuracy/left_branch_island_simple_question": 0.692, "blimp/accuracy/wh_questions_subject_gap": 0.93, "blimp/accuracy/existential_there_quantifiers_2": 0.449, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.941, "blimp/accuracy/sentential_negation_npi_scope": 0.76, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.825, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.924, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/principle_A_case_2": 0.939, "blimp/accuracy/distractor_agreement_relational_noun": 0.916, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.984, "blimp/accuracy/superlative_quantifiers_1": 0.787, "blimp/accuracy/wh_island": 0.784, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.612, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.977, "blimp/accuracy/irregular_past_participle_verbs": 0.877, "blimp/accuracy/drop_argument": 0.734, "blimp/accuracy/wh_questions_object_gap": 0.852, "blimp/accuracy/animate_subject_passive": 0.805, "blimp/accuracy/existential_there_quantifiers_1": 0.972, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.928, "blimp/accuracy/npi_present_2": 0.637, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.957, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.967, "blimp/accuracy/existential_there_object_raising": 0.85, "blimp/accuracy/matrix_question_npi_licensor_present": 0.401, "blimp/accuracy/npi_present_1": 0.604, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.471, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.966, "blimp/accuracy/causative": 0.767, "blimp/accuracy/group_average": 0.8076268656716419, "blimp/accuracy/seq_average": 0.8076268656716418, "cbt/accuracy/NE": 0.8205128205128205, "cbt/accuracy/V": 0.9392, "cbt/accuracy/CN": 0.8872, "cbt/accuracy/P": 0.9236, "cbt/accuracy/group_average": 0.8926282051282052, "cbt/accuracy/seq_average": 0.89265706282513, "hellaswag/accuracy/val": 0.3552081258713404, "hellaswag/accuracy/group_average": 0.3552081258713404, "hellaswag/accuracy/seq_average": 0.3552081258713404, "piqa/accuracy/val": 0.6316648531011969, "piqa/accuracy/group_average": 0.6316648531011969, "piqa/accuracy/seq_average": 0.6316648531011969, "ai2arc/accuracy/ARC-Easy": 0.37420718816067655, "ai2arc/accuracy/ARC-Challenge": 0.22489270386266094, "ai2arc/accuracy/group_average": 0.29954994601166873, "ai2arc/accuracy/seq_average": 0.32492917847025493, "mmlu/accuracy/MMLU": 0.26299606721487306, "mmlu/accuracy/group_average": 0.26299606721487306, "mmlu/accuracy/seq_average": 0.26299606721487306, "openbookqa/accuracy/test": 0.28, "openbookqa/accuracy/group_average": 0.28, "openbookqa/accuracy/seq_average": 0.28, "race/accuracy/test/high": 0.2890222984562607, "race/accuracy/test/middle": 0.35863509749303624, "race/accuracy/group_average": 0.32382869797464847, "race/accuracy/seq_average": 0.30928252938792056, "siqa/accuracy/dev": 0.3735926305015353, "siqa/accuracy/group_average": 0.3735926305015353, "siqa/accuracy/seq_average": 0.3735926305015353, "winogrande/accuracy/dev": 0.5130228887134964, "winogrande/accuracy/group_average": 0.5130228887134964, "winogrande/accuracy/seq_average": 0.5130228887134964, "commonsenseqa/accuracy/dev_rand_split": 0.2710892710892711, "commonsenseqa/accuracy/group_average": 0.2710892710892711, "commonsenseqa/accuracy/seq_average": 0.2710892710892711}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-280000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.2941398015097967, "val/accuracy": 0.5222313290550595, "val/perplexity": 9.915902698109626, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4012716542119565, "lambada/accuracy/total": 0.3676242236024845, "lambada/accuracy/openai_last_token": 0.8037655279503105, "lambada/perplexity": 6.710949504629041, "lambada/lm_loss": 2.883886085868798, "lambada/lm_perplexity": 17.883635661868738, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.444927776328772, "mean_loss": 2.347705727860877, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.986, "blimp/accuracy/ellipsis_n_bar_1": 0.858, "blimp/accuracy/tough_vs_raising_2": 0.895, "blimp/accuracy/tough_vs_raising_1": 0.611, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.915, "blimp/accuracy/principle_A_reconstruction": 0.417, "blimp/accuracy/wh_vs_that_with_gap": 0.506, "blimp/accuracy/principle_A_domain_2": 0.889, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.907, "blimp/accuracy/principle_A_domain_3": 0.644, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.944, "blimp/accuracy/animate_subject_trans": 0.901, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.916, "blimp/accuracy/distractor_agreement_relative_clause": 0.73, "blimp/accuracy/transitive": 0.887, "blimp/accuracy/sentential_subject_island": 0.279, "blimp/accuracy/adjunct_island": 0.888, "blimp/accuracy/intransitive": 0.783, "blimp/accuracy/existential_there_subject_raising": 0.879, "blimp/accuracy/irregular_past_participle_adjectives": 0.939, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.702, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.343, "blimp/accuracy/only_npi_scope": 0.672, "blimp/accuracy/superlative_quantifiers_2": 0.803, "blimp/accuracy/passive_1": 0.887, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.922, "blimp/accuracy/inchoative": 0.65, "blimp/accuracy/anaphor_gender_agreement": 0.983, "blimp/accuracy/principle_A_c_command": 0.767, "blimp/accuracy/only_npi_licensor_present": 0.732, "blimp/accuracy/expletive_it_object_raising": 0.779, "blimp/accuracy/left_branch_island_simple_question": 0.786, "blimp/accuracy/wh_questions_subject_gap": 0.935, "blimp/accuracy/existential_there_quantifiers_2": 0.459, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.946, "blimp/accuracy/sentential_negation_npi_scope": 0.737, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.841, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.922, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.901, "blimp/accuracy/principle_A_case_2": 0.942, "blimp/accuracy/distractor_agreement_relational_noun": 0.903, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.979, "blimp/accuracy/superlative_quantifiers_1": 0.88, "blimp/accuracy/wh_island": 0.751, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.605, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.971, "blimp/accuracy/irregular_past_participle_verbs": 0.891, "blimp/accuracy/drop_argument": 0.719, "blimp/accuracy/wh_questions_object_gap": 0.848, "blimp/accuracy/animate_subject_passive": 0.794, "blimp/accuracy/existential_there_quantifiers_1": 0.976, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.9, "blimp/accuracy/npi_present_2": 0.592, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.947, "blimp/accuracy/anaphor_number_agreement": 0.993, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.962, "blimp/accuracy/existential_there_object_raising": 0.837, "blimp/accuracy/matrix_question_npi_licensor_present": 0.398, "blimp/accuracy/npi_present_1": 0.553, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.53, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.961, "blimp/accuracy/causative": 0.771, "blimp/accuracy/group_average": 0.8076119402985075, "blimp/accuracy/seq_average": 0.8076119402985075, "cbt/accuracy/NE": 0.8261217948717948, "cbt/accuracy/V": 0.9396, "cbt/accuracy/CN": 0.8916, "cbt/accuracy/P": 0.9312, "cbt/accuracy/group_average": 0.8971304487179487, "cbt/accuracy/seq_average": 0.8971588635454182, "hellaswag/accuracy/val": 0.3588926508663613, "hellaswag/accuracy/group_average": 0.3588926508663613, "hellaswag/accuracy/seq_average": 0.3588926508663613, "piqa/accuracy/val": 0.6360174102285092, "piqa/accuracy/group_average": 0.6360174102285092, "piqa/accuracy/seq_average": 0.6360174102285092, "ai2arc/accuracy/ARC-Easy": 0.3767441860465116, "ai2arc/accuracy/ARC-Challenge": 0.2351931330472103, "ai2arc/accuracy/group_average": 0.3059686595468609, "ai2arc/accuracy/seq_average": 0.330028328611898, "mmlu/accuracy/MMLU": 0.26077940650697173, "mmlu/accuracy/group_average": 0.26077940650697173, "mmlu/accuracy/seq_average": 0.26077940650697173, "openbookqa/accuracy/test": 0.282, "openbookqa/accuracy/group_average": 0.282, "openbookqa/accuracy/seq_average": 0.282, "race/accuracy/test/high": 0.2950257289879931, "race/accuracy/test/middle": 0.3635097493036212, "race/accuracy/group_average": 0.32926773914580715, "race/accuracy/seq_average": 0.31495743818402916, "siqa/accuracy/dev": 0.37461617195496416, "siqa/accuracy/group_average": 0.37461617195496416, "siqa/accuracy/seq_average": 0.37461617195496416, "winogrande/accuracy/dev": 0.5043409629044988, "winogrande/accuracy/group_average": 0.5043409629044988, "winogrande/accuracy/seq_average": 0.5043409629044988, "commonsenseqa/accuracy/dev_rand_split": 0.276003276003276, "commonsenseqa/accuracy/group_average": 0.276003276003276, "commonsenseqa/accuracy/seq_average": 0.276003276003276}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-300000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.2844076005239335, "val/accuracy": 0.5235634455605159, "val/perplexity": 9.81986721640497, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.288421062208851, "lambada/accuracy/total": 0.3610248447204969, "lambada/accuracy/openai_last_token": 0.8024068322981367, "lambada/perplexity": 6.70941070652531, "lambada/lm_loss": 2.8837493687966593, "lambada/lm_perplexity": 17.881190830690745, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4422941451405064, "mean_loss": 2.2864143313663923, "blimp/accuracy/passive_2": 0.911, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.854, "blimp/accuracy/tough_vs_raising_2": 0.875, "blimp/accuracy/tough_vs_raising_1": 0.621, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.919, "blimp/accuracy/principle_A_reconstruction": 0.422, "blimp/accuracy/wh_vs_that_with_gap": 0.505, "blimp/accuracy/principle_A_domain_2": 0.872, "blimp/accuracy/determiner_noun_agreement_1": 0.994, "blimp/accuracy/ellipsis_n_bar_2": 0.895, "blimp/accuracy/principle_A_domain_3": 0.657, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.938, "blimp/accuracy/animate_subject_trans": 0.906, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.906, "blimp/accuracy/distractor_agreement_relative_clause": 0.698, "blimp/accuracy/transitive": 0.886, "blimp/accuracy/sentential_subject_island": 0.281, "blimp/accuracy/adjunct_island": 0.872, "blimp/accuracy/intransitive": 0.773, "blimp/accuracy/existential_there_subject_raising": 0.877, "blimp/accuracy/irregular_past_participle_adjectives": 0.985, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.71, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.353, "blimp/accuracy/only_npi_scope": 0.673, "blimp/accuracy/superlative_quantifiers_2": 0.831, "blimp/accuracy/passive_1": 0.879, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.925, "blimp/accuracy/inchoative": 0.656, "blimp/accuracy/anaphor_gender_agreement": 0.974, "blimp/accuracy/principle_A_c_command": 0.763, "blimp/accuracy/only_npi_licensor_present": 0.651, "blimp/accuracy/expletive_it_object_raising": 0.805, "blimp/accuracy/left_branch_island_simple_question": 0.781, "blimp/accuracy/wh_questions_subject_gap": 0.945, "blimp/accuracy/existential_there_quantifiers_2": 0.385, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.932, "blimp/accuracy/sentential_negation_npi_scope": 0.722, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.811, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.928, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.925, "blimp/accuracy/principle_A_case_2": 0.96, "blimp/accuracy/distractor_agreement_relational_noun": 0.903, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.977, "blimp/accuracy/superlative_quantifiers_1": 0.862, "blimp/accuracy/wh_island": 0.704, "blimp/accuracy/principle_A_domain_1": 0.99, "blimp/accuracy/complex_NP_island": 0.586, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.976, "blimp/accuracy/irregular_past_participle_verbs": 0.914, "blimp/accuracy/drop_argument": 0.719, "blimp/accuracy/wh_questions_object_gap": 0.849, "blimp/accuracy/animate_subject_passive": 0.794, "blimp/accuracy/existential_there_quantifiers_1": 0.976, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.922, "blimp/accuracy/npi_present_2": 0.587, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.951, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.969, "blimp/accuracy/existential_there_object_raising": 0.866, "blimp/accuracy/matrix_question_npi_licensor_present": 0.356, "blimp/accuracy/npi_present_1": 0.544, "blimp/accuracy/wh_vs_that_no_gap": 0.976, "blimp/accuracy/left_branch_island_echo_question": 0.514, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.739, "blimp/accuracy/group_average": 0.8039701492537312, "blimp/accuracy/seq_average": 0.8039701492537313, "cbt/accuracy/NE": 0.8297275641025641, "cbt/accuracy/V": 0.9428, "cbt/accuracy/CN": 0.8864, "cbt/accuracy/P": 0.9236, "cbt/accuracy/group_average": 0.895631891025641, "cbt/accuracy/seq_average": 0.8956582633053222, "hellaswag/accuracy/val": 0.3623780123481378, "hellaswag/accuracy/group_average": 0.3623780123481378, "hellaswag/accuracy/seq_average": 0.3623780123481378, "piqa/accuracy/val": 0.6289445048966268, "piqa/accuracy/group_average": 0.6289445048966268, "piqa/accuracy/seq_average": 0.6289445048966268, "ai2arc/accuracy/ARC-Easy": 0.38308668076109936, "ai2arc/accuracy/ARC-Challenge": 0.23862660944206007, "ai2arc/accuracy/group_average": 0.31085664510157973, "ai2arc/accuracy/seq_average": 0.33541076487252125, "mmlu/accuracy/MMLU": 0.2661422953164104, "mmlu/accuracy/group_average": 0.2661422953164104, "mmlu/accuracy/seq_average": 0.2661422953164104, "openbookqa/accuracy/test": 0.286, "openbookqa/accuracy/group_average": 0.286, "openbookqa/accuracy/seq_average": 0.286, "race/accuracy/test/high": 0.2950257289879931, "race/accuracy/test/middle": 0.3607242339832869, "race/accuracy/group_average": 0.32787498148564, "race/accuracy/seq_average": 0.3141467369274422, "siqa/accuracy/dev": 0.37154554759467756, "siqa/accuracy/group_average": 0.37154554759467756, "siqa/accuracy/seq_average": 0.37154554759467756, "winogrande/accuracy/dev": 0.5122336227308603, "winogrande/accuracy/group_average": 0.5122336227308603, "winogrande/accuracy/seq_average": 0.5122336227308603, "commonsenseqa/accuracy/dev_rand_split": 0.2809172809172809, "commonsenseqa/accuracy/group_average": 0.2809172809172809, "commonsenseqa/accuracy/seq_average": 0.2809172809172809}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-320000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.2764844137524802, "val/accuracy": 0.5249352833581349, "val/perplexity": 9.742369992288124, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.2689643054275037, "lambada/accuracy/total": 0.3695652173913043, "lambada/accuracy/openai_last_token": 0.8022127329192547, "lambada/perplexity": 6.62073928563423, "lambada/lm_loss": 2.8810179083649894, "lambada/lm_perplexity": 17.83241570944786, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4472502503747196, "mean_loss": 2.2727243595899917, "blimp/accuracy/passive_2": 0.921, "blimp/accuracy/determiner_noun_agreement_2": 0.99, "blimp/accuracy/ellipsis_n_bar_1": 0.843, "blimp/accuracy/tough_vs_raising_2": 0.889, "blimp/accuracy/tough_vs_raising_1": 0.636, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.922, "blimp/accuracy/principle_A_reconstruction": 0.441, "blimp/accuracy/wh_vs_that_with_gap": 0.527, "blimp/accuracy/principle_A_domain_2": 0.888, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.9, "blimp/accuracy/principle_A_domain_3": 0.691, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.933, "blimp/accuracy/animate_subject_trans": 0.908, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.907, "blimp/accuracy/distractor_agreement_relative_clause": 0.726, "blimp/accuracy/transitive": 0.889, "blimp/accuracy/sentential_subject_island": 0.306, "blimp/accuracy/adjunct_island": 0.886, "blimp/accuracy/intransitive": 0.779, "blimp/accuracy/existential_there_subject_raising": 0.877, "blimp/accuracy/irregular_past_participle_adjectives": 0.9, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.712, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.363, "blimp/accuracy/only_npi_scope": 0.641, "blimp/accuracy/superlative_quantifiers_2": 0.807, "blimp/accuracy/passive_1": 0.893, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.929, "blimp/accuracy/inchoative": 0.653, "blimp/accuracy/anaphor_gender_agreement": 0.981, "blimp/accuracy/principle_A_c_command": 0.767, "blimp/accuracy/only_npi_licensor_present": 0.651, "blimp/accuracy/expletive_it_object_raising": 0.789, "blimp/accuracy/left_branch_island_simple_question": 0.802, "blimp/accuracy/wh_questions_subject_gap": 0.945, "blimp/accuracy/existential_there_quantifiers_2": 0.448, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.951, "blimp/accuracy/sentential_negation_npi_scope": 0.732, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.819, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.916, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.923, "blimp/accuracy/principle_A_case_2": 0.952, "blimp/accuracy/distractor_agreement_relational_noun": 0.906, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.982, "blimp/accuracy/superlative_quantifiers_1": 0.823, "blimp/accuracy/wh_island": 0.728, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.573, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.972, "blimp/accuracy/irregular_past_participle_verbs": 0.912, "blimp/accuracy/drop_argument": 0.725, "blimp/accuracy/wh_questions_object_gap": 0.869, "blimp/accuracy/animate_subject_passive": 0.787, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.921, "blimp/accuracy/npi_present_2": 0.615, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.958, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.969, "blimp/accuracy/existential_there_object_raising": 0.841, "blimp/accuracy/matrix_question_npi_licensor_present": 0.354, "blimp/accuracy/npi_present_1": 0.583, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.54, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.953, "blimp/accuracy/causative": 0.767, "blimp/accuracy/group_average": 0.8079552238805969, "blimp/accuracy/seq_average": 0.807955223880597, "cbt/accuracy/NE": 0.828926282051282, "cbt/accuracy/V": 0.9396, "cbt/accuracy/CN": 0.8912, "cbt/accuracy/P": 0.9292, "cbt/accuracy/group_average": 0.8972315705128204, "cbt/accuracy/seq_average": 0.8972589035614246, "hellaswag/accuracy/val": 0.36436964748058154, "hellaswag/accuracy/group_average": 0.36436964748058154, "hellaswag/accuracy/seq_average": 0.36436964748058154, "piqa/accuracy/val": 0.6430903155603918, "piqa/accuracy/group_average": 0.6430903155603918, "piqa/accuracy/seq_average": 0.6430903155603918, "ai2arc/accuracy/ARC-Easy": 0.38816067653276953, "ai2arc/accuracy/ARC-Challenge": 0.23948497854077253, "ai2arc/accuracy/group_average": 0.31382282753677104, "ai2arc/accuracy/seq_average": 0.33909348441926346, "mmlu/accuracy/MMLU": 0.2682874508401859, "mmlu/accuracy/group_average": 0.2682874508401859, "mmlu/accuracy/seq_average": 0.2682874508401859, "openbookqa/accuracy/test": 0.29, "openbookqa/accuracy/group_average": 0.29, "openbookqa/accuracy/seq_average": 0.29, "race/accuracy/test/high": 0.2918810748999428, "race/accuracy/test/middle": 0.36420612813370473, "race/accuracy/group_average": 0.32804360151682377, "race/accuracy/seq_average": 0.31293068504256183, "siqa/accuracy/dev": 0.37563971340839303, "siqa/accuracy/group_average": 0.37563971340839303, "siqa/accuracy/seq_average": 0.37563971340839303, "winogrande/accuracy/dev": 0.5122336227308603, "winogrande/accuracy/group_average": 0.5122336227308603, "winogrande/accuracy/seq_average": 0.5122336227308603, "commonsenseqa/accuracy/dev_rand_split": 0.2702702702702703, "commonsenseqa/accuracy/group_average": 0.2702702702702703, "commonsenseqa/accuracy/seq_average": 0.2702702702702703}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-340000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.2682679191468256, "val/accuracy": 0.5261976453993056, "val/perplexity": 9.662649820478116, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.390303522903727, "lambada/accuracy/total": 0.3934394409937888, "lambada/accuracy/openai_last_token": 0.8105590062111802, "lambada/perplexity": 6.499801322866643, "lambada/lm_loss": 2.872149894293084, "lambada/lm_perplexity": 17.674976713387498, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4598185431965472, "mean_loss": 2.3292857210252764, "blimp/accuracy/passive_2": 0.919, "blimp/accuracy/determiner_noun_agreement_2": 0.987, "blimp/accuracy/ellipsis_n_bar_1": 0.839, "blimp/accuracy/tough_vs_raising_2": 0.888, "blimp/accuracy/tough_vs_raising_1": 0.626, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.912, "blimp/accuracy/principle_A_reconstruction": 0.439, "blimp/accuracy/wh_vs_that_with_gap": 0.5, "blimp/accuracy/principle_A_domain_2": 0.883, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.905, "blimp/accuracy/principle_A_domain_3": 0.665, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.939, "blimp/accuracy/animate_subject_trans": 0.902, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.919, "blimp/accuracy/distractor_agreement_relative_clause": 0.735, "blimp/accuracy/transitive": 0.884, "blimp/accuracy/sentential_subject_island": 0.294, "blimp/accuracy/adjunct_island": 0.884, "blimp/accuracy/intransitive": 0.772, "blimp/accuracy/existential_there_subject_raising": 0.871, "blimp/accuracy/irregular_past_participle_adjectives": 0.95, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.756, "blimp/accuracy/principle_A_case_1": 0.999, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.364, "blimp/accuracy/only_npi_scope": 0.632, "blimp/accuracy/superlative_quantifiers_2": 0.854, "blimp/accuracy/passive_1": 0.89, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.932, "blimp/accuracy/inchoative": 0.643, "blimp/accuracy/anaphor_gender_agreement": 0.979, "blimp/accuracy/principle_A_c_command": 0.762, "blimp/accuracy/only_npi_licensor_present": 0.698, "blimp/accuracy/expletive_it_object_raising": 0.788, "blimp/accuracy/left_branch_island_simple_question": 0.831, "blimp/accuracy/wh_questions_subject_gap": 0.943, "blimp/accuracy/existential_there_quantifiers_2": 0.483, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.714, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.807, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.915, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.918, "blimp/accuracy/principle_A_case_2": 0.942, "blimp/accuracy/distractor_agreement_relational_noun": 0.91, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.984, "blimp/accuracy/superlative_quantifiers_1": 0.867, "blimp/accuracy/wh_island": 0.75, "blimp/accuracy/principle_A_domain_1": 0.989, "blimp/accuracy/complex_NP_island": 0.596, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.981, "blimp/accuracy/irregular_past_participle_verbs": 0.909, "blimp/accuracy/drop_argument": 0.709, "blimp/accuracy/wh_questions_object_gap": 0.87, "blimp/accuracy/animate_subject_passive": 0.798, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.913, "blimp/accuracy/npi_present_2": 0.596, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.958, "blimp/accuracy/anaphor_number_agreement": 0.993, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.966, "blimp/accuracy/existential_there_object_raising": 0.839, "blimp/accuracy/matrix_question_npi_licensor_present": 0.393, "blimp/accuracy/npi_present_1": 0.575, "blimp/accuracy/wh_vs_that_no_gap": 0.979, "blimp/accuracy/left_branch_island_echo_question": 0.549, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.955, "blimp/accuracy/causative": 0.774, "blimp/accuracy/group_average": 0.8108208955223879, "blimp/accuracy/seq_average": 0.810820895522388, "cbt/accuracy/NE": 0.8329326923076923, "cbt/accuracy/V": 0.9408, "cbt/accuracy/CN": 0.898, "cbt/accuracy/P": 0.9272, "cbt/accuracy/group_average": 0.8997331730769231, "cbt/accuracy/seq_average": 0.8997599039615847, "hellaswag/accuracy/val": 0.3664608643696475, "hellaswag/accuracy/group_average": 0.3664608643696475, "hellaswag/accuracy/seq_average": 0.3664608643696475, "piqa/accuracy/val": 0.6420021762785637, "piqa/accuracy/group_average": 0.6420021762785637, "piqa/accuracy/seq_average": 0.6420021762785637, "ai2arc/accuracy/ARC-Easy": 0.3970401691331924, "ai2arc/accuracy/ARC-Challenge": 0.24034334763948498, "ai2arc/accuracy/group_average": 0.3186917583863387, "ai2arc/accuracy/seq_average": 0.3453257790368272, "mmlu/accuracy/MMLU": 0.2652127279227744, "mmlu/accuracy/group_average": 0.2652127279227744, "mmlu/accuracy/seq_average": 0.2652127279227744, "openbookqa/accuracy/test": 0.28, "openbookqa/accuracy/group_average": 0.28, "openbookqa/accuracy/seq_average": 0.28, "race/accuracy/test/high": 0.2935963407661521, "race/accuracy/test/middle": 0.36768802228412256, "race/accuracy/group_average": 0.33064218152513736, "race/accuracy/seq_average": 0.3151601134981759, "siqa/accuracy/dev": 0.3766632548618219, "siqa/accuracy/group_average": 0.3766632548618219, "siqa/accuracy/seq_average": 0.3766632548618219, "winogrande/accuracy/dev": 0.5114443567482242, "winogrande/accuracy/group_average": 0.5114443567482242, "winogrande/accuracy/seq_average": 0.5114443567482242, "commonsenseqa/accuracy/dev_rand_split": 0.27436527436527436, "commonsenseqa/accuracy/group_average": 0.27436527436527436, "commonsenseqa/accuracy/seq_average": 0.27436527436527436}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-360000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.262420169890873, "val/accuracy": 0.5272575257316469, "val/perplexity": 9.60630995842363, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3124018130095108, "lambada/accuracy/total": 0.3771350931677019, "lambada/accuracy/openai_last_token": 0.8039596273291926, "lambada/perplexity": 6.609073925566097, "lambada/lm_loss": 2.8582293724843533, "lambada/lm_perplexity": 17.430636432379686, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.45219630944967437, "mean_loss": 2.2874109914501917, "blimp/accuracy/passive_2": 0.919, "blimp/accuracy/determiner_noun_agreement_2": 0.986, "blimp/accuracy/ellipsis_n_bar_1": 0.83, "blimp/accuracy/tough_vs_raising_2": 0.892, "blimp/accuracy/tough_vs_raising_1": 0.619, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.92, "blimp/accuracy/principle_A_reconstruction": 0.446, "blimp/accuracy/wh_vs_that_with_gap": 0.523, "blimp/accuracy/principle_A_domain_2": 0.885, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.896, "blimp/accuracy/principle_A_domain_3": 0.657, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.931, "blimp/accuracy/animate_subject_trans": 0.902, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.907, "blimp/accuracy/distractor_agreement_relative_clause": 0.7, "blimp/accuracy/transitive": 0.884, "blimp/accuracy/sentential_subject_island": 0.294, "blimp/accuracy/adjunct_island": 0.895, "blimp/accuracy/intransitive": 0.764, "blimp/accuracy/existential_there_subject_raising": 0.883, "blimp/accuracy/irregular_past_participle_adjectives": 0.939, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.723, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.383, "blimp/accuracy/only_npi_scope": 0.645, "blimp/accuracy/superlative_quantifiers_2": 0.851, "blimp/accuracy/passive_1": 0.883, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.92, "blimp/accuracy/inchoative": 0.644, "blimp/accuracy/anaphor_gender_agreement": 0.982, "blimp/accuracy/principle_A_c_command": 0.786, "blimp/accuracy/only_npi_licensor_present": 0.695, "blimp/accuracy/expletive_it_object_raising": 0.771, "blimp/accuracy/left_branch_island_simple_question": 0.81, "blimp/accuracy/wh_questions_subject_gap": 0.935, "blimp/accuracy/existential_there_quantifiers_2": 0.448, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.746, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.817, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.913, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.921, "blimp/accuracy/principle_A_case_2": 0.943, "blimp/accuracy/distractor_agreement_relational_noun": 0.891, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.98, "blimp/accuracy/superlative_quantifiers_1": 0.897, "blimp/accuracy/wh_island": 0.755, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.6, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.974, "blimp/accuracy/irregular_past_participle_verbs": 0.897, "blimp/accuracy/drop_argument": 0.713, "blimp/accuracy/wh_questions_object_gap": 0.876, "blimp/accuracy/animate_subject_passive": 0.79, "blimp/accuracy/existential_there_quantifiers_1": 0.976, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.922, "blimp/accuracy/npi_present_2": 0.574, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.954, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.965, "blimp/accuracy/existential_there_object_raising": 0.83, "blimp/accuracy/matrix_question_npi_licensor_present": 0.348, "blimp/accuracy/npi_present_1": 0.578, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.507, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.958, "blimp/accuracy/causative": 0.773, "blimp/accuracy/group_average": 0.8080149253731339, "blimp/accuracy/seq_average": 0.8080149253731344, "cbt/accuracy/NE": 0.8253205128205128, "cbt/accuracy/V": 0.9376, "cbt/accuracy/CN": 0.8964, "cbt/accuracy/P": 0.9276, "cbt/accuracy/group_average": 0.8967301282051282, "cbt/accuracy/seq_average": 0.8967587034813925, "hellaswag/accuracy/val": 0.36755626369249156, "hellaswag/accuracy/group_average": 0.36755626369249156, "hellaswag/accuracy/seq_average": 0.36755626369249156, "piqa/accuracy/val": 0.6409140369967355, "piqa/accuracy/group_average": 0.6409140369967355, "piqa/accuracy/seq_average": 0.6409140369967355, "ai2arc/accuracy/ARC-Easy": 0.3885835095137421, "ai2arc/accuracy/ARC-Challenge": 0.24549356223175967, "ai2arc/accuracy/group_average": 0.3170385358727509, "ai2arc/accuracy/seq_average": 0.3413597733711048, "mmlu/accuracy/MMLU": 0.26721487307829817, "mmlu/accuracy/group_average": 0.26721487307829817, "mmlu/accuracy/seq_average": 0.26721487307829817, "openbookqa/accuracy/test": 0.288, "openbookqa/accuracy/group_average": 0.288, "openbookqa/accuracy/seq_average": 0.288, "race/accuracy/test/high": 0.29302458547741567, "race/accuracy/test/middle": 0.36768802228412256, "race/accuracy/group_average": 0.3303563038807691, "race/accuracy/seq_average": 0.31475476286988247, "siqa/accuracy/dev": 0.372057318321392, "siqa/accuracy/group_average": 0.372057318321392, "siqa/accuracy/seq_average": 0.372057318321392, "winogrande/accuracy/dev": 0.5090765588003157, "winogrande/accuracy/group_average": 0.5090765588003157, "winogrande/accuracy/seq_average": 0.5090765588003157, "commonsenseqa/accuracy/dev_rand_split": 0.2702702702702703, "commonsenseqa/accuracy/group_average": 0.2702702702702703, "commonsenseqa/accuracy/seq_average": 0.2702702702702703}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-380000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.257756551106771, "val/accuracy": 0.5282428075396826, "val/perplexity": 9.5616140941006, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.307918761827931, "lambada/accuracy/total": 0.38179347826086957, "lambada/accuracy/openai_last_token": 0.8043478260869565, "lambada/perplexity": 6.541918454600722, "lambada/lm_loss": 2.8577094809360877, "lambada/lm_perplexity": 17.42157674704858, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4550181429002761, "mean_loss": 2.282837656467351, "blimp/accuracy/passive_2": 0.912, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.848, "blimp/accuracy/tough_vs_raising_2": 0.892, "blimp/accuracy/tough_vs_raising_1": 0.62, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.936, "blimp/accuracy/principle_A_reconstruction": 0.424, "blimp/accuracy/wh_vs_that_with_gap": 0.501, "blimp/accuracy/principle_A_domain_2": 0.884, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.905, "blimp/accuracy/principle_A_domain_3": 0.685, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.943, "blimp/accuracy/animate_subject_trans": 0.904, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.909, "blimp/accuracy/distractor_agreement_relative_clause": 0.731, "blimp/accuracy/transitive": 0.893, "blimp/accuracy/sentential_subject_island": 0.299, "blimp/accuracy/adjunct_island": 0.905, "blimp/accuracy/intransitive": 0.783, "blimp/accuracy/existential_there_subject_raising": 0.882, "blimp/accuracy/irregular_past_participle_adjectives": 0.911, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.723, "blimp/accuracy/principle_A_case_1": 0.999, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.355, "blimp/accuracy/only_npi_scope": 0.633, "blimp/accuracy/superlative_quantifiers_2": 0.815, "blimp/accuracy/passive_1": 0.889, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.918, "blimp/accuracy/inchoative": 0.658, "blimp/accuracy/anaphor_gender_agreement": 0.981, "blimp/accuracy/principle_A_c_command": 0.783, "blimp/accuracy/only_npi_licensor_present": 0.683, "blimp/accuracy/expletive_it_object_raising": 0.789, "blimp/accuracy/left_branch_island_simple_question": 0.817, "blimp/accuracy/wh_questions_subject_gap": 0.946, "blimp/accuracy/existential_there_quantifiers_2": 0.453, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939, "blimp/accuracy/sentential_negation_npi_scope": 0.728, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.82, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.923, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.919, "blimp/accuracy/principle_A_case_2": 0.953, "blimp/accuracy/distractor_agreement_relational_noun": 0.908, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.98, "blimp/accuracy/superlative_quantifiers_1": 0.886, "blimp/accuracy/wh_island": 0.748, "blimp/accuracy/principle_A_domain_1": 0.99, "blimp/accuracy/complex_NP_island": 0.619, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975, "blimp/accuracy/irregular_past_participle_verbs": 0.915, "blimp/accuracy/drop_argument": 0.723, "blimp/accuracy/wh_questions_object_gap": 0.87, "blimp/accuracy/animate_subject_passive": 0.797, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.923, "blimp/accuracy/npi_present_2": 0.584, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.958, "blimp/accuracy/anaphor_number_agreement": 0.99, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.967, "blimp/accuracy/existential_there_object_raising": 0.859, "blimp/accuracy/matrix_question_npi_licensor_present": 0.386, "blimp/accuracy/npi_present_1": 0.577, "blimp/accuracy/wh_vs_that_no_gap": 0.982, "blimp/accuracy/left_branch_island_echo_question": 0.521, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.775, "blimp/accuracy/group_average": 0.8109850746268659, "blimp/accuracy/seq_average": 0.8109850746268656, "cbt/accuracy/NE": 0.8269230769230769, "cbt/accuracy/V": 0.9384, "cbt/accuracy/CN": 0.8964, "cbt/accuracy/P": 0.9316, "cbt/accuracy/group_average": 0.8983307692307692, "cbt/accuracy/seq_average": 0.898359343737495, "hellaswag/accuracy/val": 0.3684524995020912, "hellaswag/accuracy/group_average": 0.3684524995020912, "hellaswag/accuracy/seq_average": 0.3684524995020912, "piqa/accuracy/val": 0.6452665941240479, "piqa/accuracy/group_average": 0.6452665941240479, "piqa/accuracy/seq_average": 0.6452665941240479, "ai2arc/accuracy/ARC-Easy": 0.39112050739957716, "ai2arc/accuracy/ARC-Challenge": 0.24549356223175967, "ai2arc/accuracy/group_average": 0.3183070348156684, "ai2arc/accuracy/seq_average": 0.3430594900849858, "mmlu/accuracy/MMLU": 0.26657132642116554, "mmlu/accuracy/group_average": 0.26657132642116554, "mmlu/accuracy/seq_average": 0.26657132642116554, "openbookqa/accuracy/test": 0.29, "openbookqa/accuracy/group_average": 0.29, "openbookqa/accuracy/seq_average": 0.29, "race/accuracy/test/high": 0.2947398513436249, "race/accuracy/test/middle": 0.3697771587743733, "race/accuracy/group_average": 0.3322585050589991, "race/accuracy/seq_average": 0.3165788406972031, "siqa/accuracy/dev": 0.3741044012282497, "siqa/accuracy/group_average": 0.3741044012282497, "siqa/accuracy/seq_average": 0.3741044012282497, "winogrande/accuracy/dev": 0.5074980268350434, "winogrande/accuracy/group_average": 0.5074980268350434, "winogrande/accuracy/seq_average": 0.5074980268350434, "commonsenseqa/accuracy/dev_rand_split": 0.2719082719082719, "commonsenseqa/accuracy/group_average": 0.2719082719082719, "commonsenseqa/accuracy/seq_average": 0.2719082719082719}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-40000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.5851018996465776, "val/accuracy": 0.4797615172371032, "val/perplexity": 13.264640683301039, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6775148758977094, "lambada/accuracy/total": 0.2843555900621118, "lambada/accuracy/openai_last_token": 0.7628105590062112, "lambada/perplexity": 10.212207588430068, "lambada/lm_loss": 3.1587688549080903, "lambada/lm_perplexity": 23.541594961439255, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3820585536496075, "mean_loss": 2.6313083877721435, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.823, "blimp/accuracy/tough_vs_raising_2": 0.849, "blimp/accuracy/tough_vs_raising_1": 0.607, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.928, "blimp/accuracy/principle_A_reconstruction": 0.234, "blimp/accuracy/wh_vs_that_with_gap": 0.489, "blimp/accuracy/principle_A_domain_2": 0.863, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.884, "blimp/accuracy/principle_A_domain_3": 0.649, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.933, "blimp/accuracy/animate_subject_trans": 0.914, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.884, "blimp/accuracy/distractor_agreement_relative_clause": 0.579, "blimp/accuracy/transitive": 0.86, "blimp/accuracy/sentential_subject_island": 0.31, "blimp/accuracy/adjunct_island": 0.837, "blimp/accuracy/intransitive": 0.762, "blimp/accuracy/existential_there_subject_raising": 0.859, "blimp/accuracy/irregular_past_participle_adjectives": 0.987, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.397, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.287, "blimp/accuracy/only_npi_scope": 0.598, "blimp/accuracy/superlative_quantifiers_2": 0.643, "blimp/accuracy/passive_1": 0.893, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.905, "blimp/accuracy/inchoative": 0.623, "blimp/accuracy/anaphor_gender_agreement": 0.945, "blimp/accuracy/principle_A_c_command": 0.661, "blimp/accuracy/only_npi_licensor_present": 0.682, "blimp/accuracy/expletive_it_object_raising": 0.802, "blimp/accuracy/left_branch_island_simple_question": 0.411, "blimp/accuracy/wh_questions_subject_gap": 0.955, "blimp/accuracy/existential_there_quantifiers_2": 0.426, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.935, "blimp/accuracy/sentential_negation_npi_scope": 0.628, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.793, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.906, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.875, "blimp/accuracy/principle_A_case_2": 0.939, "blimp/accuracy/distractor_agreement_relational_noun": 0.91, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.997, "blimp/accuracy/superlative_quantifiers_1": 0.708, "blimp/accuracy/wh_island": 0.715, "blimp/accuracy/principle_A_domain_1": 0.953, "blimp/accuracy/complex_NP_island": 0.584, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.98, "blimp/accuracy/irregular_past_participle_verbs": 0.884, "blimp/accuracy/drop_argument": 0.747, "blimp/accuracy/wh_questions_object_gap": 0.862, "blimp/accuracy/animate_subject_passive": 0.816, "blimp/accuracy/existential_there_quantifiers_1": 0.969, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.906, "blimp/accuracy/npi_present_2": 0.585, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.915, "blimp/accuracy/anaphor_number_agreement": 0.981, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.958, "blimp/accuracy/existential_there_object_raising": 0.885, "blimp/accuracy/matrix_question_npi_licensor_present": 0.365, "blimp/accuracy/npi_present_1": 0.488, "blimp/accuracy/wh_vs_that_no_gap": 0.981, "blimp/accuracy/left_branch_island_echo_question": 0.415, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.971, "blimp/accuracy/causative": 0.7, "blimp/accuracy/group_average": 0.771686567164179, "blimp/accuracy/seq_average": 0.771686567164179, "cbt/accuracy/NE": 0.7636217948717948, "cbt/accuracy/V": 0.9164, "cbt/accuracy/CN": 0.8372, "cbt/accuracy/P": 0.8928, "cbt/accuracy/group_average": 0.8525054487179486, "cbt/accuracy/seq_average": 0.8525410164065627, "hellaswag/accuracy/val": 0.29924317864967137, "hellaswag/accuracy/group_average": 0.29924317864967137, "hellaswag/accuracy/seq_average": 0.29924317864967137, "piqa/accuracy/val": 0.6001088139281828, "piqa/accuracy/group_average": 0.6001088139281828, "piqa/accuracy/seq_average": 0.6001088139281828, "ai2arc/accuracy/ARC-Easy": 0.3391120507399577, "ai2arc/accuracy/ARC-Challenge": 0.20858369098712445, "ai2arc/accuracy/group_average": 0.27384787086354107, "ai2arc/accuracy/seq_average": 0.29603399433427763, "mmlu/accuracy/MMLU": 0.25963532356095814, "mmlu/accuracy/group_average": 0.25963532356095814, "mmlu/accuracy/seq_average": 0.25963532356095814, "openbookqa/accuracy/test": 0.296, "openbookqa/accuracy/group_average": 0.296, "openbookqa/accuracy/seq_average": 0.296, "race/accuracy/test/high": 0.269010863350486, "race/accuracy/test/middle": 0.34401114206128136, "race/accuracy/group_average": 0.3065110027058837, "race/accuracy/seq_average": 0.2908390758005675, "siqa/accuracy/dev": 0.36438075742067555, "siqa/accuracy/group_average": 0.36438075742067555, "siqa/accuracy/seq_average": 0.36438075742067555, "winogrande/accuracy/dev": 0.510655090765588, "winogrande/accuracy/group_average": 0.510655090765588, "winogrande/accuracy/seq_average": 0.510655090765588, "commonsenseqa/accuracy/dev_rand_split": 0.24897624897624898, "commonsenseqa/accuracy/group_average": 0.24897624897624898, "commonsenseqa/accuracy/seq_average": 0.24897624897624898}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-400000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.253688267299107, "val/accuracy": 0.5288347516741071, "val/perplexity": 9.522793753929827, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3169684558181287, "lambada/accuracy/total": 0.37111801242236025, "lambada/accuracy/openai_last_token": 0.8055124223602484, "lambada/perplexity": 6.596435433605531, "lambada/lm_loss": 2.8533444872114537, "lambada/lm_perplexity": 17.34569740078808, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.44997638204823365, "mean_loss": 2.285328361558618, "blimp/accuracy/passive_2": 0.917, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.839, "blimp/accuracy/tough_vs_raising_2": 0.887, "blimp/accuracy/tough_vs_raising_1": 0.639, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.929, "blimp/accuracy/principle_A_reconstruction": 0.416, "blimp/accuracy/wh_vs_that_with_gap": 0.518, "blimp/accuracy/principle_A_domain_2": 0.875, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.908, "blimp/accuracy/principle_A_domain_3": 0.686, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.939, "blimp/accuracy/animate_subject_trans": 0.903, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.913, "blimp/accuracy/distractor_agreement_relative_clause": 0.708, "blimp/accuracy/transitive": 0.896, "blimp/accuracy/sentential_subject_island": 0.31, "blimp/accuracy/adjunct_island": 0.898, "blimp/accuracy/intransitive": 0.776, "blimp/accuracy/existential_there_subject_raising": 0.882, "blimp/accuracy/irregular_past_participle_adjectives": 0.932, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.701, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.377, "blimp/accuracy/only_npi_scope": 0.658, "blimp/accuracy/superlative_quantifiers_2": 0.826, "blimp/accuracy/passive_1": 0.882, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.924, "blimp/accuracy/inchoative": 0.658, "blimp/accuracy/anaphor_gender_agreement": 0.983, "blimp/accuracy/principle_A_c_command": 0.769, "blimp/accuracy/only_npi_licensor_present": 0.587, "blimp/accuracy/expletive_it_object_raising": 0.798, "blimp/accuracy/left_branch_island_simple_question": 0.793, "blimp/accuracy/wh_questions_subject_gap": 0.948, "blimp/accuracy/existential_there_quantifiers_2": 0.444, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.94, "blimp/accuracy/sentential_negation_npi_scope": 0.743, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.813, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.928, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.91, "blimp/accuracy/principle_A_case_2": 0.955, "blimp/accuracy/distractor_agreement_relational_noun": 0.898, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.986, "blimp/accuracy/superlative_quantifiers_1": 0.859, "blimp/accuracy/wh_island": 0.776, "blimp/accuracy/principle_A_domain_1": 0.99, "blimp/accuracy/complex_NP_island": 0.602, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.979, "blimp/accuracy/irregular_past_participle_verbs": 0.908, "blimp/accuracy/drop_argument": 0.732, "blimp/accuracy/wh_questions_object_gap": 0.881, "blimp/accuracy/animate_subject_passive": 0.801, "blimp/accuracy/existential_there_quantifiers_1": 0.979, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.916, "blimp/accuracy/npi_present_2": 0.587, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.963, "blimp/accuracy/anaphor_number_agreement": 0.989, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.969, "blimp/accuracy/existential_there_object_raising": 0.858, "blimp/accuracy/matrix_question_npi_licensor_present": 0.356, "blimp/accuracy/npi_present_1": 0.592, "blimp/accuracy/wh_vs_that_no_gap": 0.981, "blimp/accuracy/left_branch_island_echo_question": 0.532, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.962, "blimp/accuracy/causative": 0.773, "blimp/accuracy/group_average": 0.8097611940298507, "blimp/accuracy/seq_average": 0.8097611940298507, "cbt/accuracy/NE": 0.8317307692307693, "cbt/accuracy/V": 0.942, "cbt/accuracy/CN": 0.8924, "cbt/accuracy/P": 0.9308, "cbt/accuracy/group_average": 0.8992326923076923, "cbt/accuracy/seq_average": 0.8992597038815526, "hellaswag/accuracy/val": 0.3714399522007568, "hellaswag/accuracy/group_average": 0.3714399522007568, "hellaswag/accuracy/seq_average": 0.3714399522007568, "piqa/accuracy/val": 0.6436343852013058, "piqa/accuracy/group_average": 0.6436343852013058, "piqa/accuracy/seq_average": 0.6436343852013058, "ai2arc/accuracy/ARC-Easy": 0.38054968287526425, "ai2arc/accuracy/ARC-Challenge": 0.24206008583690988, "ai2arc/accuracy/group_average": 0.31130488435608705, "ai2arc/accuracy/seq_average": 0.3348441926345609, "mmlu/accuracy/MMLU": 0.2666428316052914, "mmlu/accuracy/group_average": 0.2666428316052914, "mmlu/accuracy/seq_average": 0.2666428316052914, "openbookqa/accuracy/test": 0.294, "openbookqa/accuracy/group_average": 0.294, "openbookqa/accuracy/seq_average": 0.294, "race/accuracy/test/high": 0.2915951972555746, "race/accuracy/test/middle": 0.37186629526462395, "race/accuracy/group_average": 0.33173074626009924, "race/accuracy/seq_average": 0.31495743818402916, "siqa/accuracy/dev": 0.3694984646878199, "siqa/accuracy/group_average": 0.3694984646878199, "siqa/accuracy/seq_average": 0.3694984646878199, "winogrande/accuracy/dev": 0.5122336227308603, "winogrande/accuracy/group_average": 0.5122336227308603, "winogrande/accuracy/seq_average": 0.5122336227308603, "commonsenseqa/accuracy/dev_rand_split": 0.27354627354627353, "commonsenseqa/accuracy/group_average": 0.27354627354627353, "commonsenseqa/accuracy/seq_average": 0.27354627354627353}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-60000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.5200694250682045, "val/accuracy": 0.4883054703000992, "val/perplexity": 12.429459549701216, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.684123542738257, "lambada/accuracy/total": 0.2713509316770186, "lambada/accuracy/openai_last_token": 0.7705745341614907, "lambada/perplexity": 9.981822411391969, "lambada/lm_loss": 3.0818021195860563, "lambada/lm_perplexity": 21.7976489922247, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3798282009885589, "mean_loss": 2.602096483903231, "blimp/accuracy/passive_2": 0.914, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.85, "blimp/accuracy/tough_vs_raising_2": 0.857, "blimp/accuracy/tough_vs_raising_1": 0.625, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.89, "blimp/accuracy/principle_A_reconstruction": 0.363, "blimp/accuracy/wh_vs_that_with_gap": 0.553, "blimp/accuracy/principle_A_domain_2": 0.854, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.911, "blimp/accuracy/principle_A_domain_3": 0.611, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.957, "blimp/accuracy/animate_subject_trans": 0.907, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.885, "blimp/accuracy/distractor_agreement_relative_clause": 0.634, "blimp/accuracy/transitive": 0.883, "blimp/accuracy/sentential_subject_island": 0.379, "blimp/accuracy/adjunct_island": 0.892, "blimp/accuracy/intransitive": 0.758, "blimp/accuracy/existential_there_subject_raising": 0.863, "blimp/accuracy/irregular_past_participle_adjectives": 0.933, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.58, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.321, "blimp/accuracy/only_npi_scope": 0.52, "blimp/accuracy/superlative_quantifiers_2": 0.545, "blimp/accuracy/passive_1": 0.902, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.937, "blimp/accuracy/inchoative": 0.605, "blimp/accuracy/anaphor_gender_agreement": 0.971, "blimp/accuracy/principle_A_c_command": 0.659, "blimp/accuracy/only_npi_licensor_present": 0.739, "blimp/accuracy/expletive_it_object_raising": 0.797, "blimp/accuracy/left_branch_island_simple_question": 0.647, "blimp/accuracy/wh_questions_subject_gap": 0.935, "blimp/accuracy/existential_there_quantifiers_2": 0.512, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.958, "blimp/accuracy/sentential_negation_npi_scope": 0.701, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.83, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.861, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.905, "blimp/accuracy/principle_A_case_2": 0.926, "blimp/accuracy/distractor_agreement_relational_noun": 0.917, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.537, "blimp/accuracy/wh_island": 0.832, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.543, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.982, "blimp/accuracy/irregular_past_participle_verbs": 0.895, "blimp/accuracy/drop_argument": 0.733, "blimp/accuracy/wh_questions_object_gap": 0.846, "blimp/accuracy/animate_subject_passive": 0.821, "blimp/accuracy/existential_there_quantifiers_1": 0.976, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.914, "blimp/accuracy/npi_present_2": 0.626, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.902, "blimp/accuracy/anaphor_number_agreement": 0.99, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.974, "blimp/accuracy/existential_there_object_raising": 0.835, "blimp/accuracy/matrix_question_npi_licensor_present": 0.303, "blimp/accuracy/npi_present_1": 0.53, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.537, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.971, "blimp/accuracy/causative": 0.729, "blimp/accuracy/group_average": 0.7866417910447758, "blimp/accuracy/seq_average": 0.7866417910447762, "cbt/accuracy/NE": 0.7796474358974359, "cbt/accuracy/V": 0.924, "cbt/accuracy/CN": 0.8524, "cbt/accuracy/P": 0.9064, "cbt/accuracy/group_average": 0.865611858974359, "cbt/accuracy/seq_average": 0.8656462585034014, "hellaswag/accuracy/val": 0.3143796056562438, "hellaswag/accuracy/group_average": 0.3143796056562438, "hellaswag/accuracy/seq_average": 0.3143796056562438, "piqa/accuracy/val": 0.6006528835690969, "piqa/accuracy/group_average": 0.6006528835690969, "piqa/accuracy/seq_average": 0.6006528835690969, "ai2arc/accuracy/ARC-Easy": 0.35517970401691334, "ai2arc/accuracy/ARC-Challenge": 0.22832618025751072, "ai2arc/accuracy/group_average": 0.29175294213721203, "ai2arc/accuracy/seq_average": 0.313314447592068, "mmlu/accuracy/MMLU": 0.2619234894529853, "mmlu/accuracy/group_average": 0.2619234894529853, "mmlu/accuracy/seq_average": 0.2619234894529853, "openbookqa/accuracy/test": 0.282, "openbookqa/accuracy/group_average": 0.282, "openbookqa/accuracy/seq_average": 0.282, "race/accuracy/test/high": 0.274442538593482, "race/accuracy/test/middle": 0.35376044568245124, "race/accuracy/group_average": 0.3141014921379666, "race/accuracy/seq_average": 0.2975273611674098, "siqa/accuracy/dev": 0.37768679631525076, "siqa/accuracy/group_average": 0.37768679631525076, "siqa/accuracy/seq_average": 0.37768679631525076, "winogrande/accuracy/dev": 0.510655090765588, "winogrande/accuracy/group_average": 0.510655090765588, "winogrande/accuracy/seq_average": 0.510655090765588, "commonsenseqa/accuracy/dev_rand_split": 0.26044226044226043, "commonsenseqa/accuracy/group_average": 0.26044226044226043, "commonsenseqa/accuracy/seq_average": 0.26044226044226043}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_660M_standardlb_deepseek_shared_only/export/result-model-80000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.4762151808965776, "val/accuracy": 0.49526541573660715, "val/perplexity": 11.896154312804708, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.49625259304639, "lambada/accuracy/total": 0.2783385093167702, "lambada/accuracy/openai_last_token": 0.7736801242236024, "lambada/perplexity": 9.963250173774625, "lambada/lm_loss": 3.0457064553506137, "lambada/lm_perplexity": 21.02487909590856, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.38680196252668864, "mean_loss": 2.4862338869714837, "blimp/accuracy/passive_2": 0.919, "blimp/accuracy/determiner_noun_agreement_2": 0.989, "blimp/accuracy/ellipsis_n_bar_1": 0.822, "blimp/accuracy/tough_vs_raising_2": 0.846, "blimp/accuracy/tough_vs_raising_1": 0.682, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.928, "blimp/accuracy/principle_A_reconstruction": 0.44, "blimp/accuracy/wh_vs_that_with_gap": 0.522, "blimp/accuracy/principle_A_domain_2": 0.874, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.866, "blimp/accuracy/principle_A_domain_3": 0.66, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.951, "blimp/accuracy/animate_subject_trans": 0.912, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.905, "blimp/accuracy/distractor_agreement_relative_clause": 0.652, "blimp/accuracy/transitive": 0.877, "blimp/accuracy/sentential_subject_island": 0.318, "blimp/accuracy/adjunct_island": 0.853, "blimp/accuracy/intransitive": 0.799, "blimp/accuracy/existential_there_subject_raising": 0.861, "blimp/accuracy/irregular_past_participle_adjectives": 0.937, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.38, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.297, "blimp/accuracy/only_npi_scope": 0.63, "blimp/accuracy/superlative_quantifiers_2": 0.722, "blimp/accuracy/passive_1": 0.899, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.915, "blimp/accuracy/inchoative": 0.643, "blimp/accuracy/anaphor_gender_agreement": 0.971, "blimp/accuracy/principle_A_c_command": 0.716, "blimp/accuracy/only_npi_licensor_present": 0.733, "blimp/accuracy/expletive_it_object_raising": 0.801, "blimp/accuracy/left_branch_island_simple_question": 0.413, "blimp/accuracy/wh_questions_subject_gap": 0.913, "blimp/accuracy/existential_there_quantifiers_2": 0.491, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.96, "blimp/accuracy/sentential_negation_npi_scope": 0.732, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.832, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.89, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.925, "blimp/accuracy/principle_A_case_2": 0.927, "blimp/accuracy/distractor_agreement_relational_noun": 0.934, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.983, "blimp/accuracy/superlative_quantifiers_1": 0.623, "blimp/accuracy/wh_island": 0.764, "blimp/accuracy/principle_A_domain_1": 0.979, "blimp/accuracy/complex_NP_island": 0.524, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.988, "blimp/accuracy/irregular_past_participle_verbs": 0.863, "blimp/accuracy/drop_argument": 0.776, "blimp/accuracy/wh_questions_object_gap": 0.83, "blimp/accuracy/animate_subject_passive": 0.801, "blimp/accuracy/existential_there_quantifiers_1": 0.973, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.926, "blimp/accuracy/npi_present_2": 0.606, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.94, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.963, "blimp/accuracy/existential_there_object_raising": 0.868, "blimp/accuracy/matrix_question_npi_licensor_present": 0.284, "blimp/accuracy/npi_present_1": 0.578, "blimp/accuracy/wh_vs_that_no_gap": 0.968, "blimp/accuracy/left_branch_island_echo_question": 0.417, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.964, "blimp/accuracy/causative": 0.719, "blimp/accuracy/group_average": 0.7858955223880598, "blimp/accuracy/seq_average": 0.7858955223880597, "cbt/accuracy/NE": 0.7908653846153846, "cbt/accuracy/V": 0.9232, "cbt/accuracy/CN": 0.8616, "cbt/accuracy/P": 0.9048, "cbt/accuracy/group_average": 0.8701163461538461, "cbt/accuracy/seq_average": 0.8701480592236894, "hellaswag/accuracy/val": 0.3205536745668194, "hellaswag/accuracy/group_average": 0.3205536745668194, "hellaswag/accuracy/seq_average": 0.3205536745668194, "piqa/accuracy/val": 0.6017410228509249, "piqa/accuracy/group_average": 0.6017410228509249, "piqa/accuracy/seq_average": 0.6017410228509249, "ai2arc/accuracy/ARC-Easy": 0.3488372093023256, "ai2arc/accuracy/ARC-Challenge": 0.22746781115879827, "ai2arc/accuracy/group_average": 0.28815251023056193, "ai2arc/accuracy/seq_average": 0.3087818696883853, "mmlu/accuracy/MMLU": 0.26149445834823026, "mmlu/accuracy/group_average": 0.26149445834823026, "mmlu/accuracy/seq_average": 0.26149445834823026, "openbookqa/accuracy/test": 0.296, "openbookqa/accuracy/group_average": 0.296, "openbookqa/accuracy/seq_average": 0.296, "race/accuracy/test/high": 0.27815894797026874, "race/accuracy/test/middle": 0.35376044568245124, "race/accuracy/group_average": 0.31595969682636, "race/accuracy/seq_average": 0.3001621402513174, "siqa/accuracy/dev": 0.3587512794268168, "siqa/accuracy/group_average": 0.3587512794268168, "siqa/accuracy/seq_average": 0.3587512794268168, "winogrande/accuracy/dev": 0.5130228887134964, "winogrande/accuracy/group_average": 0.5130228887134964, "winogrande/accuracy/seq_average": 0.5130228887134964, "commonsenseqa/accuracy/dev_rand_split": 0.25307125307125306, "commonsenseqa/accuracy/group_average": 0.25307125307125306, "commonsenseqa/accuracy/seq_average": 0.25307125307125306}
|