Upload folder using huggingface_hub
#5321
by
DavidNguyen
- opened
- Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-160000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-180000.pth.json +1 -0
- Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-280000.pth.json +1 -0
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-160000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.4003583846553678, "val/accuracy": 0.5051988170992944, "val/perplexity": 11.027127625900379, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.3034290763902368, "lambada/accuracy/total": 0.3297748447204969, "lambada/accuracy/openai_last_token": 0.7915372670807453, "lambada/perplexity": 7.824656120039497, "lambada/lm_loss": 2.9716990422471468, "lambada/lm_perplexity": 19.525065340939612, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.41748683090989563, "mean_loss": 2.3518937305228023, "blimp/accuracy/passive_2": 0.905, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.874, "blimp/accuracy/tough_vs_raising_2": 0.878, "blimp/accuracy/tough_vs_raising_1": 0.577, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/principle_A_reconstruction": 0.301, "blimp/accuracy/wh_vs_that_with_gap": 0.455, "blimp/accuracy/principle_A_domain_2": 0.883, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.907, "blimp/accuracy/principle_A_domain_3": 0.604, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.93, "blimp/accuracy/animate_subject_trans": 0.898, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.908, "blimp/accuracy/distractor_agreement_relative_clause": 0.672, "blimp/accuracy/transitive": 0.903, "blimp/accuracy/sentential_subject_island": 0.286, "blimp/accuracy/adjunct_island": 0.889, "blimp/accuracy/intransitive": 0.758, "blimp/accuracy/existential_there_subject_raising": 0.896, "blimp/accuracy/irregular_past_participle_adjectives": 0.975, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.681, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.317, "blimp/accuracy/only_npi_scope": 0.711, "blimp/accuracy/superlative_quantifiers_2": 0.852, "blimp/accuracy/passive_1": 0.902, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.934, "blimp/accuracy/inchoative": 0.623, "blimp/accuracy/anaphor_gender_agreement": 0.966, "blimp/accuracy/principle_A_c_command": 0.64, "blimp/accuracy/only_npi_licensor_present": 0.446, "blimp/accuracy/expletive_it_object_raising": 0.759, "blimp/accuracy/left_branch_island_simple_question": 0.785, "blimp/accuracy/wh_questions_subject_gap": 0.927, "blimp/accuracy/existential_there_quantifiers_2": 0.492, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.672, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.838, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.907, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.904, "blimp/accuracy/principle_A_case_2": 0.943, "blimp/accuracy/distractor_agreement_relational_noun": 0.85, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.977, "blimp/accuracy/superlative_quantifiers_1": 0.685, "blimp/accuracy/wh_island": 0.807, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.637, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.974, "blimp/accuracy/irregular_past_participle_verbs": 0.925, "blimp/accuracy/drop_argument": 0.724, "blimp/accuracy/wh_questions_object_gap": 0.84, "blimp/accuracy/animate_subject_passive": 0.788, "blimp/accuracy/existential_there_quantifiers_1": 0.971, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/npi_present_2": 0.566, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.976, "blimp/accuracy/anaphor_number_agreement": 0.994, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.961, "blimp/accuracy/existential_there_object_raising": 0.887, "blimp/accuracy/matrix_question_npi_licensor_present": 0.303, "blimp/accuracy/npi_present_1": 0.551, "blimp/accuracy/wh_vs_that_no_gap": 0.977, "blimp/accuracy/left_branch_island_echo_question": 0.452, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.951, "blimp/accuracy/causative": 0.759, "blimp/accuracy/group_average": 0.7921791044776119, "blimp/accuracy/seq_average": 0.792179104477612, "cbt/accuracy/NE": 0.8100961538461539, "cbt/accuracy/V": 0.9284, "cbt/accuracy/CN": 0.8804, "cbt/accuracy/P": 0.91, "cbt/accuracy/group_average": 0.8822240384615385, "cbt/accuracy/seq_average": 0.8822529011604642, "hellaswag/accuracy/val": 0.3392750448117905, "hellaswag/accuracy/group_average": 0.3392750448117905, "hellaswag/accuracy/seq_average": 0.3392750448117905, "piqa/accuracy/val": 0.6235038084874864, "piqa/accuracy/group_average": 0.6235038084874864, "piqa/accuracy/seq_average": 0.6235038084874864, "ai2arc/accuracy/ARC-Easy": 0.3704016913319239, "ai2arc/accuracy/ARC-Challenge": 0.23605150214592274, "ai2arc/accuracy/group_average": 0.3032265967389233, "ai2arc/accuracy/seq_average": 0.32606232294617565, "mmlu/accuracy/MMLU": 0.2632105827672506, "mmlu/accuracy/group_average": 0.2632105827672506, "mmlu/accuracy/seq_average": 0.2632105827672506, "openbookqa/accuracy/test": 0.28, "openbookqa/accuracy/group_average": 0.28, "openbookqa/accuracy/seq_average": 0.28, "race/accuracy/test/high": 0.2830188679245283, "race/accuracy/test/middle": 0.3628133704735376, "race/accuracy/group_average": 0.32291611919903296, "race/accuracy/seq_average": 0.3062423996757195, "siqa/accuracy/dev": 0.3638689866939611, "siqa/accuracy/group_average": 0.3638689866939611, "siqa/accuracy/seq_average": 0.3638689866939611, "winogrande/accuracy/dev": 0.5011838989739542, "winogrande/accuracy/group_average": 0.5011838989739542, "winogrande/accuracy/seq_average": 0.5011838989739542, "commonsenseqa/accuracy/dev_rand_split": 0.2620802620802621, "commonsenseqa/accuracy/group_average": 0.2620802620802621, "commonsenseqa/accuracy/seq_average": 0.2620802620802621}
|
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-180000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.364891173347594, "val/accuracy": 0.5107199048239087, "val/perplexity": 10.64288052542509, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5708572672020575, "lambada/accuracy/total": 0.3421972049689441, "lambada/accuracy/openai_last_token": 0.797360248447205, "lambada/perplexity": 7.406868178335139, "lambada/lm_loss": 2.9628792196336193, "lambada/lm_perplexity": 19.353614920677348, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4264585548964264, "mean_loss": 2.467874220274826, "blimp/accuracy/passive_2": 0.915, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.861, "blimp/accuracy/tough_vs_raising_2": 0.874, "blimp/accuracy/tough_vs_raising_1": 0.648, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.907, "blimp/accuracy/principle_A_reconstruction": 0.28, "blimp/accuracy/wh_vs_that_with_gap": 0.421, "blimp/accuracy/principle_A_domain_2": 0.888, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.897, "blimp/accuracy/principle_A_domain_3": 0.6, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.922, "blimp/accuracy/animate_subject_trans": 0.907, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.905, "blimp/accuracy/distractor_agreement_relative_clause": 0.701, "blimp/accuracy/transitive": 0.896, "blimp/accuracy/sentential_subject_island": 0.354, "blimp/accuracy/adjunct_island": 0.855, "blimp/accuracy/intransitive": 0.756, "blimp/accuracy/existential_there_subject_raising": 0.889, "blimp/accuracy/irregular_past_participle_adjectives": 0.915, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.696, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.306, "blimp/accuracy/only_npi_scope": 0.736, "blimp/accuracy/superlative_quantifiers_2": 0.832, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.938, "blimp/accuracy/inchoative": 0.609, "blimp/accuracy/anaphor_gender_agreement": 0.972, "blimp/accuracy/principle_A_c_command": 0.685, "blimp/accuracy/only_npi_licensor_present": 0.535, "blimp/accuracy/expletive_it_object_raising": 0.768, "blimp/accuracy/left_branch_island_simple_question": 0.796, "blimp/accuracy/wh_questions_subject_gap": 0.942, "blimp/accuracy/existential_there_quantifiers_2": 0.56, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.941, "blimp/accuracy/sentential_negation_npi_scope": 0.662, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.811, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.933, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.891, "blimp/accuracy/principle_A_case_2": 0.937, "blimp/accuracy/distractor_agreement_relational_noun": 0.821, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.97, "blimp/accuracy/superlative_quantifiers_1": 0.688, "blimp/accuracy/wh_island": 0.741, "blimp/accuracy/principle_A_domain_1": 0.995, "blimp/accuracy/complex_NP_island": 0.594, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.97, "blimp/accuracy/irregular_past_participle_verbs": 0.909, "blimp/accuracy/drop_argument": 0.734, "blimp/accuracy/wh_questions_object_gap": 0.841, "blimp/accuracy/animate_subject_passive": 0.787, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.892, "blimp/accuracy/npi_present_2": 0.542, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.941, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.957, "blimp/accuracy/existential_there_object_raising": 0.883, "blimp/accuracy/matrix_question_npi_licensor_present": 0.349, "blimp/accuracy/npi_present_1": 0.526, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.437, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.739, "blimp/accuracy/group_average": 0.7913432835820896, "blimp/accuracy/seq_average": 0.7913432835820896, "cbt/accuracy/NE": 0.8052884615384616, "cbt/accuracy/V": 0.9328, "cbt/accuracy/CN": 0.88, "cbt/accuracy/P": 0.9216, "cbt/accuracy/group_average": 0.8849221153846154, "cbt/accuracy/seq_average": 0.884953981592637, "hellaswag/accuracy/val": 0.34624576777534355, "hellaswag/accuracy/group_average": 0.34624576777534355, "hellaswag/accuracy/seq_average": 0.34624576777534355, "piqa/accuracy/val": 0.6376496191512514, "piqa/accuracy/group_average": 0.6376496191512514, "piqa/accuracy/seq_average": 0.6376496191512514, "ai2arc/accuracy/ARC-Easy": 0.3708245243128964, "ai2arc/accuracy/ARC-Challenge": 0.22832618025751072, "ai2arc/accuracy/group_average": 0.2995753522852036, "ai2arc/accuracy/seq_average": 0.32379603399433426, "mmlu/accuracy/MMLU": 0.2606363961387201, "mmlu/accuracy/group_average": 0.2606363961387201, "mmlu/accuracy/seq_average": 0.2606363961387201, "openbookqa/accuracy/test": 0.272, "openbookqa/accuracy/group_average": 0.272, "openbookqa/accuracy/seq_average": 0.272, "race/accuracy/test/high": 0.2850200114351058, "race/accuracy/test/middle": 0.3628133704735376, "race/accuracy/group_average": 0.32391669095432174, "race/accuracy/seq_average": 0.30766112687474667, "siqa/accuracy/dev": 0.36284544524053225, "siqa/accuracy/group_average": 0.36284544524053225, "siqa/accuracy/seq_average": 0.36284544524053225, "winogrande/accuracy/dev": 0.5011838989739542, "winogrande/accuracy/group_average": 0.5011838989739542, "winogrande/accuracy/seq_average": 0.5011838989739542, "commonsenseqa/accuracy/dev_rand_split": 0.26453726453726456, "commonsenseqa/accuracy/group_average": 0.26453726453726456, "commonsenseqa/accuracy/seq_average": 0.26453726453726456}
|
Pretrain_language_model/save/slimpajama_competesmoe_no_attmoe_660M_standardlb/export/result-model-280000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.311976771200857, "val/accuracy": 0.5189691358996976, "val/perplexity": 10.094359184046882, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.193172478527756, "lambada/accuracy/total": 0.37577639751552794, "lambada/accuracy/openai_last_token": 0.8016304347826086, "lambada/perplexity": 6.779006294918139, "lambada/lm_loss": 2.8871369140285337, "lambada/lm_perplexity": 17.941866886848043, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4473727667076128, "mean_loss": 2.2525746248643066, "blimp/accuracy/passive_2": 0.912, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.84, "blimp/accuracy/tough_vs_raising_2": 0.873, "blimp/accuracy/tough_vs_raising_1": 0.603, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.855, "blimp/accuracy/principle_A_reconstruction": 0.421, "blimp/accuracy/wh_vs_that_with_gap": 0.429, "blimp/accuracy/principle_A_domain_2": 0.888, "blimp/accuracy/determiner_noun_agreement_1": 0.989, "blimp/accuracy/ellipsis_n_bar_2": 0.918, "blimp/accuracy/principle_A_domain_3": 0.634, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.92, "blimp/accuracy/animate_subject_trans": 0.902, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.914, "blimp/accuracy/distractor_agreement_relative_clause": 0.726, "blimp/accuracy/transitive": 0.901, "blimp/accuracy/sentential_subject_island": 0.328, "blimp/accuracy/adjunct_island": 0.868, "blimp/accuracy/intransitive": 0.758, "blimp/accuracy/existential_there_subject_raising": 0.909, "blimp/accuracy/irregular_past_participle_adjectives": 0.769, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.725, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.291, "blimp/accuracy/only_npi_scope": 0.679, "blimp/accuracy/superlative_quantifiers_2": 0.782, "blimp/accuracy/passive_1": 0.901, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/inchoative": 0.609, "blimp/accuracy/anaphor_gender_agreement": 0.977, "blimp/accuracy/principle_A_c_command": 0.698, "blimp/accuracy/only_npi_licensor_present": 0.856, "blimp/accuracy/expletive_it_object_raising": 0.778, "blimp/accuracy/left_branch_island_simple_question": 0.799, "blimp/accuracy/wh_questions_subject_gap": 0.941, "blimp/accuracy/existential_there_quantifiers_2": 0.55, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.949, "blimp/accuracy/sentential_negation_npi_scope": 0.677, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.839, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.914, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.874, "blimp/accuracy/principle_A_case_2": 0.932, "blimp/accuracy/distractor_agreement_relational_noun": 0.866, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.956, "blimp/accuracy/superlative_quantifiers_1": 0.835, "blimp/accuracy/wh_island": 0.768, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.622, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.982, "blimp/accuracy/irregular_past_participle_verbs": 0.927, "blimp/accuracy/drop_argument": 0.727, "blimp/accuracy/wh_questions_object_gap": 0.848, "blimp/accuracy/animate_subject_passive": 0.802, "blimp/accuracy/existential_there_quantifiers_1": 0.98, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.864, "blimp/accuracy/npi_present_2": 0.576, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.938, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.962, "blimp/accuracy/existential_there_object_raising": 0.86, "blimp/accuracy/matrix_question_npi_licensor_present": 0.433, "blimp/accuracy/npi_present_1": 0.615, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.482, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.755, "blimp/accuracy/group_average": 0.8024029850746269, "blimp/accuracy/seq_average": 0.8024029850746268, "cbt/accuracy/NE": 0.8261217948717948, "cbt/accuracy/V": 0.9348, "cbt/accuracy/CN": 0.8856, "cbt/accuracy/P": 0.9168, "cbt/accuracy/group_average": 0.8908304487179487, "cbt/accuracy/seq_average": 0.8908563425370148, "hellaswag/accuracy/val": 0.3626767576180044, "hellaswag/accuracy/group_average": 0.3626767576180044, "hellaswag/accuracy/seq_average": 0.3626767576180044, "piqa/accuracy/val": 0.6322089227421109, "piqa/accuracy/group_average": 0.6322089227421109, "piqa/accuracy/seq_average": 0.6322089227421109, "ai2arc/accuracy/ARC-Easy": 0.38054968287526425, "ai2arc/accuracy/ARC-Challenge": 0.2257510729613734, "ai2arc/accuracy/group_average": 0.30315037791831884, "ai2arc/accuracy/seq_average": 0.32946175637393765, "mmlu/accuracy/MMLU": 0.25927779764032893, "mmlu/accuracy/group_average": 0.25927779764032893, "mmlu/accuracy/seq_average": 0.25927779764032893, "openbookqa/accuracy/test": 0.276, "openbookqa/accuracy/group_average": 0.276, "openbookqa/accuracy/seq_average": 0.276, "race/accuracy/test/high": 0.28987993138936535, "race/accuracy/test/middle": 0.3767409470752089, "race/accuracy/group_average": 0.3333104392322871, "race/accuracy/seq_average": 0.3151601134981759, "siqa/accuracy/dev": 0.37461617195496416, "siqa/accuracy/group_average": 0.37461617195496416, "siqa/accuracy/seq_average": 0.37461617195496416, "winogrande/accuracy/dev": 0.5059194948697711, "winogrande/accuracy/group_average": 0.5059194948697711, "winogrande/accuracy/seq_average": 0.5059194948697711, "commonsenseqa/accuracy/dev_rand_split": 0.2702702702702703, "commonsenseqa/accuracy/group_average": 0.2702702702702703, "commonsenseqa/accuracy/seq_average": 0.2702702702702703}
|