Upload folder using huggingface_hub

#2153
Files changed (20) hide show
  1. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-100000.pth.json +1 -0
  2. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-120000.pth.json +1 -0
  3. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-140000.pth.json +1 -0
  4. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-160000.pth.json +1 -0
  5. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-180000.pth.json +1 -0
  6. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-20000.pth.json +1 -0
  7. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-200000.pth.json +1 -0
  8. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-220000.pth.json +1 -0
  9. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-240000.pth.json +1 -0
  10. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-260000.pth.json +1 -0
  11. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-280000.pth.json +1 -0
  12. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-300000.pth.json +1 -0
  13. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-320000.pth.json +1 -0
  14. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-340000.pth.json +1 -0
  15. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-360000.pth.json +1 -0
  16. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-380000.pth.json +1 -0
  17. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-40000.pth.json +1 -0
  18. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-400000.pth.json +1 -0
  19. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-60000.pth.json +1 -0
  20. Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-80000.pth.json +1 -0
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-100000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6107284303695435, "val/accuracy": 0.47708856491815477, "val/perplexity": 13.608960422246208, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5802127056240294, "lambada/accuracy/total": 0.2686335403726708, "lambada/accuracy/openai_last_token": 0.7672748447204969, "lambada/perplexity": 11.166401902251305, "lambada/lm_loss": 3.200858855599533, "lambada/lm_perplexity": 24.553609148631917, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3728610526454128, "mean_loss": 2.595470567996786, "blimp/accuracy/passive_2": 0.884, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.803, "blimp/accuracy/tough_vs_raising_2": 0.852, "blimp/accuracy/tough_vs_raising_1": 0.603, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.927, "blimp/accuracy/principle_A_reconstruction": 0.337, "blimp/accuracy/wh_vs_that_with_gap": 0.521, "blimp/accuracy/principle_A_domain_2": 0.847, "blimp/accuracy/determiner_noun_agreement_1": 0.986, "blimp/accuracy/ellipsis_n_bar_2": 0.899, "blimp/accuracy/principle_A_domain_3": 0.592, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.903, "blimp/accuracy/animate_subject_trans": 0.892, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.868, "blimp/accuracy/distractor_agreement_relative_clause": 0.603, "blimp/accuracy/transitive": 0.859, "blimp/accuracy/sentential_subject_island": 0.323, "blimp/accuracy/adjunct_island": 0.864, "blimp/accuracy/intransitive": 0.816, "blimp/accuracy/existential_there_subject_raising": 0.873, "blimp/accuracy/irregular_past_participle_adjectives": 0.954, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.372, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.303, "blimp/accuracy/only_npi_scope": 0.582, "blimp/accuracy/superlative_quantifiers_2": 0.694, "blimp/accuracy/passive_1": 0.885, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.906, "blimp/accuracy/inchoative": 0.644, "blimp/accuracy/anaphor_gender_agreement": 0.96, "blimp/accuracy/principle_A_c_command": 0.724, "blimp/accuracy/only_npi_licensor_present": 0.511, "blimp/accuracy/expletive_it_object_raising": 0.771, "blimp/accuracy/left_branch_island_simple_question": 0.428, "blimp/accuracy/wh_questions_subject_gap": 0.942, "blimp/accuracy/existential_there_quantifiers_2": 0.494, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.925, "blimp/accuracy/sentential_negation_npi_scope": 0.656, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.829, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.881, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.911, "blimp/accuracy/principle_A_case_2": 0.962, "blimp/accuracy/distractor_agreement_relational_noun": 0.768, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.514, "blimp/accuracy/wh_island": 0.769, "blimp/accuracy/principle_A_domain_1": 0.986, "blimp/accuracy/complex_NP_island": 0.559, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.943, "blimp/accuracy/irregular_past_participle_verbs": 0.9, "blimp/accuracy/drop_argument": 0.785, "blimp/accuracy/wh_questions_object_gap": 0.808, "blimp/accuracy/animate_subject_passive": 0.787, "blimp/accuracy/existential_there_quantifiers_1": 0.951, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.881, "blimp/accuracy/npi_present_2": 0.642, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.926, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.953, "blimp/accuracy/existential_there_object_raising": 0.845, "blimp/accuracy/matrix_question_npi_licensor_present": 0.169, "blimp/accuracy/npi_present_1": 0.628, "blimp/accuracy/wh_vs_that_no_gap": 0.974, "blimp/accuracy/left_branch_island_echo_question": 0.456, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.968, "blimp/accuracy/causative": 0.711, "blimp/accuracy/group_average": 0.7680746268656716, "blimp/accuracy/seq_average": 0.7680746268656716, "cbt/accuracy/NE": 0.7800480769230769, "cbt/accuracy/V": 0.9108, "cbt/accuracy/CN": 0.8324, "cbt/accuracy/P": 0.8844, "cbt/accuracy/group_average": 0.8519120192307692, "cbt/accuracy/seq_average": 0.8519407763105242, "hellaswag/accuracy/val": 0.2997410874327823, "hellaswag/accuracy/group_average": 0.2997410874327823, "hellaswag/accuracy/seq_average": 0.2997410874327823, "piqa/accuracy/val": 0.5875952121871599, "piqa/accuracy/group_average": 0.5875952121871599, "piqa/accuracy/seq_average": 0.5875952121871599, "ai2arc/accuracy/ARC-Easy": 0.34418604651162793, "ai2arc/accuracy/ARC-Challenge": 0.21630901287553647, "ai2arc/accuracy/group_average": 0.2802475296935822, "ai2arc/accuracy/seq_average": 0.30198300283286117, "race/accuracy/test/high": 0.27329902801600914, "race/accuracy/test/middle": 0.35097493036211697, "race/accuracy/group_average": 0.312136979189063, "race/accuracy/seq_average": 0.2959059586542359, "siqa/accuracy/dev": 0.3587512794268168, "siqa/accuracy/group_average": 0.3587512794268168, "siqa/accuracy/seq_average": 0.3587512794268168, "commonsenseqa/accuracy/dev_rand_split": 0.26863226863226863, "commonsenseqa/accuracy/group_average": 0.26863226863226863, "commonsenseqa/accuracy/seq_average": 0.26863226863226863}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-120000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.590767270042783, "val/accuracy": 0.48016454303075395, "val/perplexity": 13.340003062268122, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6195223790518245, "lambada/accuracy/total": 0.2983307453416149, "lambada/accuracy/openai_last_token": 0.7678571428571429, "lambada/perplexity": 9.904861636395912, "lambada/lm_loss": 3.1739308305902614, "lambada/lm_perplexity": 23.901251715278747, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3892476441861844, "mean_loss": 2.6051448245473035, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.793, "blimp/accuracy/tough_vs_raising_2": 0.866, "blimp/accuracy/tough_vs_raising_1": 0.588, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.912, "blimp/accuracy/principle_A_reconstruction": 0.355, "blimp/accuracy/wh_vs_that_with_gap": 0.496, "blimp/accuracy/principle_A_domain_2": 0.833, "blimp/accuracy/determiner_noun_agreement_1": 0.984, "blimp/accuracy/ellipsis_n_bar_2": 0.913, "blimp/accuracy/principle_A_domain_3": 0.576, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.899, "blimp/accuracy/animate_subject_trans": 0.887, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.844, "blimp/accuracy/distractor_agreement_relative_clause": 0.617, "blimp/accuracy/transitive": 0.863, "blimp/accuracy/sentential_subject_island": 0.379, "blimp/accuracy/adjunct_island": 0.826, "blimp/accuracy/intransitive": 0.783, "blimp/accuracy/existential_there_subject_raising": 0.875, "blimp/accuracy/irregular_past_participle_adjectives": 0.95, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.375, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.276, "blimp/accuracy/only_npi_scope": 0.556, "blimp/accuracy/superlative_quantifiers_2": 0.714, "blimp/accuracy/passive_1": 0.918, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.932, "blimp/accuracy/inchoative": 0.626, "blimp/accuracy/anaphor_gender_agreement": 0.941, "blimp/accuracy/principle_A_c_command": 0.745, "blimp/accuracy/only_npi_licensor_present": 0.625, "blimp/accuracy/expletive_it_object_raising": 0.788, "blimp/accuracy/left_branch_island_simple_question": 0.45, "blimp/accuracy/wh_questions_subject_gap": 0.952, "blimp/accuracy/existential_there_quantifiers_2": 0.326, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.921, "blimp/accuracy/sentential_negation_npi_scope": 0.69, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.804, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.906, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.917, "blimp/accuracy/principle_A_case_2": 0.956, "blimp/accuracy/distractor_agreement_relational_noun": 0.816, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.625, "blimp/accuracy/wh_island": 0.78, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.519, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.952, "blimp/accuracy/irregular_past_participle_verbs": 0.87, "blimp/accuracy/drop_argument": 0.757, "blimp/accuracy/wh_questions_object_gap": 0.815, "blimp/accuracy/animate_subject_passive": 0.804, "blimp/accuracy/existential_there_quantifiers_1": 0.964, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.914, "blimp/accuracy/npi_present_2": 0.601, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.892, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.943, "blimp/accuracy/existential_there_object_raising": 0.885, "blimp/accuracy/matrix_question_npi_licensor_present": 0.205, "blimp/accuracy/npi_present_1": 0.555, "blimp/accuracy/wh_vs_that_no_gap": 0.986, "blimp/accuracy/left_branch_island_echo_question": 0.425, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.986, "blimp/accuracy/causative": 0.716, "blimp/accuracy/group_average": 0.7689850746268654, "blimp/accuracy/seq_average": 0.7689850746268657, "cbt/accuracy/NE": 0.7856570512820513, "cbt/accuracy/V": 0.91, "cbt/accuracy/CN": 0.838, "cbt/accuracy/P": 0.8904, "cbt/accuracy/group_average": 0.8560142628205129, "cbt/accuracy/seq_average": 0.8560424169667867, "hellaswag/accuracy/val": 0.3007369049990042, "hellaswag/accuracy/group_average": 0.3007369049990042, "hellaswag/accuracy/seq_average": 0.3007369049990042, "piqa/accuracy/val": 0.588683351468988, "piqa/accuracy/group_average": 0.588683351468988, "piqa/accuracy/seq_average": 0.588683351468988, "ai2arc/accuracy/ARC-Easy": 0.35306553911205074, "ai2arc/accuracy/ARC-Challenge": 0.2240343347639485, "ai2arc/accuracy/group_average": 0.28854993693799963, "ai2arc/accuracy/seq_average": 0.3104815864022663, "race/accuracy/test/high": 0.27615780445969124, "race/accuracy/test/middle": 0.3488857938718663, "race/accuracy/group_average": 0.31252179916577877, "race/accuracy/seq_average": 0.29732468585326305, "siqa/accuracy/dev": 0.36438075742067555, "siqa/accuracy/group_average": 0.36438075742067555, "siqa/accuracy/seq_average": 0.36438075742067555, "commonsenseqa/accuracy/dev_rand_split": 0.2678132678132678, "commonsenseqa/accuracy/group_average": 0.2678132678132678, "commonsenseqa/accuracy/seq_average": 0.2678132678132678}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-140000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.5733531164744545, "val/accuracy": 0.4821322002108135, "val/perplexity": 13.109709206366974, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6722620614567156, "lambada/accuracy/total": 0.25873447204968947, "lambada/accuracy/openai_last_token": 0.7633928571428571, "lambada/perplexity": 11.31685462744714, "lambada/lm_loss": 3.1650793168546483, "lambada/lm_perplexity": 23.690623024015196, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3704333361302515, "mean_loss": 2.622807588965585, "blimp/accuracy/passive_2": 0.909, "blimp/accuracy/determiner_noun_agreement_2": 0.978, "blimp/accuracy/ellipsis_n_bar_1": 0.772, "blimp/accuracy/tough_vs_raising_2": 0.904, "blimp/accuracy/tough_vs_raising_1": 0.531, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.9, "blimp/accuracy/principle_A_reconstruction": 0.413, "blimp/accuracy/wh_vs_that_with_gap": 0.555, "blimp/accuracy/principle_A_domain_2": 0.831, "blimp/accuracy/determiner_noun_agreement_1": 0.986, "blimp/accuracy/ellipsis_n_bar_2": 0.892, "blimp/accuracy/principle_A_domain_3": 0.585, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.908, "blimp/accuracy/animate_subject_trans": 0.902, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.841, "blimp/accuracy/distractor_agreement_relative_clause": 0.618, "blimp/accuracy/transitive": 0.879, "blimp/accuracy/sentential_subject_island": 0.313, "blimp/accuracy/adjunct_island": 0.879, "blimp/accuracy/intransitive": 0.776, "blimp/accuracy/existential_there_subject_raising": 0.863, "blimp/accuracy/irregular_past_participle_adjectives": 0.796, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.349, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.314, "blimp/accuracy/only_npi_scope": 0.686, "blimp/accuracy/superlative_quantifiers_2": 0.636, "blimp/accuracy/passive_1": 0.891, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.921, "blimp/accuracy/inchoative": 0.615, "blimp/accuracy/anaphor_gender_agreement": 0.965, "blimp/accuracy/principle_A_c_command": 0.67, "blimp/accuracy/only_npi_licensor_present": 0.679, "blimp/accuracy/expletive_it_object_raising": 0.764, "blimp/accuracy/left_branch_island_simple_question": 0.453, "blimp/accuracy/wh_questions_subject_gap": 0.949, "blimp/accuracy/existential_there_quantifiers_2": 0.347, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.641, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.831, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.907, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.907, "blimp/accuracy/principle_A_case_2": 0.957, "blimp/accuracy/distractor_agreement_relational_noun": 0.838, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989, "blimp/accuracy/superlative_quantifiers_1": 0.678, "blimp/accuracy/wh_island": 0.707, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.573, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.961, "blimp/accuracy/irregular_past_participle_verbs": 0.91, "blimp/accuracy/drop_argument": 0.795, "blimp/accuracy/wh_questions_object_gap": 0.803, "blimp/accuracy/animate_subject_passive": 0.804, "blimp/accuracy/existential_there_quantifiers_1": 0.953, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.9, "blimp/accuracy/npi_present_2": 0.529, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.928, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.949, "blimp/accuracy/existential_there_object_raising": 0.834, "blimp/accuracy/matrix_question_npi_licensor_present": 0.23, "blimp/accuracy/npi_present_1": 0.535, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.505, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.971, "blimp/accuracy/causative": 0.718, "blimp/accuracy/group_average": 0.7689999999999996, "blimp/accuracy/seq_average": 0.769, "cbt/accuracy/NE": 0.7872596153846154, "cbt/accuracy/V": 0.9172, "cbt/accuracy/CN": 0.8352, "cbt/accuracy/P": 0.8924, "cbt/accuracy/group_average": 0.8580149038461539, "cbt/accuracy/seq_average": 0.8580432172869148, "hellaswag/accuracy/val": 0.304919338777136, "hellaswag/accuracy/group_average": 0.304919338777136, "hellaswag/accuracy/seq_average": 0.304919338777136, "piqa/accuracy/val": 0.5908596300326442, "piqa/accuracy/group_average": 0.5908596300326442, "piqa/accuracy/seq_average": 0.5908596300326442, "ai2arc/accuracy/ARC-Easy": 0.35052854122621563, "ai2arc/accuracy/ARC-Challenge": 0.2206008583690987, "ai2arc/accuracy/group_average": 0.2855646997976572, "ai2arc/accuracy/seq_average": 0.3076487252124646, "race/accuracy/test/high": 0.27615780445969124, "race/accuracy/test/middle": 0.346100278551532, "race/accuracy/group_average": 0.3111290415056116, "race/accuracy/seq_average": 0.2965139845966761, "siqa/accuracy/dev": 0.36284544524053225, "siqa/accuracy/group_average": 0.36284544524053225, "siqa/accuracy/seq_average": 0.36284544524053225, "commonsenseqa/accuracy/dev_rand_split": 0.2620802620802621, "commonsenseqa/accuracy/group_average": 0.2620802620802621, "commonsenseqa/accuracy/seq_average": 0.2620802620802621}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-160000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.556674533420139, "val/accuracy": 0.4846937391493056, "val/perplexity": 12.892871134998618, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6150216997039983, "lambada/accuracy/total": 0.27096273291925466, "lambada/accuracy/openai_last_token": 0.7699922360248447, "lambada/perplexity": 10.033828407483169, "lambada/lm_loss": 3.148103875640894, "lambada/lm_perplexity": 23.291858423875222, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3778282360342801, "mean_loss": 2.585848116562069, "blimp/accuracy/passive_2": 0.908, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.81, "blimp/accuracy/tough_vs_raising_2": 0.875, "blimp/accuracy/tough_vs_raising_1": 0.553, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.911, "blimp/accuracy/principle_A_reconstruction": 0.41, "blimp/accuracy/wh_vs_that_with_gap": 0.488, "blimp/accuracy/principle_A_domain_2": 0.859, "blimp/accuracy/determiner_noun_agreement_1": 0.99, "blimp/accuracy/ellipsis_n_bar_2": 0.886, "blimp/accuracy/principle_A_domain_3": 0.55, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.91, "blimp/accuracy/animate_subject_trans": 0.895, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.839, "blimp/accuracy/distractor_agreement_relative_clause": 0.602, "blimp/accuracy/transitive": 0.876, "blimp/accuracy/sentential_subject_island": 0.35, "blimp/accuracy/adjunct_island": 0.837, "blimp/accuracy/intransitive": 0.767, "blimp/accuracy/existential_there_subject_raising": 0.866, "blimp/accuracy/irregular_past_participle_adjectives": 0.896, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.364, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.276, "blimp/accuracy/only_npi_scope": 0.601, "blimp/accuracy/superlative_quantifiers_2": 0.73, "blimp/accuracy/passive_1": 0.904, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.924, "blimp/accuracy/inchoative": 0.61, "blimp/accuracy/anaphor_gender_agreement": 0.959, "blimp/accuracy/principle_A_c_command": 0.659, "blimp/accuracy/only_npi_licensor_present": 0.554, "blimp/accuracy/expletive_it_object_raising": 0.726, "blimp/accuracy/left_branch_island_simple_question": 0.456, "blimp/accuracy/wh_questions_subject_gap": 0.932, "blimp/accuracy/existential_there_quantifiers_2": 0.401, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.926, "blimp/accuracy/sentential_negation_npi_scope": 0.661, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.836, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.88, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.865, "blimp/accuracy/principle_A_case_2": 0.951, "blimp/accuracy/distractor_agreement_relational_noun": 0.844, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989, "blimp/accuracy/superlative_quantifiers_1": 0.572, "blimp/accuracy/wh_island": 0.776, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.538, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.968, "blimp/accuracy/irregular_past_participle_verbs": 0.859, "blimp/accuracy/drop_argument": 0.754, "blimp/accuracy/wh_questions_object_gap": 0.795, "blimp/accuracy/animate_subject_passive": 0.795, "blimp/accuracy/existential_there_quantifiers_1": 0.957, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882, "blimp/accuracy/npi_present_2": 0.527, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.914, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.962, "blimp/accuracy/existential_there_object_raising": 0.863, "blimp/accuracy/matrix_question_npi_licensor_present": 0.197, "blimp/accuracy/npi_present_1": 0.524, "blimp/accuracy/wh_vs_that_no_gap": 0.983, "blimp/accuracy/left_branch_island_echo_question": 0.399, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.971, "blimp/accuracy/causative": 0.717, "blimp/accuracy/group_average": 0.7618358208955223, "blimp/accuracy/seq_average": 0.7618358208955224, "cbt/accuracy/NE": 0.7872596153846154, "cbt/accuracy/V": 0.9168, "cbt/accuracy/CN": 0.8392, "cbt/accuracy/P": 0.896, "cbt/accuracy/group_average": 0.8598149038461538, "cbt/accuracy/seq_average": 0.85984393757503, "hellaswag/accuracy/val": 0.3058155745867357, "hellaswag/accuracy/group_average": 0.3058155745867357, "hellaswag/accuracy/seq_average": 0.3058155745867357, "piqa/accuracy/val": 0.5941240478781284, "piqa/accuracy/group_average": 0.5941240478781284, "piqa/accuracy/seq_average": 0.5941240478781284, "ai2arc/accuracy/ARC-Easy": 0.35095137420718814, "ai2arc/accuracy/ARC-Challenge": 0.2094420600858369, "ai2arc/accuracy/group_average": 0.2801967171465125, "ai2arc/accuracy/seq_average": 0.30424929178470256, "race/accuracy/test/high": 0.2775871926815323, "race/accuracy/test/middle": 0.3579387186629526, "race/accuracy/group_average": 0.31776295567224244, "race/accuracy/seq_average": 0.3009728415079043, "siqa/accuracy/dev": 0.3664278403275333, "siqa/accuracy/group_average": 0.3664278403275333, "siqa/accuracy/seq_average": 0.3664278403275333, "commonsenseqa/accuracy/dev_rand_split": 0.26453726453726456, "commonsenseqa/accuracy/group_average": 0.26453726453726456, "commonsenseqa/accuracy/seq_average": 0.26453726453726456}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-180000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.5377245706225198, "val/accuracy": 0.48673890128968256, "val/perplexity": 12.650852075125359, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.7023861334190604, "lambada/accuracy/total": 0.28940217391304346, "lambada/accuracy/openai_last_token": 0.7721273291925466, "lambada/perplexity": 9.898926941547161, "lambada/lm_loss": 3.1401402371438807, "lambada/lm_perplexity": 23.107107106219267, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.388070537601363, "mean_loss": 2.62005535202079, "blimp/accuracy/passive_2": 0.914, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.804, "blimp/accuracy/tough_vs_raising_2": 0.879, "blimp/accuracy/tough_vs_raising_1": 0.586, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.889, "blimp/accuracy/principle_A_reconstruction": 0.435, "blimp/accuracy/wh_vs_that_with_gap": 0.553, "blimp/accuracy/principle_A_domain_2": 0.876, "blimp/accuracy/determiner_noun_agreement_1": 0.986, "blimp/accuracy/ellipsis_n_bar_2": 0.884, "blimp/accuracy/principle_A_domain_3": 0.576, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.918, "blimp/accuracy/animate_subject_trans": 0.894, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.869, "blimp/accuracy/distractor_agreement_relative_clause": 0.663, "blimp/accuracy/transitive": 0.876, "blimp/accuracy/sentential_subject_island": 0.336, "blimp/accuracy/adjunct_island": 0.826, "blimp/accuracy/intransitive": 0.763, "blimp/accuracy/existential_there_subject_raising": 0.887, "blimp/accuracy/irregular_past_participle_adjectives": 0.871, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.441, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.341, "blimp/accuracy/only_npi_scope": 0.554, "blimp/accuracy/superlative_quantifiers_2": 0.687, "blimp/accuracy/passive_1": 0.902, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.909, "blimp/accuracy/inchoative": 0.589, "blimp/accuracy/anaphor_gender_agreement": 0.968, "blimp/accuracy/principle_A_c_command": 0.698, "blimp/accuracy/only_npi_licensor_present": 0.718, "blimp/accuracy/expletive_it_object_raising": 0.776, "blimp/accuracy/left_branch_island_simple_question": 0.502, "blimp/accuracy/wh_questions_subject_gap": 0.948, "blimp/accuracy/existential_there_quantifiers_2": 0.234, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.923, "blimp/accuracy/sentential_negation_npi_scope": 0.625, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.825, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.871, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.882, "blimp/accuracy/principle_A_case_2": 0.954, "blimp/accuracy/distractor_agreement_relational_noun": 0.829, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991, "blimp/accuracy/superlative_quantifiers_1": 0.556, "blimp/accuracy/wh_island": 0.701, "blimp/accuracy/principle_A_domain_1": 0.99, "blimp/accuracy/complex_NP_island": 0.552, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.96, "blimp/accuracy/irregular_past_participle_verbs": 0.87, "blimp/accuracy/drop_argument": 0.764, "blimp/accuracy/wh_questions_object_gap": 0.805, "blimp/accuracy/animate_subject_passive": 0.783, "blimp/accuracy/existential_there_quantifiers_1": 0.95, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.876, "blimp/accuracy/npi_present_2": 0.502, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.906, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.948, "blimp/accuracy/existential_there_object_raising": 0.888, "blimp/accuracy/matrix_question_npi_licensor_present": 0.198, "blimp/accuracy/npi_present_1": 0.51, "blimp/accuracy/wh_vs_that_no_gap": 0.984, "blimp/accuracy/left_branch_island_echo_question": 0.393, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974, "blimp/accuracy/causative": 0.729, "blimp/accuracy/group_average": 0.7651044776119402, "blimp/accuracy/seq_average": 0.7651044776119403, "cbt/accuracy/NE": 0.7876602564102564, "cbt/accuracy/V": 0.922, "cbt/accuracy/CN": 0.8468, "cbt/accuracy/P": 0.8936, "cbt/accuracy/group_average": 0.8625150641025641, "cbt/accuracy/seq_average": 0.8625450180072028, "hellaswag/accuracy/val": 0.30850428201553476, "hellaswag/accuracy/group_average": 0.30850428201553476, "hellaswag/accuracy/seq_average": 0.30850428201553476, "piqa/accuracy/val": 0.5892274211099021, "piqa/accuracy/group_average": 0.5892274211099021, "piqa/accuracy/seq_average": 0.5892274211099021, "ai2arc/accuracy/ARC-Easy": 0.34587737843551797, "ai2arc/accuracy/ARC-Challenge": 0.21030042918454936, "ai2arc/accuracy/group_average": 0.27808890381003365, "ai2arc/accuracy/seq_average": 0.30113314447592066, "race/accuracy/test/high": 0.275871926815323, "race/accuracy/test/middle": 0.36142061281337046, "race/accuracy/group_average": 0.31864626981434674, "race/accuracy/seq_average": 0.3007701661937576, "siqa/accuracy/dev": 0.36745138178096215, "siqa/accuracy/group_average": 0.36745138178096215, "siqa/accuracy/seq_average": 0.36745138178096215, "commonsenseqa/accuracy/dev_rand_split": 0.26617526617526616, "commonsenseqa/accuracy/group_average": 0.26617526617526616, "commonsenseqa/accuracy/seq_average": 0.26617526617526616}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-20000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.8620091998387895, "val/accuracy": 0.44157676091269843, "val/perplexity": 17.496645902381935, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.652888896302407, "lambada/accuracy/total": 0.18885869565217392, "lambada/accuracy/openai_last_token": 0.7303959627329193, "lambada/perplexity": 17.69543910903868, "lambada/lm_loss": 3.4205406662573883, "lambada/lm_perplexity": 30.585947341091266, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.31521772828243616, "mean_loss": 2.7574490480705984, "blimp/accuracy/passive_2": 0.881, "blimp/accuracy/determiner_noun_agreement_2": 0.973, "blimp/accuracy/ellipsis_n_bar_1": 0.747, "blimp/accuracy/tough_vs_raising_2": 0.895, "blimp/accuracy/tough_vs_raising_1": 0.506, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.858, "blimp/accuracy/principle_A_reconstruction": 0.424, "blimp/accuracy/wh_vs_that_with_gap": 0.541, "blimp/accuracy/principle_A_domain_2": 0.848, "blimp/accuracy/determiner_noun_agreement_1": 0.981, "blimp/accuracy/ellipsis_n_bar_2": 0.909, "blimp/accuracy/principle_A_domain_3": 0.564, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.898, "blimp/accuracy/animate_subject_trans": 0.884, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.839, "blimp/accuracy/distractor_agreement_relative_clause": 0.506, "blimp/accuracy/transitive": 0.833, "blimp/accuracy/sentential_subject_island": 0.372, "blimp/accuracy/adjunct_island": 0.821, "blimp/accuracy/intransitive": 0.765, "blimp/accuracy/existential_there_subject_raising": 0.84, "blimp/accuracy/irregular_past_participle_adjectives": 0.958, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.24, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.228, "blimp/accuracy/only_npi_scope": 0.664, "blimp/accuracy/superlative_quantifiers_2": 0.683, "blimp/accuracy/passive_1": 0.861, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.889, "blimp/accuracy/inchoative": 0.594, "blimp/accuracy/anaphor_gender_agreement": 0.963, "blimp/accuracy/principle_A_c_command": 0.548, "blimp/accuracy/only_npi_licensor_present": 0.924, "blimp/accuracy/expletive_it_object_raising": 0.754, "blimp/accuracy/left_branch_island_simple_question": 0.219, "blimp/accuracy/wh_questions_subject_gap": 0.918, "blimp/accuracy/existential_there_quantifiers_2": 0.197, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.922, "blimp/accuracy/sentential_negation_npi_scope": 0.501, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.739, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.907, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.845, "blimp/accuracy/principle_A_case_2": 0.953, "blimp/accuracy/distractor_agreement_relational_noun": 0.754, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.996, "blimp/accuracy/superlative_quantifiers_1": 0.651, "blimp/accuracy/wh_island": 0.63, "blimp/accuracy/principle_A_domain_1": 0.985, "blimp/accuracy/complex_NP_island": 0.544, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.951, "blimp/accuracy/irregular_past_participle_verbs": 0.832, "blimp/accuracy/drop_argument": 0.787, "blimp/accuracy/wh_questions_object_gap": 0.77, "blimp/accuracy/animate_subject_passive": 0.775, "blimp/accuracy/existential_there_quantifiers_1": 0.95, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.847, "blimp/accuracy/npi_present_2": 0.64, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.877, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.817, "blimp/accuracy/matrix_question_npi_licensor_present": 0.088, "blimp/accuracy/npi_present_1": 0.633, "blimp/accuracy/wh_vs_that_no_gap": 0.961, "blimp/accuracy/left_branch_island_echo_question": 0.427, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.969, "blimp/accuracy/causative": 0.686, "blimp/accuracy/group_average": 0.7448208955223884, "blimp/accuracy/seq_average": 0.7448208955223881, "cbt/accuracy/NE": 0.7207532051282052, "cbt/accuracy/V": 0.8836, "cbt/accuracy/CN": 0.7796, "cbt/accuracy/P": 0.86, "cbt/accuracy/group_average": 0.8109883012820512, "cbt/accuracy/seq_average": 0.8110244097639056, "hellaswag/accuracy/val": 0.2795259908384784, "hellaswag/accuracy/group_average": 0.2795259908384784, "hellaswag/accuracy/seq_average": 0.2795259908384784, "piqa/accuracy/val": 0.5571273122959739, "piqa/accuracy/group_average": 0.5571273122959739, "piqa/accuracy/seq_average": 0.5571273122959739, "ai2arc/accuracy/ARC-Easy": 0.3128964059196617, "ai2arc/accuracy/ARC-Challenge": 0.2, "ai2arc/accuracy/group_average": 0.25644820295983084, "ai2arc/accuracy/seq_average": 0.2756373937677054, "race/accuracy/test/high": 0.26300743281875355, "race/accuracy/test/middle": 0.34192200557103064, "race/accuracy/group_average": 0.3024647191948921, "race/accuracy/seq_average": 0.2859748682610458, "siqa/accuracy/dev": 0.3541453428863869, "siqa/accuracy/group_average": 0.3541453428863869, "siqa/accuracy/seq_average": 0.3541453428863869, "commonsenseqa/accuracy/dev_rand_split": 0.23914823914823916, "commonsenseqa/accuracy/group_average": 0.23914823914823916, "commonsenseqa/accuracy/seq_average": 0.23914823914823916}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-200000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.5251367962549605, "val/accuracy": 0.4890039837549603, "val/perplexity": 12.492604088177709, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.4911764393682065, "lambada/accuracy/total": 0.2919254658385093, "lambada/accuracy/openai_last_token": 0.7711568322981367, "lambada/perplexity": 9.573692734788992, "lambada/lm_loss": 3.1340749230508127, "lambada/lm_perplexity": 22.967379418353747, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3904647247967348, "mean_loss": 2.5081566178115837, "blimp/accuracy/passive_2": 0.927, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.789, "blimp/accuracy/tough_vs_raising_2": 0.841, "blimp/accuracy/tough_vs_raising_1": 0.595, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.908, "blimp/accuracy/principle_A_reconstruction": 0.377, "blimp/accuracy/wh_vs_that_with_gap": 0.549, "blimp/accuracy/principle_A_domain_2": 0.83, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.897, "blimp/accuracy/principle_A_domain_3": 0.61, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.908, "blimp/accuracy/animate_subject_trans": 0.893, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.864, "blimp/accuracy/distractor_agreement_relative_clause": 0.66, "blimp/accuracy/transitive": 0.871, "blimp/accuracy/sentential_subject_island": 0.347, "blimp/accuracy/adjunct_island": 0.839, "blimp/accuracy/intransitive": 0.774, "blimp/accuracy/existential_there_subject_raising": 0.882, "blimp/accuracy/irregular_past_participle_adjectives": 0.917, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.436, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.31, "blimp/accuracy/only_npi_scope": 0.55, "blimp/accuracy/superlative_quantifiers_2": 0.675, "blimp/accuracy/passive_1": 0.906, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.91, "blimp/accuracy/inchoative": 0.598, "blimp/accuracy/anaphor_gender_agreement": 0.971, "blimp/accuracy/principle_A_c_command": 0.688, "blimp/accuracy/only_npi_licensor_present": 0.979, "blimp/accuracy/expletive_it_object_raising": 0.758, "blimp/accuracy/left_branch_island_simple_question": 0.514, "blimp/accuracy/wh_questions_subject_gap": 0.95, "blimp/accuracy/existential_there_quantifiers_2": 0.312, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.934, "blimp/accuracy/sentential_negation_npi_scope": 0.628, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.859, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.903, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.905, "blimp/accuracy/principle_A_case_2": 0.947, "blimp/accuracy/distractor_agreement_relational_noun": 0.841, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.511, "blimp/accuracy/wh_island": 0.749, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.56, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.969, "blimp/accuracy/irregular_past_participle_verbs": 0.878, "blimp/accuracy/drop_argument": 0.741, "blimp/accuracy/wh_questions_object_gap": 0.843, "blimp/accuracy/animate_subject_passive": 0.788, "blimp/accuracy/existential_there_quantifiers_1": 0.967, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.866, "blimp/accuracy/npi_present_2": 0.535, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.918, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.961, "blimp/accuracy/existential_there_object_raising": 0.872, "blimp/accuracy/matrix_question_npi_licensor_present": 0.235, "blimp/accuracy/npi_present_1": 0.537, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.411, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.977, "blimp/accuracy/causative": 0.764, "blimp/accuracy/group_average": 0.7740746268656715, "blimp/accuracy/seq_average": 0.7740746268656716, "cbt/accuracy/NE": 0.7884615384615384, "cbt/accuracy/V": 0.9212, "cbt/accuracy/CN": 0.84, "cbt/accuracy/P": 0.8944, "cbt/accuracy/group_average": 0.8610153846153846, "cbt/accuracy/seq_average": 0.8610444177671068, "hellaswag/accuracy/val": 0.3110934076877116, "hellaswag/accuracy/group_average": 0.3110934076877116, "hellaswag/accuracy/seq_average": 0.3110934076877116, "piqa/accuracy/val": 0.6022850924918389, "piqa/accuracy/group_average": 0.6022850924918389, "piqa/accuracy/seq_average": 0.6022850924918389, "ai2arc/accuracy/ARC-Easy": 0.346723044397463, "ai2arc/accuracy/ARC-Challenge": 0.2206008583690987, "ai2arc/accuracy/group_average": 0.2836619513832809, "ai2arc/accuracy/seq_average": 0.30509915014164307, "race/accuracy/test/high": 0.27530017152658665, "race/accuracy/test/middle": 0.35376044568245124, "race/accuracy/group_average": 0.3145303086045189, "race/accuracy/seq_average": 0.29813538710985005, "siqa/accuracy/dev": 0.3638689866939611, "siqa/accuracy/group_average": 0.3638689866939611, "siqa/accuracy/seq_average": 0.3638689866939611, "commonsenseqa/accuracy/dev_rand_split": 0.28173628173628174, "commonsenseqa/accuracy/group_average": 0.28173628173628174, "commonsenseqa/accuracy/seq_average": 0.28173628173628174}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-220000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.5123237730964783, "val/accuracy": 0.49121287512400796, "val/perplexity": 12.333557174515738, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6664218310243593, "lambada/accuracy/total": 0.2798913043478261, "lambada/accuracy/openai_last_token": 0.7699922360248447, "lambada/perplexity": 9.541353074406723, "lambada/lm_loss": 3.1203510957270937, "lambada/lm_perplexity": 22.65433208625496, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.38555208973591704, "mean_loss": 2.589372802060419, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.976, "blimp/accuracy/ellipsis_n_bar_1": 0.806, "blimp/accuracy/tough_vs_raising_2": 0.857, "blimp/accuracy/tough_vs_raising_1": 0.622, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.904, "blimp/accuracy/principle_A_reconstruction": 0.344, "blimp/accuracy/wh_vs_that_with_gap": 0.601, "blimp/accuracy/principle_A_domain_2": 0.859, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.895, "blimp/accuracy/principle_A_domain_3": 0.572, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.907, "blimp/accuracy/animate_subject_trans": 0.899, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.86, "blimp/accuracy/distractor_agreement_relative_clause": 0.624, "blimp/accuracy/transitive": 0.862, "blimp/accuracy/sentential_subject_island": 0.458, "blimp/accuracy/adjunct_island": 0.85, "blimp/accuracy/intransitive": 0.811, "blimp/accuracy/existential_there_subject_raising": 0.856, "blimp/accuracy/irregular_past_participle_adjectives": 0.955, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.29, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.369, "blimp/accuracy/only_npi_scope": 0.62, "blimp/accuracy/superlative_quantifiers_2": 0.681, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.928, "blimp/accuracy/inchoative": 0.646, "blimp/accuracy/anaphor_gender_agreement": 0.969, "blimp/accuracy/principle_A_c_command": 0.641, "blimp/accuracy/only_npi_licensor_present": 0.851, "blimp/accuracy/expletive_it_object_raising": 0.797, "blimp/accuracy/left_branch_island_simple_question": 0.344, "blimp/accuracy/wh_questions_subject_gap": 0.936, "blimp/accuracy/existential_there_quantifiers_2": 0.295, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.931, "blimp/accuracy/sentential_negation_npi_scope": 0.654, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.823, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.854, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.899, "blimp/accuracy/principle_A_case_2": 0.964, "blimp/accuracy/distractor_agreement_relational_noun": 0.861, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.634, "blimp/accuracy/wh_island": 0.704, "blimp/accuracy/principle_A_domain_1": 0.986, "blimp/accuracy/complex_NP_island": 0.539, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.966, "blimp/accuracy/irregular_past_participle_verbs": 0.882, "blimp/accuracy/drop_argument": 0.795, "blimp/accuracy/wh_questions_object_gap": 0.791, "blimp/accuracy/animate_subject_passive": 0.786, "blimp/accuracy/existential_there_quantifiers_1": 0.955, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.883, "blimp/accuracy/npi_present_2": 0.548, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.919, "blimp/accuracy/anaphor_number_agreement": 0.99, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.954, "blimp/accuracy/existential_there_object_raising": 0.881, "blimp/accuracy/matrix_question_npi_licensor_present": 0.196, "blimp/accuracy/npi_present_1": 0.567, "blimp/accuracy/wh_vs_that_no_gap": 0.961, "blimp/accuracy/left_branch_island_echo_question": 0.399, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.959, "blimp/accuracy/causative": 0.722, "blimp/accuracy/group_average": 0.7712089552238807, "blimp/accuracy/seq_average": 0.7712089552238806, "cbt/accuracy/NE": 0.7956730769230769, "cbt/accuracy/V": 0.9216, "cbt/accuracy/CN": 0.8512, "cbt/accuracy/P": 0.896, "cbt/accuracy/group_average": 0.8661182692307692, "cbt/accuracy/seq_average": 0.8661464585834334, "hellaswag/accuracy/val": 0.31009759012148974, "hellaswag/accuracy/group_average": 0.31009759012148974, "hellaswag/accuracy/seq_average": 0.31009759012148974, "piqa/accuracy/val": 0.6055495103373232, "piqa/accuracy/group_average": 0.6055495103373232, "piqa/accuracy/seq_average": 0.6055495103373232, "ai2arc/accuracy/ARC-Easy": 0.34460887949260044, "ai2arc/accuracy/ARC-Challenge": 0.2, "ai2arc/accuracy/group_average": 0.27230443974630025, "ai2arc/accuracy/seq_average": 0.29688385269121814, "race/accuracy/test/high": 0.2770154373927959, "race/accuracy/test/middle": 0.3593314763231198, "race/accuracy/group_average": 0.31817345685795784, "race/accuracy/seq_average": 0.3009728415079043, "siqa/accuracy/dev": 0.3618219037871034, "siqa/accuracy/group_average": 0.3618219037871034, "siqa/accuracy/seq_average": 0.3618219037871034, "commonsenseqa/accuracy/dev_rand_split": 0.2858312858312858, "commonsenseqa/accuracy/group_average": 0.2858312858312858, "commonsenseqa/accuracy/seq_average": 0.2858312858312858}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-240000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.501651340060764, "val/accuracy": 0.4924161396329365, "val/perplexity": 12.202628020535252, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5906900915299884, "lambada/accuracy/total": 0.2981366459627329, "lambada/accuracy/openai_last_token": 0.781444099378882, "lambada/perplexity": 8.975166147789626, "lambada/lm_loss": 3.1024050156831766, "lambada/lm_perplexity": 22.2514019514941, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3952763927978347, "mean_loss": 2.546170715795376, "blimp/accuracy/passive_2": 0.906, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.794, "blimp/accuracy/tough_vs_raising_2": 0.894, "blimp/accuracy/tough_vs_raising_1": 0.581, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.902, "blimp/accuracy/principle_A_reconstruction": 0.32, "blimp/accuracy/wh_vs_that_with_gap": 0.546, "blimp/accuracy/principle_A_domain_2": 0.889, "blimp/accuracy/determiner_noun_agreement_1": 0.985, "blimp/accuracy/ellipsis_n_bar_2": 0.892, "blimp/accuracy/principle_A_domain_3": 0.595, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.913, "blimp/accuracy/animate_subject_trans": 0.884, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.848, "blimp/accuracy/distractor_agreement_relative_clause": 0.631, "blimp/accuracy/transitive": 0.879, "blimp/accuracy/sentential_subject_island": 0.318, "blimp/accuracy/adjunct_island": 0.855, "blimp/accuracy/intransitive": 0.777, "blimp/accuracy/existential_there_subject_raising": 0.864, "blimp/accuracy/irregular_past_participle_adjectives": 0.865, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.386, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.316, "blimp/accuracy/only_npi_scope": 0.505, "blimp/accuracy/superlative_quantifiers_2": 0.775, "blimp/accuracy/passive_1": 0.893, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/inchoative": 0.602, "blimp/accuracy/anaphor_gender_agreement": 0.965, "blimp/accuracy/principle_A_c_command": 0.715, "blimp/accuracy/only_npi_licensor_present": 0.662, "blimp/accuracy/expletive_it_object_raising": 0.772, "blimp/accuracy/left_branch_island_simple_question": 0.435, "blimp/accuracy/wh_questions_subject_gap": 0.94, "blimp/accuracy/existential_there_quantifiers_2": 0.401, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.941, "blimp/accuracy/sentential_negation_npi_scope": 0.661, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.802, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.883, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.913, "blimp/accuracy/principle_A_case_2": 0.975, "blimp/accuracy/distractor_agreement_relational_noun": 0.823, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.708, "blimp/accuracy/wh_island": 0.717, "blimp/accuracy/principle_A_domain_1": 0.99, "blimp/accuracy/complex_NP_island": 0.512, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.957, "blimp/accuracy/irregular_past_participle_verbs": 0.892, "blimp/accuracy/drop_argument": 0.758, "blimp/accuracy/wh_questions_object_gap": 0.822, "blimp/accuracy/animate_subject_passive": 0.802, "blimp/accuracy/existential_there_quantifiers_1": 0.964, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.902, "blimp/accuracy/npi_present_2": 0.554, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.928, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.947, "blimp/accuracy/existential_there_object_raising": 0.881, "blimp/accuracy/matrix_question_npi_licensor_present": 0.227, "blimp/accuracy/npi_present_1": 0.522, "blimp/accuracy/wh_vs_that_no_gap": 0.984, "blimp/accuracy/left_branch_island_echo_question": 0.421, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.972, "blimp/accuracy/causative": 0.737, "blimp/accuracy/group_average": 0.7696865671641792, "blimp/accuracy/seq_average": 0.7696865671641792, "cbt/accuracy/NE": 0.7932692307692307, "cbt/accuracy/V": 0.9292, "cbt/accuracy/CN": 0.8428, "cbt/accuracy/P": 0.8908, "cbt/accuracy/group_average": 0.8640173076923077, "cbt/accuracy/seq_average": 0.864045618247299, "hellaswag/accuracy/val": 0.3114917347142004, "hellaswag/accuracy/group_average": 0.3114917347142004, "hellaswag/accuracy/seq_average": 0.3114917347142004, "piqa/accuracy/val": 0.6001088139281828, "piqa/accuracy/group_average": 0.6001088139281828, "piqa/accuracy/seq_average": 0.6001088139281828, "ai2arc/accuracy/ARC-Easy": 0.35433403805496827, "ai2arc/accuracy/ARC-Challenge": 0.2128755364806867, "ai2arc/accuracy/group_average": 0.28360478726782745, "ai2arc/accuracy/seq_average": 0.3076487252124646, "race/accuracy/test/high": 0.2830188679245283, "race/accuracy/test/middle": 0.36908077994428967, "race/accuracy/group_average": 0.32604982393440896, "race/accuracy/seq_average": 0.3080664775030401, "siqa/accuracy/dev": 0.3654042988741044, "siqa/accuracy/group_average": 0.3654042988741044, "siqa/accuracy/seq_average": 0.3654042988741044, "commonsenseqa/accuracy/dev_rand_split": 0.2751842751842752, "commonsenseqa/accuracy/group_average": 0.2751842751842752, "commonsenseqa/accuracy/seq_average": 0.2751842751842752}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-260000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.489743187313988, "val/accuracy": 0.49438670324900796, "val/perplexity": 12.058179029430649, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5379543896787657, "lambada/accuracy/total": 0.30124223602484473, "lambada/accuracy/openai_last_token": 0.7779503105590062, "lambada/perplexity": 9.090749567287679, "lambada/lm_loss": 3.069082979405485, "lambada/lm_perplexity": 21.5221573614177, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.39781446963692635, "mean_loss": 2.513848788496377, "blimp/accuracy/passive_2": 0.905, "blimp/accuracy/determiner_noun_agreement_2": 0.977, "blimp/accuracy/ellipsis_n_bar_1": 0.795, "blimp/accuracy/tough_vs_raising_2": 0.871, "blimp/accuracy/tough_vs_raising_1": 0.603, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.924, "blimp/accuracy/principle_A_reconstruction": 0.338, "blimp/accuracy/wh_vs_that_with_gap": 0.528, "blimp/accuracy/principle_A_domain_2": 0.864, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.894, "blimp/accuracy/principle_A_domain_3": 0.607, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.904, "blimp/accuracy/animate_subject_trans": 0.892, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.844, "blimp/accuracy/distractor_agreement_relative_clause": 0.632, "blimp/accuracy/transitive": 0.881, "blimp/accuracy/sentential_subject_island": 0.4, "blimp/accuracy/adjunct_island": 0.843, "blimp/accuracy/intransitive": 0.801, "blimp/accuracy/existential_there_subject_raising": 0.881, "blimp/accuracy/irregular_past_participle_adjectives": 0.824, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.444, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.298, "blimp/accuracy/only_npi_scope": 0.611, "blimp/accuracy/superlative_quantifiers_2": 0.746, "blimp/accuracy/passive_1": 0.901, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.913, "blimp/accuracy/inchoative": 0.635, "blimp/accuracy/anaphor_gender_agreement": 0.965, "blimp/accuracy/principle_A_c_command": 0.673, "blimp/accuracy/only_npi_licensor_present": 0.659, "blimp/accuracy/expletive_it_object_raising": 0.779, "blimp/accuracy/left_branch_island_simple_question": 0.497, "blimp/accuracy/wh_questions_subject_gap": 0.936, "blimp/accuracy/existential_there_quantifiers_2": 0.308, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.671, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.83, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.861, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.948, "blimp/accuracy/distractor_agreement_relational_noun": 0.853, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.734, "blimp/accuracy/wh_island": 0.749, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.539, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.965, "blimp/accuracy/irregular_past_participle_verbs": 0.862, "blimp/accuracy/drop_argument": 0.775, "blimp/accuracy/wh_questions_object_gap": 0.791, "blimp/accuracy/animate_subject_passive": 0.82, "blimp/accuracy/existential_there_quantifiers_1": 0.965, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.874, "blimp/accuracy/npi_present_2": 0.533, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.912, "blimp/accuracy/anaphor_number_agreement": 0.993, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.96, "blimp/accuracy/existential_there_object_raising": 0.861, "blimp/accuracy/matrix_question_npi_licensor_present": 0.276, "blimp/accuracy/npi_present_1": 0.553, "blimp/accuracy/wh_vs_that_no_gap": 0.969, "blimp/accuracy/left_branch_island_echo_question": 0.427, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.976, "blimp/accuracy/causative": 0.744, "blimp/accuracy/group_average": 0.7736417910447763, "blimp/accuracy/seq_average": 0.7736417910447762, "cbt/accuracy/NE": 0.8024839743589743, "cbt/accuracy/V": 0.9228, "cbt/accuracy/CN": 0.8496, "cbt/accuracy/P": 0.9012, "cbt/accuracy/group_average": 0.8690209935897435, "cbt/accuracy/seq_average": 0.8690476190476191, "hellaswag/accuracy/val": 0.31208922525393346, "hellaswag/accuracy/group_average": 0.31208922525393346, "hellaswag/accuracy/seq_average": 0.31208922525393346, "piqa/accuracy/val": 0.6039173014145811, "piqa/accuracy/group_average": 0.6039173014145811, "piqa/accuracy/seq_average": 0.6039173014145811, "ai2arc/accuracy/ARC-Easy": 0.3382663847780127, "ai2arc/accuracy/ARC-Challenge": 0.2128755364806867, "ai2arc/accuracy/group_average": 0.2755709606293497, "ai2arc/accuracy/seq_average": 0.29688385269121814, "race/accuracy/test/high": 0.27844482561463696, "race/accuracy/test/middle": 0.366991643454039, "race/accuracy/group_average": 0.322718234534338, "race/accuracy/seq_average": 0.30421564653425215, "siqa/accuracy/dev": 0.35977482088024565, "siqa/accuracy/group_average": 0.35977482088024565, "siqa/accuracy/seq_average": 0.35977482088024565, "commonsenseqa/accuracy/dev_rand_split": 0.2710892710892711, "commonsenseqa/accuracy/group_average": 0.2710892710892711, "commonsenseqa/accuracy/seq_average": 0.2710892710892711}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-280000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.4793873136005704, "val/accuracy": 0.4960239955357143, "val/perplexity": 11.933950408374157, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.542667175672069, "lambada/accuracy/total": 0.3059006211180124, "lambada/accuracy/openai_last_token": 0.7779503105590062, "lambada/perplexity": 8.59194671028772, "lambada/lm_loss": 3.071023209425411, "lambada/lm_perplexity": 21.56395583343649, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.40096230832686336, "mean_loss": 2.5110272446363195, "blimp/accuracy/passive_2": 0.906, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.801, "blimp/accuracy/tough_vs_raising_2": 0.882, "blimp/accuracy/tough_vs_raising_1": 0.589, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.918, "blimp/accuracy/principle_A_reconstruction": 0.381, "blimp/accuracy/wh_vs_that_with_gap": 0.53, "blimp/accuracy/principle_A_domain_2": 0.866, "blimp/accuracy/determiner_noun_agreement_1": 0.989, "blimp/accuracy/ellipsis_n_bar_2": 0.907, "blimp/accuracy/principle_A_domain_3": 0.594, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.899, "blimp/accuracy/animate_subject_trans": 0.894, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.87, "blimp/accuracy/distractor_agreement_relative_clause": 0.616, "blimp/accuracy/transitive": 0.889, "blimp/accuracy/sentential_subject_island": 0.367, "blimp/accuracy/adjunct_island": 0.84, "blimp/accuracy/intransitive": 0.803, "blimp/accuracy/existential_there_subject_raising": 0.864, "blimp/accuracy/irregular_past_participle_adjectives": 0.712, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.403, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.297, "blimp/accuracy/only_npi_scope": 0.56, "blimp/accuracy/superlative_quantifiers_2": 0.795, "blimp/accuracy/passive_1": 0.892, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.898, "blimp/accuracy/inchoative": 0.638, "blimp/accuracy/anaphor_gender_agreement": 0.969, "blimp/accuracy/principle_A_c_command": 0.696, "blimp/accuracy/only_npi_licensor_present": 0.771, "blimp/accuracy/expletive_it_object_raising": 0.767, "blimp/accuracy/left_branch_island_simple_question": 0.452, "blimp/accuracy/wh_questions_subject_gap": 0.927, "blimp/accuracy/existential_there_quantifiers_2": 0.369, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.935, "blimp/accuracy/sentential_negation_npi_scope": 0.623, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.851, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.898, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.898, "blimp/accuracy/principle_A_case_2": 0.946, "blimp/accuracy/distractor_agreement_relational_noun": 0.823, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.614, "blimp/accuracy/wh_island": 0.794, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.614, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.962, "blimp/accuracy/irregular_past_participle_verbs": 0.872, "blimp/accuracy/drop_argument": 0.755, "blimp/accuracy/wh_questions_object_gap": 0.798, "blimp/accuracy/animate_subject_passive": 0.779, "blimp/accuracy/existential_there_quantifiers_1": 0.964, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.887, "blimp/accuracy/npi_present_2": 0.521, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.909, "blimp/accuracy/anaphor_number_agreement": 0.993, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.953, "blimp/accuracy/existential_there_object_raising": 0.874, "blimp/accuracy/matrix_question_npi_licensor_present": 0.243, "blimp/accuracy/npi_present_1": 0.54, "blimp/accuracy/wh_vs_that_no_gap": 0.982, "blimp/accuracy/left_branch_island_echo_question": 0.394, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.976, "blimp/accuracy/causative": 0.735, "blimp/accuracy/group_average": 0.7708358208955225, "blimp/accuracy/seq_average": 0.7708358208955224, "cbt/accuracy/NE": 0.8076923076923077, "cbt/accuracy/V": 0.9288, "cbt/accuracy/CN": 0.8532, "cbt/accuracy/P": 0.9028, "cbt/accuracy/group_average": 0.8731230769230769, "cbt/accuracy/seq_average": 0.8731492597038816, "hellaswag/accuracy/val": 0.3158733320055766, "hellaswag/accuracy/group_average": 0.3158733320055766, "hellaswag/accuracy/seq_average": 0.3158733320055766, "piqa/accuracy/val": 0.6017410228509249, "piqa/accuracy/group_average": 0.6017410228509249, "piqa/accuracy/seq_average": 0.6017410228509249, "ai2arc/accuracy/ARC-Easy": 0.35517970401691334, "ai2arc/accuracy/ARC-Challenge": 0.20858369098712445, "ai2arc/accuracy/group_average": 0.2818816975020189, "ai2arc/accuracy/seq_average": 0.3067988668555241, "race/accuracy/test/high": 0.27844482561463696, "race/accuracy/test/middle": 0.35236768802228413, "race/accuracy/group_average": 0.3154062568184606, "race/accuracy/seq_average": 0.29995946493717063, "siqa/accuracy/dev": 0.36438075742067555, "siqa/accuracy/group_average": 0.36438075742067555, "siqa/accuracy/seq_average": 0.36438075742067555, "commonsenseqa/accuracy/dev_rand_split": 0.266994266994267, "commonsenseqa/accuracy/group_average": 0.266994266994267, "commonsenseqa/accuracy/seq_average": 0.266994266994267}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-300000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.4707777235243054, "val/accuracy": 0.49688623821924605, "val/perplexity": 11.83164502303124, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.8143202503275426, "lambada/accuracy/total": 0.28843167701863354, "lambada/accuracy/openai_last_token": 0.7773680124223602, "lambada/perplexity": 9.222715330381197, "lambada/lm_loss": 3.070398030016993, "lambada/lm_perplexity": 21.550478705535774, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.39265895761893976, "mean_loss": 2.6425489869259238, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.809, "blimp/accuracy/tough_vs_raising_2": 0.872, "blimp/accuracy/tough_vs_raising_1": 0.56, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.929, "blimp/accuracy/principle_A_reconstruction": 0.374, "blimp/accuracy/wh_vs_that_with_gap": 0.559, "blimp/accuracy/principle_A_domain_2": 0.872, "blimp/accuracy/determiner_noun_agreement_1": 0.989, "blimp/accuracy/ellipsis_n_bar_2": 0.876, "blimp/accuracy/principle_A_domain_3": 0.591, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.909, "blimp/accuracy/animate_subject_trans": 0.888, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.873, "blimp/accuracy/distractor_agreement_relative_clause": 0.619, "blimp/accuracy/transitive": 0.872, "blimp/accuracy/sentential_subject_island": 0.379, "blimp/accuracy/adjunct_island": 0.809, "blimp/accuracy/intransitive": 0.797, "blimp/accuracy/existential_there_subject_raising": 0.874, "blimp/accuracy/irregular_past_participle_adjectives": 0.866, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.453, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.325, "blimp/accuracy/only_npi_scope": 0.634, "blimp/accuracy/superlative_quantifiers_2": 0.785, "blimp/accuracy/passive_1": 0.884, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.916, "blimp/accuracy/inchoative": 0.623, "blimp/accuracy/anaphor_gender_agreement": 0.963, "blimp/accuracy/principle_A_c_command": 0.705, "blimp/accuracy/only_npi_licensor_present": 0.754, "blimp/accuracy/expletive_it_object_raising": 0.781, "blimp/accuracy/left_branch_island_simple_question": 0.521, "blimp/accuracy/wh_questions_subject_gap": 0.932, "blimp/accuracy/existential_there_quantifiers_2": 0.299, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.946, "blimp/accuracy/sentential_negation_npi_scope": 0.64, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.834, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.875, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.909, "blimp/accuracy/principle_A_case_2": 0.965, "blimp/accuracy/distractor_agreement_relational_noun": 0.809, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.99, "blimp/accuracy/superlative_quantifiers_1": 0.632, "blimp/accuracy/wh_island": 0.774, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.533, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.955, "blimp/accuracy/irregular_past_participle_verbs": 0.879, "blimp/accuracy/drop_argument": 0.759, "blimp/accuracy/wh_questions_object_gap": 0.816, "blimp/accuracy/animate_subject_passive": 0.788, "blimp/accuracy/existential_there_quantifiers_1": 0.961, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882, "blimp/accuracy/npi_present_2": 0.521, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.909, "blimp/accuracy/anaphor_number_agreement": 0.993, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.966, "blimp/accuracy/existential_there_object_raising": 0.895, "blimp/accuracy/matrix_question_npi_licensor_present": 0.226, "blimp/accuracy/npi_present_1": 0.488, "blimp/accuracy/wh_vs_that_no_gap": 0.976, "blimp/accuracy/left_branch_island_echo_question": 0.42, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.977, "blimp/accuracy/causative": 0.711, "blimp/accuracy/group_average": 0.7731194029850745, "blimp/accuracy/seq_average": 0.7731194029850746, "cbt/accuracy/NE": 0.7980769230769231, "cbt/accuracy/V": 0.9272, "cbt/accuracy/CN": 0.8528, "cbt/accuracy/P": 0.9, "cbt/accuracy/group_average": 0.8695192307692308, "cbt/accuracy/seq_average": 0.8695478191276511, "hellaswag/accuracy/val": 0.31627165903206533, "hellaswag/accuracy/group_average": 0.31627165903206533, "hellaswag/accuracy/seq_average": 0.31627165903206533, "piqa/accuracy/val": 0.6033732317736671, "piqa/accuracy/group_average": 0.6033732317736671, "piqa/accuracy/seq_average": 0.6033732317736671, "ai2arc/accuracy/ARC-Easy": 0.35264270613107823, "ai2arc/accuracy/ARC-Challenge": 0.2128755364806867, "ai2arc/accuracy/group_average": 0.28275912130588243, "ai2arc/accuracy/seq_average": 0.3065155807365439, "race/accuracy/test/high": 0.2804459691252144, "race/accuracy/test/middle": 0.3467966573816156, "race/accuracy/group_average": 0.313621313253415, "race/accuracy/seq_average": 0.29975678962302393, "siqa/accuracy/dev": 0.3725690890481064, "siqa/accuracy/group_average": 0.3725690890481064, "siqa/accuracy/seq_average": 0.3725690890481064, "commonsenseqa/accuracy/dev_rand_split": 0.2841932841932842, "commonsenseqa/accuracy/group_average": 0.2841932841932842, "commonsenseqa/accuracy/seq_average": 0.2841932841932842}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-320000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.4627273801773315, "val/accuracy": 0.4990021236359127, "val/perplexity": 11.736778584280033, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.7494929532827057, "lambada/accuracy/total": 0.3018245341614907, "lambada/accuracy/openai_last_token": 0.78027950310559, "lambada/perplexity": 8.941543204663434, "lambada/lm_loss": 3.0676869985731137, "lambada/lm_perplexity": 21.49213380330144, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4004133288987017, "mean_loss": 2.6061101667300184, "blimp/accuracy/passive_2": 0.912, "blimp/accuracy/determiner_noun_agreement_2": 0.984, "blimp/accuracy/ellipsis_n_bar_1": 0.793, "blimp/accuracy/tough_vs_raising_2": 0.874, "blimp/accuracy/tough_vs_raising_1": 0.569, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.927, "blimp/accuracy/principle_A_reconstruction": 0.374, "blimp/accuracy/wh_vs_that_with_gap": 0.566, "blimp/accuracy/principle_A_domain_2": 0.871, "blimp/accuracy/determiner_noun_agreement_1": 0.993, "blimp/accuracy/ellipsis_n_bar_2": 0.888, "blimp/accuracy/principle_A_domain_3": 0.624, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.905, "blimp/accuracy/animate_subject_trans": 0.903, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.88, "blimp/accuracy/distractor_agreement_relative_clause": 0.636, "blimp/accuracy/transitive": 0.882, "blimp/accuracy/sentential_subject_island": 0.407, "blimp/accuracy/adjunct_island": 0.826, "blimp/accuracy/intransitive": 0.768, "blimp/accuracy/existential_there_subject_raising": 0.865, "blimp/accuracy/irregular_past_participle_adjectives": 0.797, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.472, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.342, "blimp/accuracy/only_npi_scope": 0.616, "blimp/accuracy/superlative_quantifiers_2": 0.744, "blimp/accuracy/passive_1": 0.892, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.928, "blimp/accuracy/inchoative": 0.62, "blimp/accuracy/anaphor_gender_agreement": 0.963, "blimp/accuracy/principle_A_c_command": 0.711, "blimp/accuracy/only_npi_licensor_present": 0.723, "blimp/accuracy/expletive_it_object_raising": 0.777, "blimp/accuracy/left_branch_island_simple_question": 0.544, "blimp/accuracy/wh_questions_subject_gap": 0.937, "blimp/accuracy/existential_there_quantifiers_2": 0.37, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.938, "blimp/accuracy/sentential_negation_npi_scope": 0.635, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.854, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.888, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.914, "blimp/accuracy/principle_A_case_2": 0.965, "blimp/accuracy/distractor_agreement_relational_noun": 0.837, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993, "blimp/accuracy/superlative_quantifiers_1": 0.618, "blimp/accuracy/wh_island": 0.795, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.542, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.964, "blimp/accuracy/irregular_past_participle_verbs": 0.91, "blimp/accuracy/drop_argument": 0.749, "blimp/accuracy/wh_questions_object_gap": 0.808, "blimp/accuracy/animate_subject_passive": 0.798, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.893, "blimp/accuracy/npi_present_2": 0.54, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.918, "blimp/accuracy/anaphor_number_agreement": 0.993, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.912, "blimp/accuracy/matrix_question_npi_licensor_present": 0.244, "blimp/accuracy/npi_present_1": 0.518, "blimp/accuracy/wh_vs_that_no_gap": 0.976, "blimp/accuracy/left_branch_island_echo_question": 0.407, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974, "blimp/accuracy/causative": 0.738, "blimp/accuracy/group_average": 0.7779850746268657, "blimp/accuracy/seq_average": 0.7779850746268657, "cbt/accuracy/NE": 0.7984775641025641, "cbt/accuracy/V": 0.9288, "cbt/accuracy/CN": 0.8568, "cbt/accuracy/P": 0.9048, "cbt/accuracy/group_average": 0.872219391025641, "cbt/accuracy/seq_average": 0.8722488995598239, "hellaswag/accuracy/val": 0.3169687313284206, "hellaswag/accuracy/group_average": 0.3169687313284206, "hellaswag/accuracy/seq_average": 0.3169687313284206, "piqa/accuracy/val": 0.6022850924918389, "piqa/accuracy/group_average": 0.6022850924918389, "piqa/accuracy/seq_average": 0.6022850924918389, "ai2arc/accuracy/ARC-Easy": 0.3572938689217759, "ai2arc/accuracy/ARC-Challenge": 0.21630901287553647, "ai2arc/accuracy/group_average": 0.28680144089865617, "ai2arc/accuracy/seq_average": 0.31076487252124646, "race/accuracy/test/high": 0.27958833619210977, "race/accuracy/test/middle": 0.35724233983286907, "race/accuracy/group_average": 0.31841533801248945, "race/accuracy/seq_average": 0.30218889339278476, "siqa/accuracy/dev": 0.36745138178096215, "siqa/accuracy/group_average": 0.36745138178096215, "siqa/accuracy/seq_average": 0.36745138178096215, "commonsenseqa/accuracy/dev_rand_split": 0.27354627354627353, "commonsenseqa/accuracy/group_average": 0.27354627354627353, "commonsenseqa/accuracy/seq_average": 0.27354627354627353}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-340000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.4548998635912698, "val/accuracy": 0.49950106181795634, "val/perplexity": 11.645267375238564, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.7167445591517856, "lambada/accuracy/total": 0.32298136645962733, "lambada/accuracy/openai_last_token": 0.780667701863354, "lambada/perplexity": 8.336560427450598, "lambada/lm_loss": 3.0582854736277483, "lambada/lm_perplexity": 21.291021832160126, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.41124121413879183, "mean_loss": 2.5858222113715277, "blimp/accuracy/passive_2": 0.914, "blimp/accuracy/determiner_noun_agreement_2": 0.988, "blimp/accuracy/ellipsis_n_bar_1": 0.814, "blimp/accuracy/tough_vs_raising_2": 0.869, "blimp/accuracy/tough_vs_raising_1": 0.576, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.924, "blimp/accuracy/principle_A_reconstruction": 0.346, "blimp/accuracy/wh_vs_that_with_gap": 0.567, "blimp/accuracy/principle_A_domain_2": 0.855, "blimp/accuracy/determiner_noun_agreement_1": 0.994, "blimp/accuracy/ellipsis_n_bar_2": 0.892, "blimp/accuracy/principle_A_domain_3": 0.604, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.918, "blimp/accuracy/animate_subject_trans": 0.904, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.872, "blimp/accuracy/distractor_agreement_relative_clause": 0.646, "blimp/accuracy/transitive": 0.88, "blimp/accuracy/sentential_subject_island": 0.379, "blimp/accuracy/adjunct_island": 0.839, "blimp/accuracy/intransitive": 0.765, "blimp/accuracy/existential_there_subject_raising": 0.878, "blimp/accuracy/irregular_past_participle_adjectives": 0.835, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.457, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.322, "blimp/accuracy/only_npi_scope": 0.563, "blimp/accuracy/superlative_quantifiers_2": 0.766, "blimp/accuracy/passive_1": 0.888, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.932, "blimp/accuracy/inchoative": 0.608, "blimp/accuracy/anaphor_gender_agreement": 0.972, "blimp/accuracy/principle_A_c_command": 0.68, "blimp/accuracy/only_npi_licensor_present": 0.566, "blimp/accuracy/expletive_it_object_raising": 0.771, "blimp/accuracy/left_branch_island_simple_question": 0.551, "blimp/accuracy/wh_questions_subject_gap": 0.933, "blimp/accuracy/existential_there_quantifiers_2": 0.413, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939, "blimp/accuracy/sentential_negation_npi_scope": 0.665, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.846, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.882, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.909, "blimp/accuracy/principle_A_case_2": 0.954, "blimp/accuracy/distractor_agreement_relational_noun": 0.847, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.606, "blimp/accuracy/wh_island": 0.785, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.526, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.956, "blimp/accuracy/irregular_past_participle_verbs": 0.878, "blimp/accuracy/drop_argument": 0.744, "blimp/accuracy/wh_questions_object_gap": 0.832, "blimp/accuracy/animate_subject_passive": 0.803, "blimp/accuracy/existential_there_quantifiers_1": 0.965, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.892, "blimp/accuracy/npi_present_2": 0.517, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.922, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.961, "blimp/accuracy/existential_there_object_raising": 0.892, "blimp/accuracy/matrix_question_npi_licensor_present": 0.258, "blimp/accuracy/npi_present_1": 0.489, "blimp/accuracy/wh_vs_that_no_gap": 0.975, "blimp/accuracy/left_branch_island_echo_question": 0.442, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.98, "blimp/accuracy/causative": 0.764, "blimp/accuracy/group_average": 0.7744328358208956, "blimp/accuracy/seq_average": 0.7744328358208955, "cbt/accuracy/NE": 0.8120993589743589, "cbt/accuracy/V": 0.9308, "cbt/accuracy/CN": 0.8592, "cbt/accuracy/P": 0.902, "cbt/accuracy/group_average": 0.8760248397435897, "cbt/accuracy/seq_average": 0.8760504201680672, "hellaswag/accuracy/val": 0.3169687313284206, "hellaswag/accuracy/group_average": 0.3169687313284206, "hellaswag/accuracy/seq_average": 0.3169687313284206, "piqa/accuracy/val": 0.6082698585418934, "piqa/accuracy/group_average": 0.6082698585418934, "piqa/accuracy/seq_average": 0.6082698585418934, "ai2arc/accuracy/ARC-Easy": 0.35348837209302325, "ai2arc/accuracy/ARC-Challenge": 0.21030042918454936, "ai2arc/accuracy/group_average": 0.2818944006387863, "ai2arc/accuracy/seq_average": 0.30623229461756374, "race/accuracy/test/high": 0.2770154373927959, "race/accuracy/test/middle": 0.3530640668523677, "race/accuracy/group_average": 0.3150397521225818, "race/accuracy/seq_average": 0.2991487636805837, "siqa/accuracy/dev": 0.3694984646878199, "siqa/accuracy/group_average": 0.3694984646878199, "siqa/accuracy/seq_average": 0.3694984646878199, "commonsenseqa/accuracy/dev_rand_split": 0.276003276003276, "commonsenseqa/accuracy/group_average": 0.276003276003276, "commonsenseqa/accuracy/seq_average": 0.276003276003276}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-360000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.4505857437375993, "val/accuracy": 0.4998963371155754, "val/perplexity": 11.595136509087622, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.645380434782609, "lambada/accuracy/total": 0.31308229813664595, "lambada/accuracy/openai_last_token": 0.7829968944099379, "lambada/perplexity": 8.50942535518671, "lambada/lm_loss": 3.047256439321716, "lambada/lm_perplexity": 21.057492590168895, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.4064893176261107, "mean_loss": 2.547983089260104, "blimp/accuracy/passive_2": 0.902, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.818, "blimp/accuracy/tough_vs_raising_2": 0.868, "blimp/accuracy/tough_vs_raising_1": 0.578, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/principle_A_reconstruction": 0.373, "blimp/accuracy/wh_vs_that_with_gap": 0.582, "blimp/accuracy/principle_A_domain_2": 0.869, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.875, "blimp/accuracy/principle_A_domain_3": 0.627, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.906, "blimp/accuracy/animate_subject_trans": 0.887, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.864, "blimp/accuracy/distractor_agreement_relative_clause": 0.639, "blimp/accuracy/transitive": 0.872, "blimp/accuracy/sentential_subject_island": 0.371, "blimp/accuracy/adjunct_island": 0.832, "blimp/accuracy/intransitive": 0.775, "blimp/accuracy/existential_there_subject_raising": 0.882, "blimp/accuracy/irregular_past_participle_adjectives": 0.784, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.448, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.388, "blimp/accuracy/only_npi_scope": 0.521, "blimp/accuracy/superlative_quantifiers_2": 0.722, "blimp/accuracy/passive_1": 0.88, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.91, "blimp/accuracy/inchoative": 0.627, "blimp/accuracy/anaphor_gender_agreement": 0.963, "blimp/accuracy/principle_A_c_command": 0.712, "blimp/accuracy/only_npi_licensor_present": 0.704, "blimp/accuracy/expletive_it_object_raising": 0.762, "blimp/accuracy/left_branch_island_simple_question": 0.526, "blimp/accuracy/wh_questions_subject_gap": 0.938, "blimp/accuracy/existential_there_quantifiers_2": 0.355, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.937, "blimp/accuracy/sentential_negation_npi_scope": 0.628, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.842, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.877, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.913, "blimp/accuracy/principle_A_case_2": 0.958, "blimp/accuracy/distractor_agreement_relational_noun": 0.851, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.598, "blimp/accuracy/wh_island": 0.759, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.536, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.961, "blimp/accuracy/irregular_past_participle_verbs": 0.877, "blimp/accuracy/drop_argument": 0.74, "blimp/accuracy/wh_questions_object_gap": 0.82, "blimp/accuracy/animate_subject_passive": 0.804, "blimp/accuracy/existential_there_quantifiers_1": 0.969, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.879, "blimp/accuracy/npi_present_2": 0.51, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.914, "blimp/accuracy/anaphor_number_agreement": 0.991, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.959, "blimp/accuracy/existential_there_object_raising": 0.873, "blimp/accuracy/matrix_question_npi_licensor_present": 0.229, "blimp/accuracy/npi_present_1": 0.505, "blimp/accuracy/wh_vs_that_no_gap": 0.974, "blimp/accuracy/left_branch_island_echo_question": 0.439, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.969, "blimp/accuracy/causative": 0.748, "blimp/accuracy/group_average": 0.7715522388059702, "blimp/accuracy/seq_average": 0.7715522388059701, "cbt/accuracy/NE": 0.8072916666666666, "cbt/accuracy/V": 0.9284, "cbt/accuracy/CN": 0.8588, "cbt/accuracy/P": 0.9012, "cbt/accuracy/group_average": 0.8739229166666667, "cbt/accuracy/seq_average": 0.8739495798319328, "hellaswag/accuracy/val": 0.31955785700059747, "hellaswag/accuracy/group_average": 0.31955785700059747, "hellaswag/accuracy/seq_average": 0.31955785700059747, "piqa/accuracy/val": 0.6158868335146899, "piqa/accuracy/group_average": 0.6158868335146899, "piqa/accuracy/seq_average": 0.6158868335146899, "ai2arc/accuracy/ARC-Easy": 0.35264270613107823, "ai2arc/accuracy/ARC-Challenge": 0.21201716738197424, "ai2arc/accuracy/group_average": 0.28232993675652623, "ai2arc/accuracy/seq_average": 0.30623229461756374, "race/accuracy/test/high": 0.2775871926815323, "race/accuracy/test/middle": 0.3593314763231198, "race/accuracy/group_average": 0.31845933450232605, "race/accuracy/seq_average": 0.3013781921361978, "siqa/accuracy/dev": 0.37154554759467756, "siqa/accuracy/group_average": 0.37154554759467756, "siqa/accuracy/seq_average": 0.37154554759467756, "commonsenseqa/accuracy/dev_rand_split": 0.28255528255528256, "commonsenseqa/accuracy/group_average": 0.28255528255528256, "commonsenseqa/accuracy/seq_average": 0.28255528255528256}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-380000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.4472467331659224, "val/accuracy": 0.5006336030505952, "val/perplexity": 11.55648479086226, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.631435986631405, "lambada/accuracy/total": 0.32142857142857145, "lambada/accuracy/openai_last_token": 0.7847437888198758, "lambada/perplexity": 8.514410882652985, "lambada/lm_loss": 3.04249569943616, "lambada/lm_perplexity": 20.957481597359035, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.41103108723958337, "mean_loss": 2.539341359898664, "blimp/accuracy/passive_2": 0.906, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.806, "blimp/accuracy/tough_vs_raising_2": 0.871, "blimp/accuracy/tough_vs_raising_1": 0.589, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.916, "blimp/accuracy/principle_A_reconstruction": 0.346, "blimp/accuracy/wh_vs_that_with_gap": 0.531, "blimp/accuracy/principle_A_domain_2": 0.847, "blimp/accuracy/determiner_noun_agreement_1": 0.991, "blimp/accuracy/ellipsis_n_bar_2": 0.873, "blimp/accuracy/principle_A_domain_3": 0.61, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.905, "blimp/accuracy/animate_subject_trans": 0.9, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.859, "blimp/accuracy/distractor_agreement_relative_clause": 0.665, "blimp/accuracy/transitive": 0.878, "blimp/accuracy/sentential_subject_island": 0.343, "blimp/accuracy/adjunct_island": 0.828, "blimp/accuracy/intransitive": 0.781, "blimp/accuracy/existential_there_subject_raising": 0.867, "blimp/accuracy/irregular_past_participle_adjectives": 0.884, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.445, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.312, "blimp/accuracy/only_npi_scope": 0.606, "blimp/accuracy/superlative_quantifiers_2": 0.704, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.923, "blimp/accuracy/inchoative": 0.63, "blimp/accuracy/anaphor_gender_agreement": 0.974, "blimp/accuracy/principle_A_c_command": 0.689, "blimp/accuracy/only_npi_licensor_present": 0.724, "blimp/accuracy/expletive_it_object_raising": 0.776, "blimp/accuracy/left_branch_island_simple_question": 0.537, "blimp/accuracy/wh_questions_subject_gap": 0.943, "blimp/accuracy/existential_there_quantifiers_2": 0.34, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.933, "blimp/accuracy/sentential_negation_npi_scope": 0.637, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.843, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.908, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.912, "blimp/accuracy/principle_A_case_2": 0.958, "blimp/accuracy/distractor_agreement_relational_noun": 0.833, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989, "blimp/accuracy/superlative_quantifiers_1": 0.584, "blimp/accuracy/wh_island": 0.793, "blimp/accuracy/principle_A_domain_1": 0.994, "blimp/accuracy/complex_NP_island": 0.535, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.966, "blimp/accuracy/irregular_past_participle_verbs": 0.907, "blimp/accuracy/drop_argument": 0.751, "blimp/accuracy/wh_questions_object_gap": 0.848, "blimp/accuracy/animate_subject_passive": 0.799, "blimp/accuracy/existential_there_quantifiers_1": 0.971, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.881, "blimp/accuracy/npi_present_2": 0.523, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.918, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.957, "blimp/accuracy/existential_there_object_raising": 0.886, "blimp/accuracy/matrix_question_npi_licensor_present": 0.235, "blimp/accuracy/npi_present_1": 0.496, "blimp/accuracy/wh_vs_that_no_gap": 0.98, "blimp/accuracy/left_branch_island_echo_question": 0.403, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.977, "blimp/accuracy/causative": 0.753, "blimp/accuracy/group_average": 0.7736567164179103, "blimp/accuracy/seq_average": 0.7736567164179105, "cbt/accuracy/NE": 0.8004807692307693, "cbt/accuracy/V": 0.93, "cbt/accuracy/CN": 0.8584, "cbt/accuracy/P": 0.9092, "cbt/accuracy/group_average": 0.8745201923076924, "cbt/accuracy/seq_average": 0.8745498199279712, "hellaswag/accuracy/val": 0.31955785700059747, "hellaswag/accuracy/group_average": 0.31955785700059747, "hellaswag/accuracy/seq_average": 0.31955785700059747, "piqa/accuracy/val": 0.6142546245919478, "piqa/accuracy/group_average": 0.6142546245919478, "piqa/accuracy/seq_average": 0.6142546245919478, "ai2arc/accuracy/ARC-Easy": 0.3594080338266385, "ai2arc/accuracy/ARC-Challenge": 0.21373390557939914, "ai2arc/accuracy/group_average": 0.2865709697030188, "ai2arc/accuracy/seq_average": 0.3113314447592068, "race/accuracy/test/high": 0.27958833619210977, "race/accuracy/test/middle": 0.3593314763231198, "race/accuracy/group_average": 0.3194599062576148, "race/accuracy/seq_average": 0.30279691933522496, "siqa/accuracy/dev": 0.37001023541453426, "siqa/accuracy/group_average": 0.37001023541453426, "siqa/accuracy/seq_average": 0.37001023541453426, "commonsenseqa/accuracy/dev_rand_split": 0.2751842751842752, "commonsenseqa/accuracy/group_average": 0.2751842751842752, "commonsenseqa/accuracy/seq_average": 0.2751842751842752}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-40000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.7425648522755455, "val/accuracy": 0.4591306656125992, "val/perplexity": 15.526757909196727, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5875630822981366, "lambada/accuracy/total": 0.24650621118012422, "lambada/accuracy/openai_last_token": 0.7470885093167702, "lambada/perplexity": 13.189781738149332, "lambada/lm_loss": 3.302956650735548, "lambada/lm_perplexity": 27.192920147756457, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.35281843839636173, "mean_loss": 2.665063967286841, "blimp/accuracy/passive_2": 0.898, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.828, "blimp/accuracy/tough_vs_raising_2": 0.803, "blimp/accuracy/tough_vs_raising_1": 0.571, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.873, "blimp/accuracy/principle_A_reconstruction": 0.391, "blimp/accuracy/wh_vs_that_with_gap": 0.514, "blimp/accuracy/principle_A_domain_2": 0.865, "blimp/accuracy/determiner_noun_agreement_1": 0.989, "blimp/accuracy/ellipsis_n_bar_2": 0.886, "blimp/accuracy/principle_A_domain_3": 0.582, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.91, "blimp/accuracy/animate_subject_trans": 0.878, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.823, "blimp/accuracy/distractor_agreement_relative_clause": 0.609, "blimp/accuracy/transitive": 0.852, "blimp/accuracy/sentential_subject_island": 0.353, "blimp/accuracy/adjunct_island": 0.79, "blimp/accuracy/intransitive": 0.755, "blimp/accuracy/existential_there_subject_raising": 0.833, "blimp/accuracy/irregular_past_participle_adjectives": 0.99, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.317, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.259, "blimp/accuracy/only_npi_scope": 0.642, "blimp/accuracy/superlative_quantifiers_2": 0.516, "blimp/accuracy/passive_1": 0.874, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.894, "blimp/accuracy/inchoative": 0.553, "blimp/accuracy/anaphor_gender_agreement": 0.939, "blimp/accuracy/principle_A_c_command": 0.646, "blimp/accuracy/only_npi_licensor_present": 0.699, "blimp/accuracy/expletive_it_object_raising": 0.743, "blimp/accuracy/left_branch_island_simple_question": 0.343, "blimp/accuracy/wh_questions_subject_gap": 0.899, "blimp/accuracy/existential_there_quantifiers_2": 0.315, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.935, "blimp/accuracy/sentential_negation_npi_scope": 0.576, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.784, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.906, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.853, "blimp/accuracy/principle_A_case_2": 0.951, "blimp/accuracy/distractor_agreement_relational_noun": 0.813, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991, "blimp/accuracy/superlative_quantifiers_1": 0.553, "blimp/accuracy/wh_island": 0.652, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.537, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.948, "blimp/accuracy/irregular_past_participle_verbs": 0.891, "blimp/accuracy/drop_argument": 0.796, "blimp/accuracy/wh_questions_object_gap": 0.738, "blimp/accuracy/animate_subject_passive": 0.783, "blimp/accuracy/existential_there_quantifiers_1": 0.958, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.895, "blimp/accuracy/npi_present_2": 0.503, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.902, "blimp/accuracy/anaphor_number_agreement": 0.975, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.962, "blimp/accuracy/existential_there_object_raising": 0.786, "blimp/accuracy/matrix_question_npi_licensor_present": 0.22, "blimp/accuracy/npi_present_1": 0.497, "blimp/accuracy/wh_vs_that_no_gap": 0.96, "blimp/accuracy/left_branch_island_echo_question": 0.385, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.96, "blimp/accuracy/causative": 0.666, "blimp/accuracy/group_average": 0.7460298507462687, "blimp/accuracy/seq_average": 0.7460298507462687, "cbt/accuracy/NE": 0.7435897435897436, "cbt/accuracy/V": 0.8988, "cbt/accuracy/CN": 0.812, "cbt/accuracy/P": 0.8704, "cbt/accuracy/group_average": 0.8311974358974359, "cbt/accuracy/seq_average": 0.8312324929971989, "hellaswag/accuracy/val": 0.2849034056960765, "hellaswag/accuracy/group_average": 0.2849034056960765, "hellaswag/accuracy/seq_average": 0.2849034056960765, "piqa/accuracy/val": 0.5674646354733406, "piqa/accuracy/group_average": 0.5674646354733406, "piqa/accuracy/seq_average": 0.5674646354733406, "ai2arc/accuracy/ARC-Easy": 0.32515856236786467, "ai2arc/accuracy/ARC-Challenge": 0.20085836909871244, "ai2arc/accuracy/group_average": 0.26300846573328857, "ai2arc/accuracy/seq_average": 0.2841359773371105, "race/accuracy/test/high": 0.2672955974842767, "race/accuracy/test/middle": 0.33913649025069637, "race/accuracy/group_average": 0.3032160438674866, "race/accuracy/seq_average": 0.2882042967166599, "siqa/accuracy/dev": 0.3490276356192426, "siqa/accuracy/group_average": 0.3490276356192426, "siqa/accuracy/seq_average": 0.3490276356192426, "commonsenseqa/accuracy/dev_rand_split": 0.25307125307125306, "commonsenseqa/accuracy/group_average": 0.25307125307125306, "commonsenseqa/accuracy/seq_average": 0.25307125307125306}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-400000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.4431656125992065, "val/accuracy": 0.5016450427827381, "val/perplexity": 11.509417492091456, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6884505941260675, "lambada/accuracy/total": 0.30997670807453415, "lambada/accuracy/openai_last_token": 0.7837732919254659, "lambada/perplexity": 8.504349073894907, "lambada/lm_loss": 3.0386220490589047, "lambada/lm_perplexity": 20.876456673292964, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.40581087542863614, "mean_loss": 2.565808103362637, "blimp/accuracy/passive_2": 0.906, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.801, "blimp/accuracy/tough_vs_raising_2": 0.868, "blimp/accuracy/tough_vs_raising_1": 0.606, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.924, "blimp/accuracy/principle_A_reconstruction": 0.384, "blimp/accuracy/wh_vs_that_with_gap": 0.552, "blimp/accuracy/principle_A_domain_2": 0.855, "blimp/accuracy/determiner_noun_agreement_1": 0.992, "blimp/accuracy/ellipsis_n_bar_2": 0.895, "blimp/accuracy/principle_A_domain_3": 0.617, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.914, "blimp/accuracy/animate_subject_trans": 0.892, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.891, "blimp/accuracy/distractor_agreement_relative_clause": 0.647, "blimp/accuracy/transitive": 0.891, "blimp/accuracy/sentential_subject_island": 0.333, "blimp/accuracy/adjunct_island": 0.824, "blimp/accuracy/intransitive": 0.795, "blimp/accuracy/existential_there_subject_raising": 0.899, "blimp/accuracy/irregular_past_participle_adjectives": 0.813, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.445, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.33, "blimp/accuracy/only_npi_scope": 0.603, "blimp/accuracy/superlative_quantifiers_2": 0.741, "blimp/accuracy/passive_1": 0.894, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.925, "blimp/accuracy/inchoative": 0.642, "blimp/accuracy/anaphor_gender_agreement": 0.974, "blimp/accuracy/principle_A_c_command": 0.67, "blimp/accuracy/only_npi_licensor_present": 0.627, "blimp/accuracy/expletive_it_object_raising": 0.782, "blimp/accuracy/left_branch_island_simple_question": 0.531, "blimp/accuracy/wh_questions_subject_gap": 0.926, "blimp/accuracy/existential_there_quantifiers_2": 0.332, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939, "blimp/accuracy/sentential_negation_npi_scope": 0.678, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.842, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.896, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.905, "blimp/accuracy/principle_A_case_2": 0.967, "blimp/accuracy/distractor_agreement_relational_noun": 0.847, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991, "blimp/accuracy/superlative_quantifiers_1": 0.556, "blimp/accuracy/wh_island": 0.805, "blimp/accuracy/principle_A_domain_1": 0.991, "blimp/accuracy/complex_NP_island": 0.534, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.965, "blimp/accuracy/irregular_past_participle_verbs": 0.892, "blimp/accuracy/drop_argument": 0.759, "blimp/accuracy/wh_questions_object_gap": 0.817, "blimp/accuracy/animate_subject_passive": 0.801, "blimp/accuracy/existential_there_quantifiers_1": 0.978, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.888, "blimp/accuracy/npi_present_2": 0.54, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.922, "blimp/accuracy/anaphor_number_agreement": 0.994, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.965, "blimp/accuracy/existential_there_object_raising": 0.861, "blimp/accuracy/matrix_question_npi_licensor_present": 0.222, "blimp/accuracy/npi_present_1": 0.527, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.405, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.972, "blimp/accuracy/causative": 0.749, "blimp/accuracy/group_average": 0.7743283582089554, "blimp/accuracy/seq_average": 0.7743283582089552, "cbt/accuracy/NE": 0.8008814102564102, "cbt/accuracy/V": 0.9316, "cbt/accuracy/CN": 0.8576, "cbt/accuracy/P": 0.9076, "cbt/accuracy/group_average": 0.8744203525641026, "cbt/accuracy/seq_average": 0.8744497799119648, "hellaswag/accuracy/val": 0.3189603664608644, "hellaswag/accuracy/group_average": 0.3189603664608644, "hellaswag/accuracy/seq_average": 0.3189603664608644, "piqa/accuracy/val": 0.6137105549510338, "piqa/accuracy/group_average": 0.6137105549510338, "piqa/accuracy/seq_average": 0.6137105549510338, "ai2arc/accuracy/ARC-Easy": 0.3513742071881607, "ai2arc/accuracy/ARC-Challenge": 0.2206008583690987, "ai2arc/accuracy/group_average": 0.2859875327786297, "ai2arc/accuracy/seq_average": 0.3082152974504249, "race/accuracy/test/high": 0.27644368210405945, "race/accuracy/test/middle": 0.3551532033426184, "race/accuracy/group_average": 0.3157984427233389, "race/accuracy/seq_average": 0.29935143899473043, "siqa/accuracy/dev": 0.36898669396110545, "siqa/accuracy/group_average": 0.36898669396110545, "siqa/accuracy/seq_average": 0.36898669396110545, "commonsenseqa/accuracy/dev_rand_split": 0.2702702702702703, "commonsenseqa/accuracy/group_average": 0.2702702702702703, "commonsenseqa/accuracy/seq_average": 0.2702702702702703}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-60000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.6807740226624506, "val/accuracy": 0.46717568049355157, "val/perplexity": 14.596386858798736, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.5353401136694487, "lambada/accuracy/total": 0.23971273291925466, "lambada/accuracy/openai_last_token": 0.749805900621118, "lambada/perplexity": 12.755823905315358, "lambada/lm_loss": 3.242824448993532, "lambada/lm_perplexity": 25.605942385784974, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3534442067064031, "mean_loss": 2.60805706816595, "blimp/accuracy/passive_2": 0.892, "blimp/accuracy/determiner_noun_agreement_2": 0.976, "blimp/accuracy/ellipsis_n_bar_1": 0.792, "blimp/accuracy/tough_vs_raising_2": 0.862, "blimp/accuracy/tough_vs_raising_1": 0.539, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.886, "blimp/accuracy/principle_A_reconstruction": 0.529, "blimp/accuracy/wh_vs_that_with_gap": 0.532, "blimp/accuracy/principle_A_domain_2": 0.844, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.89, "blimp/accuracy/principle_A_domain_3": 0.589, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.91, "blimp/accuracy/animate_subject_trans": 0.883, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.872, "blimp/accuracy/distractor_agreement_relative_clause": 0.627, "blimp/accuracy/transitive": 0.862, "blimp/accuracy/sentential_subject_island": 0.383, "blimp/accuracy/adjunct_island": 0.822, "blimp/accuracy/intransitive": 0.741, "blimp/accuracy/existential_there_subject_raising": 0.879, "blimp/accuracy/irregular_past_participle_adjectives": 0.945, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.313, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.307, "blimp/accuracy/only_npi_scope": 0.603, "blimp/accuracy/superlative_quantifiers_2": 0.472, "blimp/accuracy/passive_1": 0.89, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.902, "blimp/accuracy/inchoative": 0.582, "blimp/accuracy/anaphor_gender_agreement": 0.953, "blimp/accuracy/principle_A_c_command": 0.665, "blimp/accuracy/only_npi_licensor_present": 0.827, "blimp/accuracy/expletive_it_object_raising": 0.781, "blimp/accuracy/left_branch_island_simple_question": 0.421, "blimp/accuracy/wh_questions_subject_gap": 0.916, "blimp/accuracy/existential_there_quantifiers_2": 0.31, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.931, "blimp/accuracy/sentential_negation_npi_scope": 0.555, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.831, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.835, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.881, "blimp/accuracy/principle_A_case_2": 0.932, "blimp/accuracy/distractor_agreement_relational_noun": 0.837, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.986, "blimp/accuracy/superlative_quantifiers_1": 0.496, "blimp/accuracy/wh_island": 0.855, "blimp/accuracy/principle_A_domain_1": 0.986, "blimp/accuracy/complex_NP_island": 0.578, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.966, "blimp/accuracy/irregular_past_participle_verbs": 0.833, "blimp/accuracy/drop_argument": 0.755, "blimp/accuracy/wh_questions_object_gap": 0.772, "blimp/accuracy/animate_subject_passive": 0.785, "blimp/accuracy/existential_there_quantifiers_1": 0.963, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.899, "blimp/accuracy/npi_present_2": 0.564, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.895, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.847, "blimp/accuracy/matrix_question_npi_licensor_present": 0.208, "blimp/accuracy/npi_present_1": 0.531, "blimp/accuracy/wh_vs_that_no_gap": 0.975, "blimp/accuracy/left_branch_island_echo_question": 0.386, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.962, "blimp/accuracy/causative": 0.682, "blimp/accuracy/group_average": 0.7589701492537314, "blimp/accuracy/seq_average": 0.7589701492537313, "cbt/accuracy/NE": 0.7580128205128205, "cbt/accuracy/V": 0.9084, "cbt/accuracy/CN": 0.8212, "cbt/accuracy/P": 0.8852, "cbt/accuracy/group_average": 0.8432032051282052, "cbt/accuracy/seq_average": 0.8432372949179672, "hellaswag/accuracy/val": 0.29416450906193986, "hellaswag/accuracy/group_average": 0.29416450906193986, "hellaswag/accuracy/seq_average": 0.29416450906193986, "piqa/accuracy/val": 0.5772578890097932, "piqa/accuracy/group_average": 0.5772578890097932, "piqa/accuracy/seq_average": 0.5772578890097932, "ai2arc/accuracy/ARC-Easy": 0.33488372093023255, "ai2arc/accuracy/ARC-Challenge": 0.21201716738197424, "ai2arc/accuracy/group_average": 0.2734504441561034, "ai2arc/accuracy/seq_average": 0.2943342776203966, "race/accuracy/test/high": 0.2741566609491138, "race/accuracy/test/middle": 0.3447075208913649, "race/accuracy/group_average": 0.3094320909202394, "race/accuracy/seq_average": 0.29468990676935547, "siqa/accuracy/dev": 0.35363357215967245, "siqa/accuracy/group_average": 0.35363357215967245, "siqa/accuracy/seq_average": 0.35363357215967245, "commonsenseqa/accuracy/dev_rand_split": 0.2678132678132678, "commonsenseqa/accuracy/group_average": 0.2678132678132678, "commonsenseqa/accuracy/seq_average": 0.2678132678132678}
Pretrain_language_model/save/slimpajama_dense_660M/export/result-model-80000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.639250013563368, "val/accuracy": 0.4730718703497024, "val/perplexity": 14.00269783517995, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6029424252717392, "lambada/accuracy/total": 0.24281832298136646, "lambada/accuracy/openai_last_token": 0.7546583850931677, "lambada/perplexity": 12.710137655517755, "lambada/lm_loss": 3.2200676042349814, "lambada/lm_perplexity": 25.029812245450394, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3579450966655344, "mean_loss": 2.6210962194175536, "blimp/accuracy/passive_2": 0.907, "blimp/accuracy/determiner_noun_agreement_2": 0.985, "blimp/accuracy/ellipsis_n_bar_1": 0.784, "blimp/accuracy/tough_vs_raising_2": 0.827, "blimp/accuracy/tough_vs_raising_1": 0.666, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.877, "blimp/accuracy/principle_A_reconstruction": 0.403, "blimp/accuracy/wh_vs_that_with_gap": 0.579, "blimp/accuracy/principle_A_domain_2": 0.873, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.874, "blimp/accuracy/principle_A_domain_3": 0.581, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.923, "blimp/accuracy/animate_subject_trans": 0.884, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.885, "blimp/accuracy/distractor_agreement_relative_clause": 0.593, "blimp/accuracy/transitive": 0.857, "blimp/accuracy/sentential_subject_island": 0.35, "blimp/accuracy/adjunct_island": 0.801, "blimp/accuracy/intransitive": 0.791, "blimp/accuracy/existential_there_subject_raising": 0.865, "blimp/accuracy/irregular_past_participle_adjectives": 0.925, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.34, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.27, "blimp/accuracy/only_npi_scope": 0.445, "blimp/accuracy/superlative_quantifiers_2": 0.673, "blimp/accuracy/passive_1": 0.904, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.891, "blimp/accuracy/inchoative": 0.644, "blimp/accuracy/anaphor_gender_agreement": 0.96, "blimp/accuracy/principle_A_c_command": 0.708, "blimp/accuracy/only_npi_licensor_present": 0.702, "blimp/accuracy/expletive_it_object_raising": 0.757, "blimp/accuracy/left_branch_island_simple_question": 0.386, "blimp/accuracy/wh_questions_subject_gap": 0.921, "blimp/accuracy/existential_there_quantifiers_2": 0.389, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.933, "blimp/accuracy/sentential_negation_npi_scope": 0.72, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.834, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.881, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.893, "blimp/accuracy/principle_A_case_2": 0.958, "blimp/accuracy/distractor_agreement_relational_noun": 0.816, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.995, "blimp/accuracy/superlative_quantifiers_1": 0.526, "blimp/accuracy/wh_island": 0.751, "blimp/accuracy/principle_A_domain_1": 0.992, "blimp/accuracy/complex_NP_island": 0.537, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.956, "blimp/accuracy/irregular_past_participle_verbs": 0.893, "blimp/accuracy/drop_argument": 0.782, "blimp/accuracy/wh_questions_object_gap": 0.805, "blimp/accuracy/animate_subject_passive": 0.795, "blimp/accuracy/existential_there_quantifiers_1": 0.959, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.88, "blimp/accuracy/npi_present_2": 0.596, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.937, "blimp/accuracy/anaphor_number_agreement": 0.985, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.948, "blimp/accuracy/existential_there_object_raising": 0.868, "blimp/accuracy/matrix_question_npi_licensor_present": 0.199, "blimp/accuracy/npi_present_1": 0.569, "blimp/accuracy/wh_vs_that_no_gap": 0.978, "blimp/accuracy/left_branch_island_echo_question": 0.388, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.977, "blimp/accuracy/causative": 0.703, "blimp/accuracy/group_average": 0.7650895522388059, "blimp/accuracy/seq_average": 0.7650895522388059, "cbt/accuracy/NE": 0.7732371794871795, "cbt/accuracy/V": 0.9136, "cbt/accuracy/CN": 0.8332, "cbt/accuracy/P": 0.8852, "cbt/accuracy/group_average": 0.8513092948717949, "cbt/accuracy/seq_average": 0.8513405362144858, "hellaswag/accuracy/val": 0.29286994622585144, "hellaswag/accuracy/group_average": 0.29286994622585144, "hellaswag/accuracy/seq_average": 0.29286994622585144, "piqa/accuracy/val": 0.5799782372143635, "piqa/accuracy/group_average": 0.5799782372143635, "piqa/accuracy/seq_average": 0.5799782372143635, "ai2arc/accuracy/ARC-Easy": 0.3403805496828753, "ai2arc/accuracy/ARC-Challenge": 0.21545064377682405, "ai2arc/accuracy/group_average": 0.27791559672984967, "ai2arc/accuracy/seq_average": 0.2991501416430595, "race/accuracy/test/high": 0.27672955974842767, "race/accuracy/test/middle": 0.3370473537604457, "race/accuracy/group_average": 0.30688845675443666, "race/accuracy/seq_average": 0.294284556141062, "siqa/accuracy/dev": 0.3618219037871034, "siqa/accuracy/group_average": 0.3618219037871034, "siqa/accuracy/seq_average": 0.3618219037871034, "commonsenseqa/accuracy/dev_rand_split": 0.26126126126126126, "commonsenseqa/accuracy/group_average": 0.26126126126126126, "commonsenseqa/accuracy/seq_average": 0.26126126126126126}