Upload folder using huggingface_hub

#1703
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-10000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 3.2025543697296626, "val/accuracy": 0.40221102275545634, "val/perplexity": 24.595275452811094, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.8275362571574147, "lambada/accuracy/total": 0.1298524844720497, "lambada/accuracy/openai_last_token": 0.7043866459627329, "lambada/perplexity": 35.25697270105791, "lambada/lm_loss": 3.748841331567743, "lambada/lm_perplexity": 42.47184269616613, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.26603175361375303, "mean_loss": 3.0150453134435384, "blimp/accuracy/passive_2": 0.855, "blimp/accuracy/determiner_noun_agreement_2": 0.966, "blimp/accuracy/ellipsis_n_bar_1": 0.7, "blimp/accuracy/tough_vs_raising_2": 0.817, "blimp/accuracy/tough_vs_raising_1": 0.487, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.819, "blimp/accuracy/principle_A_reconstruction": 0.531, "blimp/accuracy/wh_vs_that_with_gap": 0.415, "blimp/accuracy/principle_A_domain_2": 0.774, "blimp/accuracy/determiner_noun_agreement_1": 0.962, "blimp/accuracy/ellipsis_n_bar_2": 0.865, "blimp/accuracy/principle_A_domain_3": 0.534, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.833, "blimp/accuracy/animate_subject_trans": 0.84, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.735, "blimp/accuracy/distractor_agreement_relative_clause": 0.409, "blimp/accuracy/transitive": 0.779, "blimp/accuracy/sentential_subject_island": 0.376, "blimp/accuracy/adjunct_island": 0.736, "blimp/accuracy/intransitive": 0.666, "blimp/accuracy/existential_there_subject_raising": 0.77, "blimp/accuracy/irregular_past_participle_adjectives": 0.954, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.213, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.097, "blimp/accuracy/only_npi_scope": 0.754, "blimp/accuracy/superlative_quantifiers_2": 0.67, "blimp/accuracy/passive_1": 0.856, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.827, "blimp/accuracy/inchoative": 0.481, "blimp/accuracy/anaphor_gender_agreement": 0.896, "blimp/accuracy/principle_A_c_command": 0.368, "blimp/accuracy/only_npi_licensor_present": 0.657, "blimp/accuracy/expletive_it_object_raising": 0.705, "blimp/accuracy/left_branch_island_simple_question": 0.169, "blimp/accuracy/wh_questions_subject_gap": 0.84, "blimp/accuracy/existential_there_quantifiers_2": 0.434, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.909, "blimp/accuracy/sentential_negation_npi_scope": 0.291, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.687, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.918, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.744, "blimp/accuracy/principle_A_case_2": 0.921, "blimp/accuracy/distractor_agreement_relational_noun": 0.665, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.998, "blimp/accuracy/superlative_quantifiers_1": 0.729, "blimp/accuracy/wh_island": 0.661, "blimp/accuracy/principle_A_domain_1": 0.98, "blimp/accuracy/complex_NP_island": 0.532, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.902, "blimp/accuracy/irregular_past_participle_verbs": 0.843, "blimp/accuracy/drop_argument": 0.728, "blimp/accuracy/wh_questions_object_gap": 0.683, "blimp/accuracy/animate_subject_passive": 0.788, "blimp/accuracy/existential_there_quantifiers_1": 0.92, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.812, "blimp/accuracy/npi_present_2": 0.605, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.786, "blimp/accuracy/anaphor_number_agreement": 0.946, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.937, "blimp/accuracy/existential_there_object_raising": 0.737, "blimp/accuracy/matrix_question_npi_licensor_present": 0.024, "blimp/accuracy/npi_present_1": 0.554, "blimp/accuracy/wh_vs_that_no_gap": 0.942, "blimp/accuracy/left_branch_island_echo_question": 0.58, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.984, "blimp/accuracy/causative": 0.611, "blimp/accuracy/group_average": 0.7045820895522388, "blimp/accuracy/seq_average": 0.7045820895522388, "cbt/accuracy/NE": 0.6822916666666666, "cbt/accuracy/V": 0.8368, "cbt/accuracy/CN": 0.6848, "cbt/accuracy/P": 0.8036, "cbt/accuracy/group_average": 0.7518729166666667, "cbt/accuracy/seq_average": 0.7519007603041217, "hellaswag/accuracy/val": 0.2704640509858594, "hellaswag/accuracy/group_average": 0.2704640509858594, "hellaswag/accuracy/seq_average": 0.2704640509858594, "piqa/accuracy/val": 0.5369967355821545, "piqa/accuracy/group_average": 0.5369967355821545, "piqa/accuracy/seq_average": 0.5369967355821545, "ai2arc/accuracy/ARC-Easy": 0.2955602536997886, "ai2arc/accuracy/ARC-Challenge": 0.2, "ai2arc/accuracy/group_average": 0.2477801268498943, "ai2arc/accuracy/seq_average": 0.2640226628895184, "race/accuracy/test/high": 0.2504288164665523, "race/accuracy/test/middle": 0.3224233983286908, "race/accuracy/group_average": 0.28642610739762153, "race/accuracy/seq_average": 0.27138224564248076, "siqa/accuracy/dev": 0.36489252814739, "siqa/accuracy/group_average": 0.36489252814739, "siqa/accuracy/seq_average": 0.36489252814739, "commonsenseqa/accuracy/dev_rand_split": 0.22276822276822278, "commonsenseqa/accuracy/group_average": 0.22276822276822278, "commonsenseqa/accuracy/seq_average": 0.22276822276822278}
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-100000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.835506378658234,
3
+ "val/accuracy": 0.44936891586061506,
4
+ "val/perplexity": 17.039026315480147,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.6062356700067935,
8
+ "lambada/accuracy/total": 0.19739906832298137,
9
+ "lambada/accuracy/openai_last_token": 0.7434006211180124,
10
+ "lambada/perplexity": 18.09651274992837,
11
+ "lambada/lm_loss": 3.403382924202551,
12
+ "lambada/lm_perplexity": 30.06563797738061,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.32338399209179824,
16
+ "mean_loss": 2.7208710243325136,
17
+ "blimp/accuracy/passive_2": 0.893,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.975,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.794,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.865,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.501,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.846,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.428,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.509,
25
+ "blimp/accuracy/principle_A_domain_2": 0.832,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.982,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.906,
28
+ "blimp/accuracy/principle_A_domain_3": 0.589,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.912,
30
+ "blimp/accuracy/animate_subject_trans": 0.895,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.872,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.513,
33
+ "blimp/accuracy/transitive": 0.843,
34
+ "blimp/accuracy/sentential_subject_island": 0.314,
35
+ "blimp/accuracy/adjunct_island": 0.664,
36
+ "blimp/accuracy/intransitive": 0.732,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.827,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.934,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.217,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.233,
42
+ "blimp/accuracy/only_npi_scope": 0.559,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.811,
44
+ "blimp/accuracy/passive_1": 0.901,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.859,
46
+ "blimp/accuracy/inchoative": 0.555,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.941,
48
+ "blimp/accuracy/principle_A_c_command": 0.492,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.527,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.743,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.253,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.914,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.406,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.934,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.564,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.81,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.826,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.833,
59
+ "blimp/accuracy/principle_A_case_2": 0.952,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.691,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.478,
63
+ "blimp/accuracy/wh_island": 0.738,
64
+ "blimp/accuracy/principle_A_domain_1": 0.963,
65
+ "blimp/accuracy/complex_NP_island": 0.535,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.95,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.861,
68
+ "blimp/accuracy/drop_argument": 0.731,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.749,
70
+ "blimp/accuracy/animate_subject_passive": 0.793,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.959,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.833,
73
+ "blimp/accuracy/npi_present_2": 0.598,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.898,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.97,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.959,
77
+ "blimp/accuracy/existential_there_object_raising": 0.837,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.108,
79
+ "blimp/accuracy/npi_present_1": 0.573,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.957,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.431,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974,
83
+ "blimp/accuracy/causative": 0.701,
84
+ "blimp/accuracy/group_average": 0.7347761194029852,
85
+ "blimp/accuracy/seq_average": 0.734776119402985,
86
+ "cbt/accuracy/NE": 0.7399839743589743,
87
+ "cbt/accuracy/V": 0.8812,
88
+ "cbt/accuracy/CN": 0.7616,
89
+ "cbt/accuracy/P": 0.8584,
90
+ "cbt/accuracy/group_average": 0.8102959935897436,
91
+ "cbt/accuracy/seq_average": 0.8103241296518607,
92
+ "hellaswag/accuracy/val": 0.2756423023302131,
93
+ "hellaswag/accuracy/group_average": 0.2756423023302131,
94
+ "hellaswag/accuracy/seq_average": 0.2756423023302131,
95
+ "piqa/accuracy/val": 0.558215451577802,
96
+ "piqa/accuracy/group_average": 0.558215451577802,
97
+ "piqa/accuracy/seq_average": 0.558215451577802,
98
+ "ai2arc/accuracy/ARC-Easy": 0.3209302325581395,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.20858369098712445,
100
+ "ai2arc/accuracy/group_average": 0.26475696177263197,
101
+ "ai2arc/accuracy/seq_average": 0.28385269121813034,
102
+ "race/accuracy/test/high": 0.2564322469982847,
103
+ "race/accuracy/test/middle": 0.3307799442896936,
104
+ "race/accuracy/group_average": 0.29360609564398915,
105
+ "race/accuracy/seq_average": 0.27807053100932305,
106
+ "siqa/accuracy/dev": 0.3572159672466735,
107
+ "siqa/accuracy/group_average": 0.3572159672466735,
108
+ "siqa/accuracy/seq_average": 0.3572159672466735,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.25143325143325146,
110
+ "commonsenseqa/accuracy/group_average": 0.25143325143325146,
111
+ "commonsenseqa/accuracy/seq_average": 0.25143325143325146
112
+ }
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-20000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 3.0745127844432045, "val/accuracy": 0.41752212766617064, "val/perplexity": 21.63933632143836, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.666469597668381, "lambada/accuracy/total": 0.15372670807453417, "lambada/accuracy/openai_last_token": 0.720108695652174, "lambada/perplexity": 25.216885297256862, "lambada/lm_loss": 3.596378480195779, "lambada/lm_perplexity": 36.46593292372698, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.2856244178703524, "mean_loss": 2.870491191055793, "blimp/accuracy/passive_2": 0.871, "blimp/accuracy/determiner_noun_agreement_2": 0.96, "blimp/accuracy/ellipsis_n_bar_1": 0.77, "blimp/accuracy/tough_vs_raising_2": 0.844, "blimp/accuracy/tough_vs_raising_1": 0.47, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.825, "blimp/accuracy/principle_A_reconstruction": 0.468, "blimp/accuracy/wh_vs_that_with_gap": 0.467, "blimp/accuracy/principle_A_domain_2": 0.788, "blimp/accuracy/determiner_noun_agreement_1": 0.969, "blimp/accuracy/ellipsis_n_bar_2": 0.892, "blimp/accuracy/principle_A_domain_3": 0.568, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.856, "blimp/accuracy/animate_subject_trans": 0.866, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.782, "blimp/accuracy/distractor_agreement_relative_clause": 0.502, "blimp/accuracy/transitive": 0.831, "blimp/accuracy/sentential_subject_island": 0.3, "blimp/accuracy/adjunct_island": 0.688, "blimp/accuracy/intransitive": 0.679, "blimp/accuracy/existential_there_subject_raising": 0.802, "blimp/accuracy/irregular_past_participle_adjectives": 0.891, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.17, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.146, "blimp/accuracy/only_npi_scope": 0.634, "blimp/accuracy/superlative_quantifiers_2": 0.834, "blimp/accuracy/passive_1": 0.871, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.846, "blimp/accuracy/inchoative": 0.514, "blimp/accuracy/anaphor_gender_agreement": 0.902, "blimp/accuracy/principle_A_c_command": 0.432, "blimp/accuracy/only_npi_licensor_present": 0.415, "blimp/accuracy/expletive_it_object_raising": 0.715, "blimp/accuracy/left_branch_island_simple_question": 0.208, "blimp/accuracy/wh_questions_subject_gap": 0.872, "blimp/accuracy/existential_there_quantifiers_2": 0.459, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.912, "blimp/accuracy/sentential_negation_npi_scope": 0.443, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.75, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.9, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.781, "blimp/accuracy/principle_A_case_2": 0.952, "blimp/accuracy/distractor_agreement_relational_noun": 0.722, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993, "blimp/accuracy/superlative_quantifiers_1": 0.464, "blimp/accuracy/wh_island": 0.679, "blimp/accuracy/principle_A_domain_1": 0.97, "blimp/accuracy/complex_NP_island": 0.498, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.919, "blimp/accuracy/irregular_past_participle_verbs": 0.819, "blimp/accuracy/drop_argument": 0.745, "blimp/accuracy/wh_questions_object_gap": 0.729, "blimp/accuracy/animate_subject_passive": 0.759, "blimp/accuracy/existential_there_quantifiers_1": 0.952, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.803, "blimp/accuracy/npi_present_2": 0.565, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.85, "blimp/accuracy/anaphor_number_agreement": 0.958, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.953, "blimp/accuracy/existential_there_object_raising": 0.8, "blimp/accuracy/matrix_question_npi_licensor_present": 0.067, "blimp/accuracy/npi_present_1": 0.576, "blimp/accuracy/wh_vs_that_no_gap": 0.962, "blimp/accuracy/left_branch_island_echo_question": 0.443, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.981, "blimp/accuracy/causative": 0.649, "blimp/accuracy/group_average": 0.7119552238805968, "blimp/accuracy/seq_average": 0.7119552238805971, "cbt/accuracy/NE": 0.6854967948717948, "cbt/accuracy/V": 0.8536, "cbt/accuracy/CN": 0.7252, "cbt/accuracy/P": 0.8212, "cbt/accuracy/group_average": 0.7713741987179488, "cbt/accuracy/seq_average": 0.7714085634253701, "hellaswag/accuracy/val": 0.2747460665206134, "hellaswag/accuracy/group_average": 0.2747460665206134, "hellaswag/accuracy/seq_average": 0.2747460665206134, "piqa/accuracy/val": 0.5429815016322089, "piqa/accuracy/group_average": 0.5429815016322089, "piqa/accuracy/seq_average": 0.5429815016322089, "ai2arc/accuracy/ARC-Easy": 0.3023255813953488, "ai2arc/accuracy/ARC-Challenge": 0.20085836909871244, "ai2arc/accuracy/group_average": 0.25159197524703064, "ai2arc/accuracy/seq_average": 0.2688385269121813, "race/accuracy/test/high": 0.2552887364208119, "race/accuracy/test/middle": 0.3279944289693593, "race/accuracy/group_average": 0.2916415826950856, "race/accuracy/seq_average": 0.27644912849614917, "siqa/accuracy/dev": 0.3587512794268168, "siqa/accuracy/group_average": 0.3587512794268168, "siqa/accuracy/seq_average": 0.3587512794268168, "commonsenseqa/accuracy/dev_rand_split": 0.22604422604422605, "commonsenseqa/accuracy/group_average": 0.22604422604422605, "commonsenseqa/accuracy/seq_average": 0.22604422604422605}
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-30000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 3.006318591889881, "val/accuracy": 0.4260069831969246, "val/perplexity": 20.212851033265963, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.687244865464868, "lambada/accuracy/total": 0.15120341614906832, "lambada/accuracy/openai_last_token": 0.7214673913043478, "lambada/perplexity": 24.528620766355957, "lambada/lm_loss": 3.557020944031915, "lambada/lm_perplexity": 35.05859989068845, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.28860519967299647, "mean_loss": 2.8467817286773744, "blimp/accuracy/passive_2": 0.873, "blimp/accuracy/determiner_noun_agreement_2": 0.973, "blimp/accuracy/ellipsis_n_bar_1": 0.765, "blimp/accuracy/tough_vs_raising_2": 0.839, "blimp/accuracy/tough_vs_raising_1": 0.494, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.856, "blimp/accuracy/principle_A_reconstruction": 0.583, "blimp/accuracy/wh_vs_that_with_gap": 0.471, "blimp/accuracy/principle_A_domain_2": 0.828, "blimp/accuracy/determiner_noun_agreement_1": 0.97, "blimp/accuracy/ellipsis_n_bar_2": 0.862, "blimp/accuracy/principle_A_domain_3": 0.544, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.883, "blimp/accuracy/animate_subject_trans": 0.869, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.806, "blimp/accuracy/distractor_agreement_relative_clause": 0.51, "blimp/accuracy/transitive": 0.821, "blimp/accuracy/sentential_subject_island": 0.356, "blimp/accuracy/adjunct_island": 0.698, "blimp/accuracy/intransitive": 0.737, "blimp/accuracy/existential_there_subject_raising": 0.799, "blimp/accuracy/irregular_past_participle_adjectives": 0.952, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.201, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.2, "blimp/accuracy/only_npi_scope": 0.651, "blimp/accuracy/superlative_quantifiers_2": 0.708, "blimp/accuracy/passive_1": 0.874, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.863, "blimp/accuracy/inchoative": 0.552, "blimp/accuracy/anaphor_gender_agreement": 0.923, "blimp/accuracy/principle_A_c_command": 0.464, "blimp/accuracy/only_npi_licensor_present": 0.449, "blimp/accuracy/expletive_it_object_raising": 0.737, "blimp/accuracy/left_branch_island_simple_question": 0.229, "blimp/accuracy/wh_questions_subject_gap": 0.857, "blimp/accuracy/existential_there_quantifiers_2": 0.357, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.924, "blimp/accuracy/sentential_negation_npi_scope": 0.485, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.77, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.848, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.808, "blimp/accuracy/principle_A_case_2": 0.939, "blimp/accuracy/distractor_agreement_relational_noun": 0.686, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.999, "blimp/accuracy/superlative_quantifiers_1": 0.647, "blimp/accuracy/wh_island": 0.603, "blimp/accuracy/principle_A_domain_1": 0.973, "blimp/accuracy/complex_NP_island": 0.467, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.937, "blimp/accuracy/irregular_past_participle_verbs": 0.855, "blimp/accuracy/drop_argument": 0.769, "blimp/accuracy/wh_questions_object_gap": 0.718, "blimp/accuracy/animate_subject_passive": 0.795, "blimp/accuracy/existential_there_quantifiers_1": 0.93, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.778, "blimp/accuracy/npi_present_2": 0.591, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.869, "blimp/accuracy/anaphor_number_agreement": 0.97, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.942, "blimp/accuracy/existential_there_object_raising": 0.808, "blimp/accuracy/matrix_question_npi_licensor_present": 0.086, "blimp/accuracy/npi_present_1": 0.514, "blimp/accuracy/wh_vs_that_no_gap": 0.963, "blimp/accuracy/left_branch_island_echo_question": 0.427, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.969, "blimp/accuracy/causative": 0.676, "blimp/accuracy/group_average": 0.7208955223880593, "blimp/accuracy/seq_average": 0.7208955223880597, "cbt/accuracy/NE": 0.7083333333333334, "cbt/accuracy/V": 0.8632, "cbt/accuracy/CN": 0.7304, "cbt/accuracy/P": 0.84, "cbt/accuracy/group_average": 0.7854833333333333, "cbt/accuracy/seq_average": 0.7855142056822729, "hellaswag/accuracy/val": 0.27215694084843656, "hellaswag/accuracy/group_average": 0.27215694084843656, "hellaswag/accuracy/seq_average": 0.27215694084843656, "piqa/accuracy/val": 0.5560391730141458, "piqa/accuracy/group_average": 0.5560391730141458, "piqa/accuracy/seq_average": 0.5560391730141458, "ai2arc/accuracy/ARC-Easy": 0.30824524312896406, "ai2arc/accuracy/ARC-Challenge": 0.2034334763948498, "ai2arc/accuracy/group_average": 0.2558393597619069, "ai2arc/accuracy/seq_average": 0.27365439093484417, "race/accuracy/test/high": 0.2567181246426529, "race/accuracy/test/middle": 0.3266016713091922, "race/accuracy/group_average": 0.29165989797592257, "race/accuracy/seq_average": 0.27705715443858936, "siqa/accuracy/dev": 0.3572159672466735, "siqa/accuracy/group_average": 0.3572159672466735, "siqa/accuracy/seq_average": 0.3572159672466735, "commonsenseqa/accuracy/dev_rand_split": 0.23832923832923833, "commonsenseqa/accuracy/group_average": 0.23832923832923833, "commonsenseqa/accuracy/seq_average": 0.23832923832923833}
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-40000.pth.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "val/loss": 2.9593333895244296,
3
+ "val/accuracy": 0.4317985413566468,
4
+ "val/perplexity": 19.285111812135973,
5
+ "val/time_since_best_loss": 0,
6
+ "val/time_since_best_accuracy": 0,
7
+ "lambada/loss": 2.699080757472826,
8
+ "lambada/accuracy/total": 0.17895962732919254,
9
+ "lambada/accuracy/openai_last_token": 0.7329192546583851,
10
+ "lambada/perplexity": 20.9124715145371,
11
+ "lambada/lm_loss": 3.50822172681406,
12
+ "lambada/lm_perplexity": 33.38884048087871,
13
+ "lambada/time_since_best_loss": 0,
14
+ "lambada/time_since_best_accuracy": 0,
15
+ "mean_accuracy": 0.30537908434291966,
16
+ "mean_loss": 2.829207073498628,
17
+ "blimp/accuracy/passive_2": 0.856,
18
+ "blimp/accuracy/determiner_noun_agreement_2": 0.971,
19
+ "blimp/accuracy/ellipsis_n_bar_1": 0.793,
20
+ "blimp/accuracy/tough_vs_raising_2": 0.868,
21
+ "blimp/accuracy/tough_vs_raising_1": 0.518,
22
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.816,
23
+ "blimp/accuracy/principle_A_reconstruction": 0.409,
24
+ "blimp/accuracy/wh_vs_that_with_gap": 0.482,
25
+ "blimp/accuracy/principle_A_domain_2": 0.822,
26
+ "blimp/accuracy/determiner_noun_agreement_1": 0.968,
27
+ "blimp/accuracy/ellipsis_n_bar_2": 0.893,
28
+ "blimp/accuracy/principle_A_domain_3": 0.554,
29
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.888,
30
+ "blimp/accuracy/animate_subject_trans": 0.88,
31
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.815,
32
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.529,
33
+ "blimp/accuracy/transitive": 0.821,
34
+ "blimp/accuracy/sentential_subject_island": 0.364,
35
+ "blimp/accuracy/adjunct_island": 0.706,
36
+ "blimp/accuracy/intransitive": 0.723,
37
+ "blimp/accuracy/existential_there_subject_raising": 0.827,
38
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.956,
39
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.195,
40
+ "blimp/accuracy/principle_A_case_1": 1.0,
41
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.218,
42
+ "blimp/accuracy/only_npi_scope": 0.649,
43
+ "blimp/accuracy/superlative_quantifiers_2": 0.704,
44
+ "blimp/accuracy/passive_1": 0.895,
45
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.833,
46
+ "blimp/accuracy/inchoative": 0.553,
47
+ "blimp/accuracy/anaphor_gender_agreement": 0.905,
48
+ "blimp/accuracy/principle_A_c_command": 0.448,
49
+ "blimp/accuracy/only_npi_licensor_present": 0.618,
50
+ "blimp/accuracy/expletive_it_object_raising": 0.747,
51
+ "blimp/accuracy/left_branch_island_simple_question": 0.233,
52
+ "blimp/accuracy/wh_questions_subject_gap": 0.912,
53
+ "blimp/accuracy/existential_there_quantifiers_2": 0.293,
54
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.933,
55
+ "blimp/accuracy/sentential_negation_npi_scope": 0.625,
56
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.784,
57
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.858,
58
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.845,
59
+ "blimp/accuracy/principle_A_case_2": 0.972,
60
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.733,
61
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.998,
62
+ "blimp/accuracy/superlative_quantifiers_1": 0.632,
63
+ "blimp/accuracy/wh_island": 0.652,
64
+ "blimp/accuracy/principle_A_domain_1": 0.983,
65
+ "blimp/accuracy/complex_NP_island": 0.491,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.952,
67
+ "blimp/accuracy/irregular_past_participle_verbs": 0.889,
68
+ "blimp/accuracy/drop_argument": 0.769,
69
+ "blimp/accuracy/wh_questions_object_gap": 0.709,
70
+ "blimp/accuracy/animate_subject_passive": 0.768,
71
+ "blimp/accuracy/existential_there_quantifiers_1": 0.949,
72
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.823,
73
+ "blimp/accuracy/npi_present_2": 0.556,
74
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.867,
75
+ "blimp/accuracy/anaphor_number_agreement": 0.954,
76
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.944,
77
+ "blimp/accuracy/existential_there_object_raising": 0.816,
78
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.047,
79
+ "blimp/accuracy/npi_present_1": 0.5,
80
+ "blimp/accuracy/wh_vs_that_no_gap": 0.968,
81
+ "blimp/accuracy/left_branch_island_echo_question": 0.436,
82
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.98,
83
+ "blimp/accuracy/causative": 0.666,
84
+ "blimp/accuracy/group_average": 0.727776119402985,
85
+ "blimp/accuracy/seq_average": 0.727776119402985,
86
+ "cbt/accuracy/NE": 0.7123397435897436,
87
+ "cbt/accuracy/V": 0.8652,
88
+ "cbt/accuracy/CN": 0.726,
89
+ "cbt/accuracy/P": 0.8416,
90
+ "cbt/accuracy/group_average": 0.7862849358974359,
91
+ "cbt/accuracy/seq_average": 0.7863145258103241,
92
+ "hellaswag/accuracy/val": 0.27375024895439154,
93
+ "hellaswag/accuracy/group_average": 0.27375024895439154,
94
+ "hellaswag/accuracy/seq_average": 0.27375024895439154,
95
+ "piqa/accuracy/val": 0.5533188248095756,
96
+ "piqa/accuracy/group_average": 0.5533188248095756,
97
+ "piqa/accuracy/seq_average": 0.5533188248095756,
98
+ "ai2arc/accuracy/ARC-Easy": 0.3112050739957717,
99
+ "ai2arc/accuracy/ARC-Challenge": 0.19313304721030042,
100
+ "ai2arc/accuracy/group_average": 0.25216906060303607,
101
+ "ai2arc/accuracy/seq_average": 0.27223796033994335,
102
+ "race/accuracy/test/high": 0.25586049170954833,
103
+ "race/accuracy/test/middle": 0.3224233983286908,
104
+ "race/accuracy/group_average": 0.2891419450191196,
105
+ "race/accuracy/seq_average": 0.2752330766112687,
106
+ "siqa/accuracy/dev": 0.3607983623336745,
107
+ "siqa/accuracy/group_average": 0.3607983623336745,
108
+ "siqa/accuracy/seq_average": 0.3607983623336745,
109
+ "commonsenseqa/accuracy/dev_rand_split": 0.24406224406224405,
110
+ "commonsenseqa/accuracy/group_average": 0.24406224406224405,
111
+ "commonsenseqa/accuracy/seq_average": 0.24406224406224405
112
+ }
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-50000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.9244496179005455, "val/accuracy": 0.4370795355902778, "val/perplexity": 18.62397292727035, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.761265725082492, "lambada/accuracy/total": 0.18808229813664595, "lambada/accuracy/openai_last_token": 0.7375776397515528, "lambada/perplexity": 19.540106086510423, "lambada/lm_loss": 3.491144462696324, "lambada/lm_perplexity": 32.82349147997168, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3125809168634619, "mean_loss": 2.842857671491519, "blimp/accuracy/passive_2": 0.872, "blimp/accuracy/determiner_noun_agreement_2": 0.977, "blimp/accuracy/ellipsis_n_bar_1": 0.758, "blimp/accuracy/tough_vs_raising_2": 0.855, "blimp/accuracy/tough_vs_raising_1": 0.481, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.854, "blimp/accuracy/principle_A_reconstruction": 0.486, "blimp/accuracy/wh_vs_that_with_gap": 0.507, "blimp/accuracy/principle_A_domain_2": 0.872, "blimp/accuracy/determiner_noun_agreement_1": 0.976, "blimp/accuracy/ellipsis_n_bar_2": 0.902, "blimp/accuracy/principle_A_domain_3": 0.578, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.907, "blimp/accuracy/animate_subject_trans": 0.875, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.84, "blimp/accuracy/distractor_agreement_relative_clause": 0.508, "blimp/accuracy/transitive": 0.828, "blimp/accuracy/sentential_subject_island": 0.359, "blimp/accuracy/adjunct_island": 0.709, "blimp/accuracy/intransitive": 0.713, "blimp/accuracy/existential_there_subject_raising": 0.803, "blimp/accuracy/irregular_past_participle_adjectives": 0.926, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.154, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.181, "blimp/accuracy/only_npi_scope": 0.683, "blimp/accuracy/superlative_quantifiers_2": 0.666, "blimp/accuracy/passive_1": 0.898, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.853, "blimp/accuracy/inchoative": 0.562, "blimp/accuracy/anaphor_gender_agreement": 0.913, "blimp/accuracy/principle_A_c_command": 0.505, "blimp/accuracy/only_npi_licensor_present": 0.47, "blimp/accuracy/expletive_it_object_raising": 0.749, "blimp/accuracy/left_branch_island_simple_question": 0.164, "blimp/accuracy/wh_questions_subject_gap": 0.904, "blimp/accuracy/existential_there_quantifiers_2": 0.307, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.942, "blimp/accuracy/sentential_negation_npi_scope": 0.538, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.851, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.84, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.819, "blimp/accuracy/principle_A_case_2": 0.969, "blimp/accuracy/distractor_agreement_relational_noun": 0.736, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.997, "blimp/accuracy/superlative_quantifiers_1": 0.589, "blimp/accuracy/wh_island": 0.634, "blimp/accuracy/principle_A_domain_1": 0.988, "blimp/accuracy/complex_NP_island": 0.522, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.944, "blimp/accuracy/irregular_past_participle_verbs": 0.879, "blimp/accuracy/drop_argument": 0.735, "blimp/accuracy/wh_questions_object_gap": 0.773, "blimp/accuracy/animate_subject_passive": 0.775, "blimp/accuracy/existential_there_quantifiers_1": 0.956, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.862, "blimp/accuracy/npi_present_2": 0.588, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.888, "blimp/accuracy/anaphor_number_agreement": 0.969, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.831, "blimp/accuracy/matrix_question_npi_licensor_present": 0.102, "blimp/accuracy/npi_present_1": 0.554, "blimp/accuracy/wh_vs_that_no_gap": 0.971, "blimp/accuracy/left_branch_island_echo_question": 0.497, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.973, "blimp/accuracy/causative": 0.685, "blimp/accuracy/group_average": 0.7307014925373134, "blimp/accuracy/seq_average": 0.7307014925373134, "cbt/accuracy/NE": 0.7247596153846154, "cbt/accuracy/V": 0.872, "cbt/accuracy/CN": 0.7436, "cbt/accuracy/P": 0.8496, "cbt/accuracy/group_average": 0.7974899038461539, "cbt/accuracy/seq_average": 0.7975190076030412, "hellaswag/accuracy/val": 0.27594104760007965, "hellaswag/accuracy/group_average": 0.27594104760007965, "hellaswag/accuracy/seq_average": 0.27594104760007965, "piqa/accuracy/val": 0.5505984766050055, "piqa/accuracy/group_average": 0.5505984766050055, "piqa/accuracy/seq_average": 0.5505984766050055, "ai2arc/accuracy/ARC-Easy": 0.30274841437632133, "ai2arc/accuracy/ARC-Challenge": 0.19742489270386265, "ai2arc/accuracy/group_average": 0.25008665354009196, "ai2arc/accuracy/seq_average": 0.2679886685552408, "race/accuracy/test/high": 0.2550028587764437, "race/accuracy/test/middle": 0.32729805013927576, "race/accuracy/group_average": 0.2911504544578597, "race/accuracy/seq_average": 0.2760437778678557, "siqa/accuracy/dev": 0.3638689866939611, "siqa/accuracy/group_average": 0.3638689866939611, "siqa/accuracy/seq_average": 0.3638689866939611, "commonsenseqa/accuracy/dev_rand_split": 0.2416052416052416, "commonsenseqa/accuracy/group_average": 0.2416052416052416, "commonsenseqa/accuracy/seq_average": 0.2416052416052416}
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-60000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.8948117210751487, "val/accuracy": 0.4414285326760913, "val/perplexity": 18.080097019549328, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6400343616556676, "lambada/accuracy/total": 0.20089285714285715, "lambada/accuracy/openai_last_token": 0.7412655279503105, "lambada/perplexity": 18.47638672889119, "lambada/lm_loss": 3.447593824904876, "lambada/lm_perplexity": 31.42468796476684, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3211606949094742, "mean_loss": 2.767423041365408, "blimp/accuracy/passive_2": 0.899, "blimp/accuracy/determiner_noun_agreement_2": 0.976, "blimp/accuracy/ellipsis_n_bar_1": 0.783, "blimp/accuracy/tough_vs_raising_2": 0.881, "blimp/accuracy/tough_vs_raising_1": 0.481, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.833, "blimp/accuracy/principle_A_reconstruction": 0.468, "blimp/accuracy/wh_vs_that_with_gap": 0.473, "blimp/accuracy/principle_A_domain_2": 0.817, "blimp/accuracy/determiner_noun_agreement_1": 0.977, "blimp/accuracy/ellipsis_n_bar_2": 0.901, "blimp/accuracy/principle_A_domain_3": 0.586, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.902, "blimp/accuracy/animate_subject_trans": 0.89, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.827, "blimp/accuracy/distractor_agreement_relative_clause": 0.529, "blimp/accuracy/transitive": 0.845, "blimp/accuracy/sentential_subject_island": 0.331, "blimp/accuracy/adjunct_island": 0.72, "blimp/accuracy/intransitive": 0.741, "blimp/accuracy/existential_there_subject_raising": 0.82, "blimp/accuracy/irregular_past_participle_adjectives": 0.922, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.19, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.207, "blimp/accuracy/only_npi_scope": 0.659, "blimp/accuracy/superlative_quantifiers_2": 0.765, "blimp/accuracy/passive_1": 0.881, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.844, "blimp/accuracy/inchoative": 0.563, "blimp/accuracy/anaphor_gender_agreement": 0.942, "blimp/accuracy/principle_A_c_command": 0.521, "blimp/accuracy/only_npi_licensor_present": 0.561, "blimp/accuracy/expletive_it_object_raising": 0.736, "blimp/accuracy/left_branch_island_simple_question": 0.205, "blimp/accuracy/wh_questions_subject_gap": 0.915, "blimp/accuracy/existential_there_quantifiers_2": 0.301, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.939, "blimp/accuracy/sentential_negation_npi_scope": 0.534, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.814, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.815, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.816, "blimp/accuracy/principle_A_case_2": 0.97, "blimp/accuracy/distractor_agreement_relational_noun": 0.7, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.56, "blimp/accuracy/wh_island": 0.739, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.548, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.952, "blimp/accuracy/irregular_past_participle_verbs": 0.868, "blimp/accuracy/drop_argument": 0.732, "blimp/accuracy/wh_questions_object_gap": 0.787, "blimp/accuracy/animate_subject_passive": 0.788, "blimp/accuracy/existential_there_quantifiers_1": 0.952, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.827, "blimp/accuracy/npi_present_2": 0.565, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.904, "blimp/accuracy/anaphor_number_agreement": 0.967, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.946, "blimp/accuracy/existential_there_object_raising": 0.798, "blimp/accuracy/matrix_question_npi_licensor_present": 0.121, "blimp/accuracy/npi_present_1": 0.532, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.429, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.975, "blimp/accuracy/causative": 0.691, "blimp/accuracy/group_average": 0.7328955223880597, "blimp/accuracy/seq_average": 0.7328955223880597, "cbt/accuracy/NE": 0.7327724358974359, "cbt/accuracy/V": 0.8732, "cbt/accuracy/CN": 0.7616, "cbt/accuracy/P": 0.8504, "cbt/accuracy/group_average": 0.804493108974359, "cbt/accuracy/seq_average": 0.8045218087234894, "hellaswag/accuracy/val": 0.27604062935670187, "hellaswag/accuracy/group_average": 0.27604062935670187, "hellaswag/accuracy/seq_average": 0.27604062935670187, "piqa/accuracy/val": 0.5511425462459195, "piqa/accuracy/group_average": 0.5511425462459195, "piqa/accuracy/seq_average": 0.5511425462459195, "ai2arc/accuracy/ARC-Easy": 0.3175475687103594, "ai2arc/accuracy/ARC-Challenge": 0.19484978540772532, "ai2arc/accuracy/group_average": 0.25619867705904237, "ai2arc/accuracy/seq_average": 0.2770538243626062, "race/accuracy/test/high": 0.259576901086335, "race/accuracy/test/middle": 0.3307799442896936, "race/accuracy/group_average": 0.2951784226880143, "race/accuracy/seq_average": 0.2802999594649372, "siqa/accuracy/dev": 0.3556806550665302, "siqa/accuracy/group_average": 0.3556806550665302, "siqa/accuracy/seq_average": 0.3556806550665302, "commonsenseqa/accuracy/dev_rand_split": 0.24733824733824733, "commonsenseqa/accuracy/group_average": 0.24733824733824733, "commonsenseqa/accuracy/seq_average": 0.24733824733824733}
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-70000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.871607946971106, "val/accuracy": 0.444488040984623, "val/perplexity": 17.66540040225413, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.615190399359472, "lambada/accuracy/total": 0.18303571428571427, "lambada/accuracy/openai_last_token": 0.7387422360248447, "lambada/perplexity": 19.340302937732627, "lambada/lm_loss": 3.429595178430748, "lambada/lm_perplexity": 30.864145748404383, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.31376187763516866, "mean_loss": 2.743399173165289, "blimp/accuracy/passive_2": 0.885, "blimp/accuracy/determiner_noun_agreement_2": 0.978, "blimp/accuracy/ellipsis_n_bar_1": 0.79, "blimp/accuracy/tough_vs_raising_2": 0.86, "blimp/accuracy/tough_vs_raising_1": 0.499, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.83, "blimp/accuracy/principle_A_reconstruction": 0.414, "blimp/accuracy/wh_vs_that_with_gap": 0.477, "blimp/accuracy/principle_A_domain_2": 0.847, "blimp/accuracy/determiner_noun_agreement_1": 0.977, "blimp/accuracy/ellipsis_n_bar_2": 0.896, "blimp/accuracy/principle_A_domain_3": 0.598, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.914, "blimp/accuracy/animate_subject_trans": 0.901, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.829, "blimp/accuracy/distractor_agreement_relative_clause": 0.545, "blimp/accuracy/transitive": 0.838, "blimp/accuracy/sentential_subject_island": 0.328, "blimp/accuracy/adjunct_island": 0.7, "blimp/accuracy/intransitive": 0.742, "blimp/accuracy/existential_there_subject_raising": 0.83, "blimp/accuracy/irregular_past_participle_adjectives": 0.934, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.19, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.214, "blimp/accuracy/only_npi_scope": 0.653, "blimp/accuracy/superlative_quantifiers_2": 0.817, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.855, "blimp/accuracy/inchoative": 0.571, "blimp/accuracy/anaphor_gender_agreement": 0.942, "blimp/accuracy/principle_A_c_command": 0.52, "blimp/accuracy/only_npi_licensor_present": 0.554, "blimp/accuracy/expletive_it_object_raising": 0.749, "blimp/accuracy/left_branch_island_simple_question": 0.214, "blimp/accuracy/wh_questions_subject_gap": 0.902, "blimp/accuracy/existential_there_quantifiers_2": 0.414, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.943, "blimp/accuracy/sentential_negation_npi_scope": 0.595, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.839, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.817, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.823, "blimp/accuracy/principle_A_case_2": 0.96, "blimp/accuracy/distractor_agreement_relational_noun": 0.71, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.996, "blimp/accuracy/superlative_quantifiers_1": 0.616, "blimp/accuracy/wh_island": 0.717, "blimp/accuracy/principle_A_domain_1": 0.98, "blimp/accuracy/complex_NP_island": 0.51, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.952, "blimp/accuracy/irregular_past_participle_verbs": 0.841, "blimp/accuracy/drop_argument": 0.73, "blimp/accuracy/wh_questions_object_gap": 0.755, "blimp/accuracy/animate_subject_passive": 0.791, "blimp/accuracy/existential_there_quantifiers_1": 0.967, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.842, "blimp/accuracy/npi_present_2": 0.545, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.892, "blimp/accuracy/anaphor_number_agreement": 0.976, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.955, "blimp/accuracy/existential_there_object_raising": 0.808, "blimp/accuracy/matrix_question_npi_licensor_present": 0.101, "blimp/accuracy/npi_present_1": 0.543, "blimp/accuracy/wh_vs_that_no_gap": 0.961, "blimp/accuracy/left_branch_island_echo_question": 0.475, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.967, "blimp/accuracy/causative": 0.687, "blimp/accuracy/group_average": 0.7372537313432833, "blimp/accuracy/seq_average": 0.7372537313432835, "cbt/accuracy/NE": 0.7323717948717948, "cbt/accuracy/V": 0.8724, "cbt/accuracy/CN": 0.7536, "cbt/accuracy/P": 0.856, "cbt/accuracy/group_average": 0.8035929487179487, "cbt/accuracy/seq_average": 0.8036214485794317, "hellaswag/accuracy/val": 0.27912766381198967, "hellaswag/accuracy/group_average": 0.27912766381198967, "hellaswag/accuracy/seq_average": 0.27912766381198967, "piqa/accuracy/val": 0.5652883569096845, "piqa/accuracy/group_average": 0.5652883569096845, "piqa/accuracy/seq_average": 0.5652883569096845, "ai2arc/accuracy/ARC-Easy": 0.3192389006342495, "ai2arc/accuracy/ARC-Challenge": 0.20600858369098712, "ai2arc/accuracy/group_average": 0.2626237421626183, "ai2arc/accuracy/seq_average": 0.2818696883852691, "race/accuracy/test/high": 0.26214979988564896, "race/accuracy/test/middle": 0.32729805013927576, "race/accuracy/group_average": 0.29472392501246236, "race/accuracy/seq_average": 0.28111066072152413, "siqa/accuracy/dev": 0.3577277379733879, "siqa/accuracy/group_average": 0.3577277379733879, "siqa/accuracy/seq_average": 0.3577277379733879, "commonsenseqa/accuracy/dev_rand_split": 0.2497952497952498, "commonsenseqa/accuracy/group_average": 0.2497952497952498, "commonsenseqa/accuracy/seq_average": 0.2497952497952498}
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-80000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.851964799184648, "val/accuracy": 0.44763571118551587, "val/perplexity": 17.32178225128395, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6014690517638783, "lambada/accuracy/total": 0.2063276397515528, "lambada/accuracy/openai_last_token": 0.7449534161490683, "lambada/perplexity": 17.736084987596502, "lambada/lm_loss": 3.414293893209742, "lambada/lm_perplexity": 30.395479393995753, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.32698167546853435, "mean_loss": 2.726716925474263, "blimp/accuracy/passive_2": 0.892, "blimp/accuracy/determiner_noun_agreement_2": 0.976, "blimp/accuracy/ellipsis_n_bar_1": 0.786, "blimp/accuracy/tough_vs_raising_2": 0.873, "blimp/accuracy/tough_vs_raising_1": 0.531, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.853, "blimp/accuracy/principle_A_reconstruction": 0.427, "blimp/accuracy/wh_vs_that_with_gap": 0.52, "blimp/accuracy/principle_A_domain_2": 0.824, "blimp/accuracy/determiner_noun_agreement_1": 0.977, "blimp/accuracy/ellipsis_n_bar_2": 0.914, "blimp/accuracy/principle_A_domain_3": 0.603, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.895, "blimp/accuracy/animate_subject_trans": 0.881, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.838, "blimp/accuracy/distractor_agreement_relative_clause": 0.527, "blimp/accuracy/transitive": 0.856, "blimp/accuracy/sentential_subject_island": 0.346, "blimp/accuracy/adjunct_island": 0.698, "blimp/accuracy/intransitive": 0.763, "blimp/accuracy/existential_there_subject_raising": 0.829, "blimp/accuracy/irregular_past_participle_adjectives": 0.96, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.186, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.22, "blimp/accuracy/only_npi_scope": 0.614, "blimp/accuracy/superlative_quantifiers_2": 0.785, "blimp/accuracy/passive_1": 0.905, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.842, "blimp/accuracy/inchoative": 0.592, "blimp/accuracy/anaphor_gender_agreement": 0.961, "blimp/accuracy/principle_A_c_command": 0.51, "blimp/accuracy/only_npi_licensor_present": 0.484, "blimp/accuracy/expletive_it_object_raising": 0.752, "blimp/accuracy/left_branch_island_simple_question": 0.217, "blimp/accuracy/wh_questions_subject_gap": 0.905, "blimp/accuracy/existential_there_quantifiers_2": 0.398, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.941, "blimp/accuracy/sentential_negation_npi_scope": 0.618, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.818, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.838, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.827, "blimp/accuracy/principle_A_case_2": 0.97, "blimp/accuracy/distractor_agreement_relational_noun": 0.699, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.993, "blimp/accuracy/superlative_quantifiers_1": 0.575, "blimp/accuracy/wh_island": 0.783, "blimp/accuracy/principle_A_domain_1": 0.982, "blimp/accuracy/complex_NP_island": 0.552, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.945, "blimp/accuracy/irregular_past_participle_verbs": 0.85, "blimp/accuracy/drop_argument": 0.755, "blimp/accuracy/wh_questions_object_gap": 0.764, "blimp/accuracy/animate_subject_passive": 0.784, "blimp/accuracy/existential_there_quantifiers_1": 0.964, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.847, "blimp/accuracy/npi_present_2": 0.588, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.902, "blimp/accuracy/anaphor_number_agreement": 0.976, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.954, "blimp/accuracy/existential_there_object_raising": 0.837, "blimp/accuracy/matrix_question_npi_licensor_present": 0.086, "blimp/accuracy/npi_present_1": 0.577, "blimp/accuracy/wh_vs_that_no_gap": 0.966, "blimp/accuracy/left_branch_island_echo_question": 0.365, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.972, "blimp/accuracy/causative": 0.704, "blimp/accuracy/group_average": 0.7398805970149255, "blimp/accuracy/seq_average": 0.7398805970149254, "cbt/accuracy/NE": 0.7383814102564102, "cbt/accuracy/V": 0.8748, "cbt/accuracy/CN": 0.7628, "cbt/accuracy/P": 0.856, "cbt/accuracy/group_average": 0.8079953525641025, "cbt/accuracy/seq_average": 0.8080232092837135, "hellaswag/accuracy/val": 0.27753435570603463, "hellaswag/accuracy/group_average": 0.27753435570603463, "hellaswag/accuracy/seq_average": 0.27753435570603463, "piqa/accuracy/val": 0.5669205658324266, "piqa/accuracy/group_average": 0.5669205658324266, "piqa/accuracy/seq_average": 0.5669205658324266, "ai2arc/accuracy/ARC-Easy": 0.32727272727272727, "ai2arc/accuracy/ARC-Challenge": 0.2017167381974249, "ai2arc/accuracy/group_average": 0.26449473273507607, "ai2arc/accuracy/seq_average": 0.2858356940509915, "race/accuracy/test/high": 0.2607204116638079, "race/accuracy/test/middle": 0.33147632311977715, "race/accuracy/group_average": 0.2960983673917925, "race/accuracy/seq_average": 0.2813133360356709, "siqa/accuracy/dev": 0.35516888433981575, "siqa/accuracy/group_average": 0.35516888433981575, "siqa/accuracy/seq_average": 0.35516888433981575, "commonsenseqa/accuracy/dev_rand_split": 0.24815724815724816, "commonsenseqa/accuracy/group_average": 0.24815724815724816, "commonsenseqa/accuracy/seq_average": 0.24815724815724816}
Pretrain_language_model/save/slimpajama_dense_154M/export/result-model-90000.pth.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"val/loss": 2.841030060298859, "val/accuracy": 0.4493611653645833, "val/perplexity": 17.13340489094965, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.629480587029309, "lambada/accuracy/total": 0.20574534161490685, "lambada/accuracy/openai_last_token": 0.7455357142857143, "lambada/perplexity": 17.387048886773783, "lambada/lm_loss": 3.394580553646558, "lambada/lm_perplexity": 29.802150449616367, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.32755325348974507, "mean_loss": 2.7352553236640844, "blimp/accuracy/passive_2": 0.89, "blimp/accuracy/determiner_noun_agreement_2": 0.981, "blimp/accuracy/ellipsis_n_bar_1": 0.797, "blimp/accuracy/tough_vs_raising_2": 0.866, "blimp/accuracy/tough_vs_raising_1": 0.489, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.84, "blimp/accuracy/principle_A_reconstruction": 0.383, "blimp/accuracy/wh_vs_that_with_gap": 0.527, "blimp/accuracy/principle_A_domain_2": 0.838, "blimp/accuracy/determiner_noun_agreement_1": 0.983, "blimp/accuracy/ellipsis_n_bar_2": 0.919, "blimp/accuracy/principle_A_domain_3": 0.602, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.903, "blimp/accuracy/animate_subject_trans": 0.895, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.846, "blimp/accuracy/distractor_agreement_relative_clause": 0.533, "blimp/accuracy/transitive": 0.832, "blimp/accuracy/sentential_subject_island": 0.314, "blimp/accuracy/adjunct_island": 0.65, "blimp/accuracy/intransitive": 0.759, "blimp/accuracy/existential_there_subject_raising": 0.823, "blimp/accuracy/irregular_past_participle_adjectives": 0.927, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.224, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.211, "blimp/accuracy/only_npi_scope": 0.575, "blimp/accuracy/superlative_quantifiers_2": 0.722, "blimp/accuracy/passive_1": 0.895, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.852, "blimp/accuracy/inchoative": 0.576, "blimp/accuracy/anaphor_gender_agreement": 0.956, "blimp/accuracy/principle_A_c_command": 0.514, "blimp/accuracy/only_npi_licensor_present": 0.512, "blimp/accuracy/expletive_it_object_raising": 0.747, "blimp/accuracy/left_branch_island_simple_question": 0.246, "blimp/accuracy/wh_questions_subject_gap": 0.899, "blimp/accuracy/existential_there_quantifiers_2": 0.385, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.945, "blimp/accuracy/sentential_negation_npi_scope": 0.578, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.807, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.803, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.841, "blimp/accuracy/principle_A_case_2": 0.955, "blimp/accuracy/distractor_agreement_relational_noun": 0.693, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989, "blimp/accuracy/superlative_quantifiers_1": 0.618, "blimp/accuracy/wh_island": 0.699, "blimp/accuracy/principle_A_domain_1": 0.965, "blimp/accuracy/complex_NP_island": 0.529, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.952, "blimp/accuracy/irregular_past_participle_verbs": 0.858, "blimp/accuracy/drop_argument": 0.755, "blimp/accuracy/wh_questions_object_gap": 0.769, "blimp/accuracy/animate_subject_passive": 0.791, "blimp/accuracy/existential_there_quantifiers_1": 0.964, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.838, "blimp/accuracy/npi_present_2": 0.577, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.908, "blimp/accuracy/anaphor_number_agreement": 0.974, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.961, "blimp/accuracy/existential_there_object_raising": 0.83, "blimp/accuracy/matrix_question_npi_licensor_present": 0.118, "blimp/accuracy/npi_present_1": 0.565, "blimp/accuracy/wh_vs_that_no_gap": 0.959, "blimp/accuracy/left_branch_island_echo_question": 0.428, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.963, "blimp/accuracy/causative": 0.703, "blimp/accuracy/group_average": 0.7350149253731343, "blimp/accuracy/seq_average": 0.7350149253731343, "cbt/accuracy/NE": 0.7415865384615384, "cbt/accuracy/V": 0.8792, "cbt/accuracy/CN": 0.7628, "cbt/accuracy/P": 0.8612, "cbt/accuracy/group_average": 0.8111966346153845, "cbt/accuracy/seq_average": 0.8112244897959183, "hellaswag/accuracy/val": 0.27912766381198967, "hellaswag/accuracy/group_average": 0.27912766381198967, "hellaswag/accuracy/seq_average": 0.27912766381198967, "piqa/accuracy/val": 0.55930359085963, "piqa/accuracy/group_average": 0.55930359085963, "piqa/accuracy/seq_average": 0.55930359085963, "ai2arc/accuracy/ARC-Easy": 0.31627906976744186, "ai2arc/accuracy/ARC-Challenge": 0.21201716738197424, "ai2arc/accuracy/group_average": 0.26414811857470805, "ai2arc/accuracy/seq_average": 0.2818696883852691, "race/accuracy/test/high": 0.2610062893081761, "race/accuracy/test/middle": 0.334958217270195, "race/accuracy/group_average": 0.29798225328918554, "race/accuracy/seq_average": 0.28252938792055127, "siqa/accuracy/dev": 0.35977482088024565, "siqa/accuracy/group_average": 0.35977482088024565, "siqa/accuracy/seq_average": 0.35977482088024565, "commonsenseqa/accuracy/dev_rand_split": 0.25143325143325146, "commonsenseqa/accuracy/group_average": 0.25143325143325146, "commonsenseqa/accuracy/seq_average": 0.25143325143325146}