Upload folder using huggingface_hub
#449
by
DavidNguyen
- opened
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-10000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-100000.pth.json +81 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-20000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-30000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-40000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-50000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-60000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-70000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-80000.pth.json +1 -0
- Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-90000.pth.json +1 -0
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-10000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 3.026035853794643, "val/accuracy": 0.42390562996031744, "val/perplexity": 20.61534814155123, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "mean_accuracy": 0.42390562996031744, "mean_loss": 3.026035853794643, "blimp/accuracy/passive_2": 0.869, "blimp/accuracy/determiner_noun_agreement_2": 0.95, "blimp/accuracy/ellipsis_n_bar_1": 0.698, "blimp/accuracy/tough_vs_raising_2": 0.778, "blimp/accuracy/tough_vs_raising_1": 0.571, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.885, "blimp/accuracy/principle_A_reconstruction": 0.326, "blimp/accuracy/wh_vs_that_with_gap": 0.429, "blimp/accuracy/principle_A_domain_2": 0.838, "blimp/accuracy/determiner_noun_agreement_1": 0.966, "blimp/accuracy/ellipsis_n_bar_2": 0.854, "blimp/accuracy/principle_A_domain_3": 0.523, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.863, "blimp/accuracy/animate_subject_trans": 0.858, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.748, "blimp/accuracy/distractor_agreement_relative_clause": 0.435, "blimp/accuracy/transitive": 0.829, "blimp/accuracy/sentential_subject_island": 0.385, "blimp/accuracy/adjunct_island": 0.734, "blimp/accuracy/intransitive": 0.709, "blimp/accuracy/existential_there_subject_raising": 0.813, "blimp/accuracy/irregular_past_participle_adjectives": 0.892, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.182, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.188, "blimp/accuracy/only_npi_scope": 0.653, "blimp/accuracy/superlative_quantifiers_2": 0.677, "blimp/accuracy/passive_1": 0.88, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.872, "blimp/accuracy/inchoative": 0.518, "blimp/accuracy/anaphor_gender_agreement": 0.926, "blimp/accuracy/principle_A_c_command": 0.476, "blimp/accuracy/only_npi_licensor_present": 0.656, "blimp/accuracy/expletive_it_object_raising": 0.769, "blimp/accuracy/left_branch_island_simple_question": 0.195, "blimp/accuracy/wh_questions_subject_gap": 0.918, "blimp/accuracy/existential_there_quantifiers_2": 0.51, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.906, "blimp/accuracy/sentential_negation_npi_scope": 0.409, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.759, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.862, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.837, "blimp/accuracy/principle_A_case_2": 0.872, "blimp/accuracy/distractor_agreement_relational_noun": 0.754, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.973, "blimp/accuracy/superlative_quantifiers_1": 0.744, "blimp/accuracy/wh_island": 0.776, "blimp/accuracy/principle_A_domain_1": 0.971, "blimp/accuracy/complex_NP_island": 0.513, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.906, "blimp/accuracy/irregular_past_participle_verbs": 0.813, "blimp/accuracy/drop_argument": 0.737, "blimp/accuracy/wh_questions_object_gap": 0.714, "blimp/accuracy/animate_subject_passive": 0.795, "blimp/accuracy/existential_there_quantifiers_1": 0.974, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.861, "blimp/accuracy/npi_present_2": 0.598, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.828, "blimp/accuracy/anaphor_number_agreement": 0.972, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.909, "blimp/accuracy/existential_there_object_raising": 0.728, "blimp/accuracy/matrix_question_npi_licensor_present": 0.065, "blimp/accuracy/npi_present_1": 0.529, "blimp/accuracy/wh_vs_that_no_gap": 0.966, "blimp/accuracy/left_branch_island_echo_question": 0.44, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.981, "blimp/accuracy/causative": 0.648, "blimp/accuracy/group_average": 0.7195970149253731, "blimp/accuracy/seq_average": 0.7195970149253731, "boolq/accuracy/dev": 0.5886850152905199, "boolq/accuracy/group_average": 0.5886850152905199, "boolq/accuracy/seq_average": 0.5886850152905199}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-100000.pth.json
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"val/loss": 2.611968751937624,
|
| 3 |
+
"val/accuracy": 0.479094974578373,
|
| 4 |
+
"val/perplexity": 13.62585038170228,
|
| 5 |
+
"val/time_since_best_loss": 0,
|
| 6 |
+
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"mean_accuracy": 0.479094974578373,
|
| 8 |
+
"mean_loss": 2.611968751937624,
|
| 9 |
+
"blimp/accuracy/passive_2": 0.905,
|
| 10 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.976,
|
| 11 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.799,
|
| 12 |
+
"blimp/accuracy/tough_vs_raising_2": 0.888,
|
| 13 |
+
"blimp/accuracy/tough_vs_raising_1": 0.596,
|
| 14 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.905,
|
| 15 |
+
"blimp/accuracy/principle_A_reconstruction": 0.309,
|
| 16 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.509,
|
| 17 |
+
"blimp/accuracy/principle_A_domain_2": 0.785,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.988,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.91,
|
| 20 |
+
"blimp/accuracy/principle_A_domain_3": 0.526,
|
| 21 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.937,
|
| 22 |
+
"blimp/accuracy/animate_subject_trans": 0.902,
|
| 23 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.879,
|
| 24 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.634,
|
| 25 |
+
"blimp/accuracy/transitive": 0.876,
|
| 26 |
+
"blimp/accuracy/sentential_subject_island": 0.29,
|
| 27 |
+
"blimp/accuracy/adjunct_island": 0.782,
|
| 28 |
+
"blimp/accuracy/intransitive": 0.787,
|
| 29 |
+
"blimp/accuracy/existential_there_subject_raising": 0.854,
|
| 30 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.941,
|
| 31 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.479,
|
| 32 |
+
"blimp/accuracy/principle_A_case_1": 1.0,
|
| 33 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.227,
|
| 34 |
+
"blimp/accuracy/only_npi_scope": 0.721,
|
| 35 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.793,
|
| 36 |
+
"blimp/accuracy/passive_1": 0.899,
|
| 37 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.914,
|
| 38 |
+
"blimp/accuracy/inchoative": 0.624,
|
| 39 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.94,
|
| 40 |
+
"blimp/accuracy/principle_A_c_command": 0.619,
|
| 41 |
+
"blimp/accuracy/only_npi_licensor_present": 0.808,
|
| 42 |
+
"blimp/accuracy/expletive_it_object_raising": 0.78,
|
| 43 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.558,
|
| 44 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.935,
|
| 45 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.495,
|
| 46 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.925,
|
| 47 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.604,
|
| 48 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.818,
|
| 49 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.858,
|
| 50 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.909,
|
| 51 |
+
"blimp/accuracy/principle_A_case_2": 0.961,
|
| 52 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.804,
|
| 53 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.978,
|
| 54 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.809,
|
| 55 |
+
"blimp/accuracy/wh_island": 0.767,
|
| 56 |
+
"blimp/accuracy/principle_A_domain_1": 0.978,
|
| 57 |
+
"blimp/accuracy/complex_NP_island": 0.576,
|
| 58 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.965,
|
| 59 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.865,
|
| 60 |
+
"blimp/accuracy/drop_argument": 0.78,
|
| 61 |
+
"blimp/accuracy/wh_questions_object_gap": 0.808,
|
| 62 |
+
"blimp/accuracy/animate_subject_passive": 0.794,
|
| 63 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.981,
|
| 64 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.844,
|
| 65 |
+
"blimp/accuracy/npi_present_2": 0.645,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.919,
|
| 67 |
+
"blimp/accuracy/anaphor_number_agreement": 0.983,
|
| 68 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.951,
|
| 69 |
+
"blimp/accuracy/existential_there_object_raising": 0.826,
|
| 70 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.265,
|
| 71 |
+
"blimp/accuracy/npi_present_1": 0.588,
|
| 72 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.971,
|
| 73 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.453,
|
| 74 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974,
|
| 75 |
+
"blimp/accuracy/causative": 0.699,
|
| 76 |
+
"blimp/accuracy/group_average": 0.7771343283582091,
|
| 77 |
+
"blimp/accuracy/seq_average": 0.777134328358209,
|
| 78 |
+
"boolq/accuracy/dev": 0.5929663608562691,
|
| 79 |
+
"boolq/accuracy/group_average": 0.5929663608562691,
|
| 80 |
+
"boolq/accuracy/seq_average": 0.5929663608562691
|
| 81 |
+
}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-20000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.8806939503503224, "val/accuracy": 0.4414091564360119, "val/perplexity": 17.826639691102145, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "mean_accuracy": 0.4414091564360119, "mean_loss": 2.8806939503503224, "blimp/accuracy/passive_2": 0.87, "blimp/accuracy/determiner_noun_agreement_2": 0.949, "blimp/accuracy/ellipsis_n_bar_1": 0.743, "blimp/accuracy/tough_vs_raising_2": 0.846, "blimp/accuracy/tough_vs_raising_1": 0.57, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.891, "blimp/accuracy/principle_A_reconstruction": 0.373, "blimp/accuracy/wh_vs_that_with_gap": 0.434, "blimp/accuracy/principle_A_domain_2": 0.837, "blimp/accuracy/determiner_noun_agreement_1": 0.976, "blimp/accuracy/ellipsis_n_bar_2": 0.897, "blimp/accuracy/principle_A_domain_3": 0.548, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.853, "blimp/accuracy/animate_subject_trans": 0.865, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.826, "blimp/accuracy/distractor_agreement_relative_clause": 0.477, "blimp/accuracy/transitive": 0.826, "blimp/accuracy/sentential_subject_island": 0.356, "blimp/accuracy/adjunct_island": 0.725, "blimp/accuracy/intransitive": 0.73, "blimp/accuracy/existential_there_subject_raising": 0.81, "blimp/accuracy/irregular_past_participle_adjectives": 0.846, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.247, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.151, "blimp/accuracy/only_npi_scope": 0.694, "blimp/accuracy/superlative_quantifiers_2": 0.841, "blimp/accuracy/passive_1": 0.893, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.912, "blimp/accuracy/inchoative": 0.55, "blimp/accuracy/anaphor_gender_agreement": 0.896, "blimp/accuracy/principle_A_c_command": 0.481, "blimp/accuracy/only_npi_licensor_present": 0.759, "blimp/accuracy/expletive_it_object_raising": 0.776, "blimp/accuracy/left_branch_island_simple_question": 0.289, "blimp/accuracy/wh_questions_subject_gap": 0.906, "blimp/accuracy/existential_there_quantifiers_2": 0.397, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.899, "blimp/accuracy/sentential_negation_npi_scope": 0.447, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.78, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.903, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.876, "blimp/accuracy/principle_A_case_2": 0.92, "blimp/accuracy/distractor_agreement_relational_noun": 0.789, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.962, "blimp/accuracy/superlative_quantifiers_1": 0.632, "blimp/accuracy/wh_island": 0.699, "blimp/accuracy/principle_A_domain_1": 0.986, "blimp/accuracy/complex_NP_island": 0.539, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.92, "blimp/accuracy/irregular_past_participle_verbs": 0.873, "blimp/accuracy/drop_argument": 0.735, "blimp/accuracy/wh_questions_object_gap": 0.775, "blimp/accuracy/animate_subject_passive": 0.754, "blimp/accuracy/existential_there_quantifiers_1": 0.987, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.838, "blimp/accuracy/npi_present_2": 0.597, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.886, "blimp/accuracy/anaphor_number_agreement": 0.968, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.941, "blimp/accuracy/existential_there_object_raising": 0.759, "blimp/accuracy/matrix_question_npi_licensor_present": 0.112, "blimp/accuracy/npi_present_1": 0.492, "blimp/accuracy/wh_vs_that_no_gap": 0.957, "blimp/accuracy/left_branch_island_echo_question": 0.342, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.981, "blimp/accuracy/causative": 0.665, "blimp/accuracy/group_average": 0.7321492537313434, "blimp/accuracy/seq_average": 0.7321492537313433, "boolq/accuracy/dev": 0.5944954128440367, "boolq/accuracy/group_average": 0.5944954128440367, "boolq/accuracy/seq_average": 0.5944954128440367}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-30000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.8053152901785716, "val/accuracy": 0.4509800502232143, "val/perplexity": 16.532287552323613, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "mean_accuracy": 0.4509800502232143, "mean_loss": 2.8053152901785716, "blimp/accuracy/passive_2": 0.872, "blimp/accuracy/determiner_noun_agreement_2": 0.96, "blimp/accuracy/ellipsis_n_bar_1": 0.771, "blimp/accuracy/tough_vs_raising_2": 0.828, "blimp/accuracy/tough_vs_raising_1": 0.604, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.883, "blimp/accuracy/principle_A_reconstruction": 0.361, "blimp/accuracy/wh_vs_that_with_gap": 0.486, "blimp/accuracy/principle_A_domain_2": 0.847, "blimp/accuracy/determiner_noun_agreement_1": 0.97, "blimp/accuracy/ellipsis_n_bar_2": 0.881, "blimp/accuracy/principle_A_domain_3": 0.562, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.89, "blimp/accuracy/animate_subject_trans": 0.884, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.849, "blimp/accuracy/distractor_agreement_relative_clause": 0.526, "blimp/accuracy/transitive": 0.841, "blimp/accuracy/sentential_subject_island": 0.395, "blimp/accuracy/adjunct_island": 0.782, "blimp/accuracy/intransitive": 0.799, "blimp/accuracy/existential_there_subject_raising": 0.811, "blimp/accuracy/irregular_past_participle_adjectives": 0.863, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.263, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.215, "blimp/accuracy/only_npi_scope": 0.751, "blimp/accuracy/superlative_quantifiers_2": 0.833, "blimp/accuracy/passive_1": 0.899, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.889, "blimp/accuracy/inchoative": 0.646, "blimp/accuracy/anaphor_gender_agreement": 0.935, "blimp/accuracy/principle_A_c_command": 0.558, "blimp/accuracy/only_npi_licensor_present": 0.641, "blimp/accuracy/expletive_it_object_raising": 0.799, "blimp/accuracy/left_branch_island_simple_question": 0.332, "blimp/accuracy/wh_questions_subject_gap": 0.898, "blimp/accuracy/existential_there_quantifiers_2": 0.316, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.91, "blimp/accuracy/sentential_negation_npi_scope": 0.599, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.773, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.912, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.889, "blimp/accuracy/principle_A_case_2": 0.908, "blimp/accuracy/distractor_agreement_relational_noun": 0.81, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.98, "blimp/accuracy/superlative_quantifiers_1": 0.712, "blimp/accuracy/wh_island": 0.781, "blimp/accuracy/principle_A_domain_1": 0.984, "blimp/accuracy/complex_NP_island": 0.458, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.94, "blimp/accuracy/irregular_past_participle_verbs": 0.845, "blimp/accuracy/drop_argument": 0.781, "blimp/accuracy/wh_questions_object_gap": 0.766, "blimp/accuracy/animate_subject_passive": 0.814, "blimp/accuracy/existential_there_quantifiers_1": 0.963, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.865, "blimp/accuracy/npi_present_2": 0.601, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.91, "blimp/accuracy/anaphor_number_agreement": 0.972, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.937, "blimp/accuracy/existential_there_object_raising": 0.815, "blimp/accuracy/matrix_question_npi_licensor_present": 0.246, "blimp/accuracy/npi_present_1": 0.541, "blimp/accuracy/wh_vs_that_no_gap": 0.944, "blimp/accuracy/left_branch_island_echo_question": 0.329, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.97, "blimp/accuracy/causative": 0.682, "blimp/accuracy/group_average": 0.7504029850746267, "blimp/accuracy/seq_average": 0.7504029850746269, "boolq/accuracy/dev": 0.6134556574923548, "boolq/accuracy/group_average": 0.6134556574923548, "boolq/accuracy/seq_average": 0.6134556574923548}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-40000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.756096976143973, "val/accuracy": 0.45806303478422616, "val/perplexity": 15.738295972101756, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "mean_accuracy": 0.45806303478422616, "mean_loss": 2.756096976143973, "blimp/accuracy/passive_2": 0.873, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.803, "blimp/accuracy/tough_vs_raising_2": 0.842, "blimp/accuracy/tough_vs_raising_1": 0.584, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.885, "blimp/accuracy/principle_A_reconstruction": 0.28, "blimp/accuracy/wh_vs_that_with_gap": 0.47, "blimp/accuracy/principle_A_domain_2": 0.827, "blimp/accuracy/determiner_noun_agreement_1": 0.987, "blimp/accuracy/ellipsis_n_bar_2": 0.9, "blimp/accuracy/principle_A_domain_3": 0.582, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.91, "blimp/accuracy/animate_subject_trans": 0.879, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.884, "blimp/accuracy/distractor_agreement_relative_clause": 0.613, "blimp/accuracy/transitive": 0.852, "blimp/accuracy/sentential_subject_island": 0.32, "blimp/accuracy/adjunct_island": 0.8, "blimp/accuracy/intransitive": 0.801, "blimp/accuracy/existential_there_subject_raising": 0.854, "blimp/accuracy/irregular_past_participle_adjectives": 0.906, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.288, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.201, "blimp/accuracy/only_npi_scope": 0.747, "blimp/accuracy/superlative_quantifiers_2": 0.724, "blimp/accuracy/passive_1": 0.892, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.877, "blimp/accuracy/inchoative": 0.639, "blimp/accuracy/anaphor_gender_agreement": 0.939, "blimp/accuracy/principle_A_c_command": 0.553, "blimp/accuracy/only_npi_licensor_present": 0.806, "blimp/accuracy/expletive_it_object_raising": 0.758, "blimp/accuracy/left_branch_island_simple_question": 0.331, "blimp/accuracy/wh_questions_subject_gap": 0.922, "blimp/accuracy/existential_there_quantifiers_2": 0.32, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.935, "blimp/accuracy/sentential_negation_npi_scope": 0.541, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.786, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.9, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.894, "blimp/accuracy/principle_A_case_2": 0.927, "blimp/accuracy/distractor_agreement_relational_noun": 0.861, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.991, "blimp/accuracy/superlative_quantifiers_1": 0.767, "blimp/accuracy/wh_island": 0.716, "blimp/accuracy/principle_A_domain_1": 0.993, "blimp/accuracy/complex_NP_island": 0.515, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.956, "blimp/accuracy/irregular_past_participle_verbs": 0.878, "blimp/accuracy/drop_argument": 0.775, "blimp/accuracy/wh_questions_object_gap": 0.765, "blimp/accuracy/animate_subject_passive": 0.793, "blimp/accuracy/existential_there_quantifiers_1": 0.982, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.875, "blimp/accuracy/npi_present_2": 0.536, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.899, "blimp/accuracy/anaphor_number_agreement": 0.979, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.944, "blimp/accuracy/existential_there_object_raising": 0.839, "blimp/accuracy/matrix_question_npi_licensor_present": 0.181, "blimp/accuracy/npi_present_1": 0.499, "blimp/accuracy/wh_vs_that_no_gap": 0.963, "blimp/accuracy/left_branch_island_echo_question": 0.442, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.974, "blimp/accuracy/causative": 0.712, "blimp/accuracy/group_average": 0.7559253731343284, "blimp/accuracy/seq_average": 0.7559253731343284, "boolq/accuracy/dev": 0.6079510703363914, "boolq/accuracy/group_average": 0.6079510703363914, "boolq/accuracy/seq_average": 0.6079510703363914}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-50000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.7115478515625, "val/accuracy": 0.4647168356274802, "val/perplexity": 15.0525566156603, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "mean_accuracy": 0.4647168356274802, "mean_loss": 2.7115478515625, "blimp/accuracy/passive_2": 0.877, "blimp/accuracy/determiner_noun_agreement_2": 0.979, "blimp/accuracy/ellipsis_n_bar_1": 0.805, "blimp/accuracy/tough_vs_raising_2": 0.864, "blimp/accuracy/tough_vs_raising_1": 0.554, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.913, "blimp/accuracy/principle_A_reconstruction": 0.329, "blimp/accuracy/wh_vs_that_with_gap": 0.467, "blimp/accuracy/principle_A_domain_2": 0.842, "blimp/accuracy/determiner_noun_agreement_1": 0.985, "blimp/accuracy/ellipsis_n_bar_2": 0.903, "blimp/accuracy/principle_A_domain_3": 0.547, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.918, "blimp/accuracy/animate_subject_trans": 0.882, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.889, "blimp/accuracy/distractor_agreement_relative_clause": 0.561, "blimp/accuracy/transitive": 0.862, "blimp/accuracy/sentential_subject_island": 0.361, "blimp/accuracy/adjunct_island": 0.771, "blimp/accuracy/intransitive": 0.799, "blimp/accuracy/existential_there_subject_raising": 0.841, "blimp/accuracy/irregular_past_participle_adjectives": 0.745, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.339, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.177, "blimp/accuracy/only_npi_scope": 0.635, "blimp/accuracy/superlative_quantifiers_2": 0.698, "blimp/accuracy/passive_1": 0.899, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.902, "blimp/accuracy/inchoative": 0.616, "blimp/accuracy/anaphor_gender_agreement": 0.927, "blimp/accuracy/principle_A_c_command": 0.568, "blimp/accuracy/only_npi_licensor_present": 0.726, "blimp/accuracy/expletive_it_object_raising": 0.775, "blimp/accuracy/left_branch_island_simple_question": 0.362, "blimp/accuracy/wh_questions_subject_gap": 0.918, "blimp/accuracy/existential_there_quantifiers_2": 0.415, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.936, "blimp/accuracy/sentential_negation_npi_scope": 0.582, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.8, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.89, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.903, "blimp/accuracy/principle_A_case_2": 0.953, "blimp/accuracy/distractor_agreement_relational_noun": 0.843, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.706, "blimp/accuracy/wh_island": 0.807, "blimp/accuracy/principle_A_domain_1": 0.987, "blimp/accuracy/complex_NP_island": 0.499, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.959, "blimp/accuracy/irregular_past_participle_verbs": 0.878, "blimp/accuracy/drop_argument": 0.756, "blimp/accuracy/wh_questions_object_gap": 0.816, "blimp/accuracy/animate_subject_passive": 0.801, "blimp/accuracy/existential_there_quantifiers_1": 0.982, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.913, "blimp/accuracy/npi_present_2": 0.639, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.913, "blimp/accuracy/anaphor_number_agreement": 0.987, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.943, "blimp/accuracy/existential_there_object_raising": 0.829, "blimp/accuracy/matrix_question_npi_licensor_present": 0.162, "blimp/accuracy/npi_present_1": 0.61, "blimp/accuracy/wh_vs_that_no_gap": 0.966, "blimp/accuracy/left_branch_island_echo_question": 0.384, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.981, "blimp/accuracy/causative": 0.711, "blimp/accuracy/group_average": 0.7578208955223882, "blimp/accuracy/seq_average": 0.7578208955223881, "boolq/accuracy/dev": 0.6103975535168196, "boolq/accuracy/group_average": 0.6103975535168196, "boolq/accuracy/seq_average": 0.6103975535168196}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-60000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.6786971319289434, "val/accuracy": 0.4690009223090278, "val/perplexity": 14.566103216982187, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "mean_accuracy": 0.4690009223090278, "mean_loss": 2.6786971319289434, "blimp/accuracy/passive_2": 0.896, "blimp/accuracy/determiner_noun_agreement_2": 0.983, "blimp/accuracy/ellipsis_n_bar_1": 0.814, "blimp/accuracy/tough_vs_raising_2": 0.884, "blimp/accuracy/tough_vs_raising_1": 0.549, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.896, "blimp/accuracy/principle_A_reconstruction": 0.303, "blimp/accuracy/wh_vs_that_with_gap": 0.478, "blimp/accuracy/principle_A_domain_2": 0.795, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.899, "blimp/accuracy/principle_A_domain_3": 0.527, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.939, "blimp/accuracy/animate_subject_trans": 0.9, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.866, "blimp/accuracy/distractor_agreement_relative_clause": 0.666, "blimp/accuracy/transitive": 0.856, "blimp/accuracy/sentential_subject_island": 0.337, "blimp/accuracy/adjunct_island": 0.791, "blimp/accuracy/intransitive": 0.771, "blimp/accuracy/existential_there_subject_raising": 0.87, "blimp/accuracy/irregular_past_participle_adjectives": 0.98, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.38, "blimp/accuracy/principle_A_case_1": 0.999, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.201, "blimp/accuracy/only_npi_scope": 0.746, "blimp/accuracy/superlative_quantifiers_2": 0.734, "blimp/accuracy/passive_1": 0.877, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.92, "blimp/accuracy/inchoative": 0.633, "blimp/accuracy/anaphor_gender_agreement": 0.94, "blimp/accuracy/principle_A_c_command": 0.643, "blimp/accuracy/only_npi_licensor_present": 0.696, "blimp/accuracy/expletive_it_object_raising": 0.769, "blimp/accuracy/left_branch_island_simple_question": 0.465, "blimp/accuracy/wh_questions_subject_gap": 0.923, "blimp/accuracy/existential_there_quantifiers_2": 0.377, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.924, "blimp/accuracy/sentential_negation_npi_scope": 0.615, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.809, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.86, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.886, "blimp/accuracy/principle_A_case_2": 0.96, "blimp/accuracy/distractor_agreement_relational_noun": 0.819, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.994, "blimp/accuracy/superlative_quantifiers_1": 0.811, "blimp/accuracy/wh_island": 0.804, "blimp/accuracy/principle_A_domain_1": 0.983, "blimp/accuracy/complex_NP_island": 0.605, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.957, "blimp/accuracy/irregular_past_participle_verbs": 0.871, "blimp/accuracy/drop_argument": 0.771, "blimp/accuracy/wh_questions_object_gap": 0.76, "blimp/accuracy/animate_subject_passive": 0.786, "blimp/accuracy/existential_there_quantifiers_1": 0.992, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.885, "blimp/accuracy/npi_present_2": 0.602, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.906, "blimp/accuracy/anaphor_number_agreement": 0.985, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.945, "blimp/accuracy/existential_there_object_raising": 0.787, "blimp/accuracy/matrix_question_npi_licensor_present": 0.252, "blimp/accuracy/npi_present_1": 0.565, "blimp/accuracy/wh_vs_that_no_gap": 0.966, "blimp/accuracy/left_branch_island_echo_question": 0.447, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.978, "blimp/accuracy/causative": 0.725, "blimp/accuracy/group_average": 0.7692686567164178, "blimp/accuracy/seq_average": 0.7692686567164179, "boolq/accuracy/dev": 0.6137614678899083, "boolq/accuracy/group_average": 0.6137614678899083, "boolq/accuracy/seq_average": 0.6137614678899083}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-70000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.653255159892733, "val/accuracy": 0.4725816514756944, "val/perplexity": 14.20018737429058, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "mean_accuracy": 0.4725816514756944, "mean_loss": 2.653255159892733, "blimp/accuracy/passive_2": 0.881, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.81, "blimp/accuracy/tough_vs_raising_2": 0.863, "blimp/accuracy/tough_vs_raising_1": 0.553, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.909, "blimp/accuracy/principle_A_reconstruction": 0.323, "blimp/accuracy/wh_vs_that_with_gap": 0.473, "blimp/accuracy/principle_A_domain_2": 0.76, "blimp/accuracy/determiner_noun_agreement_1": 0.985, "blimp/accuracy/ellipsis_n_bar_2": 0.904, "blimp/accuracy/principle_A_domain_3": 0.522, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.919, "blimp/accuracy/animate_subject_trans": 0.902, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.861, "blimp/accuracy/distractor_agreement_relative_clause": 0.664, "blimp/accuracy/transitive": 0.862, "blimp/accuracy/sentential_subject_island": 0.3, "blimp/accuracy/adjunct_island": 0.76, "blimp/accuracy/intransitive": 0.76, "blimp/accuracy/existential_there_subject_raising": 0.859, "blimp/accuracy/irregular_past_participle_adjectives": 0.958, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.367, "blimp/accuracy/principle_A_case_1": 0.999, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.229, "blimp/accuracy/only_npi_scope": 0.693, "blimp/accuracy/superlative_quantifiers_2": 0.62, "blimp/accuracy/passive_1": 0.888, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.891, "blimp/accuracy/inchoative": 0.612, "blimp/accuracy/anaphor_gender_agreement": 0.95, "blimp/accuracy/principle_A_c_command": 0.632, "blimp/accuracy/only_npi_licensor_present": 0.785, "blimp/accuracy/expletive_it_object_raising": 0.761, "blimp/accuracy/left_branch_island_simple_question": 0.465, "blimp/accuracy/wh_questions_subject_gap": 0.92, "blimp/accuracy/existential_there_quantifiers_2": 0.434, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.921, "blimp/accuracy/sentential_negation_npi_scope": 0.596, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.827, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.896, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.905, "blimp/accuracy/principle_A_case_2": 0.954, "blimp/accuracy/distractor_agreement_relational_noun": 0.833, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.992, "blimp/accuracy/superlative_quantifiers_1": 0.749, "blimp/accuracy/wh_island": 0.799, "blimp/accuracy/principle_A_domain_1": 0.986, "blimp/accuracy/complex_NP_island": 0.569, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.975, "blimp/accuracy/irregular_past_participle_verbs": 0.846, "blimp/accuracy/drop_argument": 0.75, "blimp/accuracy/wh_questions_object_gap": 0.809, "blimp/accuracy/animate_subject_passive": 0.779, "blimp/accuracy/existential_there_quantifiers_1": 0.988, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.882, "blimp/accuracy/npi_present_2": 0.577, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.914, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.941, "blimp/accuracy/existential_there_object_raising": 0.81, "blimp/accuracy/matrix_question_npi_licensor_present": 0.226, "blimp/accuracy/npi_present_1": 0.553, "blimp/accuracy/wh_vs_that_no_gap": 0.972, "blimp/accuracy/left_branch_island_echo_question": 0.445, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.972, "blimp/accuracy/causative": 0.697, "blimp/accuracy/group_average": 0.7642537313432837, "blimp/accuracy/seq_average": 0.7642537313432836, "boolq/accuracy/dev": 0.6048929663608563, "boolq/accuracy/group_average": 0.6048929663608563, "boolq/accuracy/seq_average": 0.6048929663608563}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-80000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.630679660373264, "val/accuracy": 0.47608487568204366, "val/perplexity": 13.883202558898565, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "mean_accuracy": 0.47608487568204366, "mean_loss": 2.630679660373264, "blimp/accuracy/passive_2": 0.898, "blimp/accuracy/determiner_noun_agreement_2": 0.98, "blimp/accuracy/ellipsis_n_bar_1": 0.818, "blimp/accuracy/tough_vs_raising_2": 0.871, "blimp/accuracy/tough_vs_raising_1": 0.603, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.911, "blimp/accuracy/principle_A_reconstruction": 0.27, "blimp/accuracy/wh_vs_that_with_gap": 0.498, "blimp/accuracy/principle_A_domain_2": 0.791, "blimp/accuracy/determiner_noun_agreement_1": 0.989, "blimp/accuracy/ellipsis_n_bar_2": 0.9, "blimp/accuracy/principle_A_domain_3": 0.538, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.935, "blimp/accuracy/animate_subject_trans": 0.896, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.889, "blimp/accuracy/distractor_agreement_relative_clause": 0.658, "blimp/accuracy/transitive": 0.882, "blimp/accuracy/sentential_subject_island": 0.299, "blimp/accuracy/adjunct_island": 0.788, "blimp/accuracy/intransitive": 0.792, "blimp/accuracy/existential_there_subject_raising": 0.855, "blimp/accuracy/irregular_past_participle_adjectives": 0.934, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.355, "blimp/accuracy/principle_A_case_1": 0.999, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.233, "blimp/accuracy/only_npi_scope": 0.712, "blimp/accuracy/superlative_quantifiers_2": 0.757, "blimp/accuracy/passive_1": 0.901, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.896, "blimp/accuracy/inchoative": 0.644, "blimp/accuracy/anaphor_gender_agreement": 0.94, "blimp/accuracy/principle_A_c_command": 0.643, "blimp/accuracy/only_npi_licensor_present": 0.866, "blimp/accuracy/expletive_it_object_raising": 0.766, "blimp/accuracy/left_branch_island_simple_question": 0.464, "blimp/accuracy/wh_questions_subject_gap": 0.912, "blimp/accuracy/existential_there_quantifiers_2": 0.395, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.927, "blimp/accuracy/sentential_negation_npi_scope": 0.663, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.802, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.859, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.9, "blimp/accuracy/principle_A_case_2": 0.954, "blimp/accuracy/distractor_agreement_relational_noun": 0.832, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.987, "blimp/accuracy/superlative_quantifiers_1": 0.772, "blimp/accuracy/wh_island": 0.772, "blimp/accuracy/principle_A_domain_1": 0.973, "blimp/accuracy/complex_NP_island": 0.56, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.968, "blimp/accuracy/irregular_past_participle_verbs": 0.852, "blimp/accuracy/drop_argument": 0.776, "blimp/accuracy/wh_questions_object_gap": 0.785, "blimp/accuracy/animate_subject_passive": 0.787, "blimp/accuracy/existential_there_quantifiers_1": 0.991, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.872, "blimp/accuracy/npi_present_2": 0.641, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.927, "blimp/accuracy/anaphor_number_agreement": 0.986, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.951, "blimp/accuracy/existential_there_object_raising": 0.831, "blimp/accuracy/matrix_question_npi_licensor_present": 0.285, "blimp/accuracy/npi_present_1": 0.621, "blimp/accuracy/wh_vs_that_no_gap": 0.962, "blimp/accuracy/left_branch_island_echo_question": 0.413, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.966, "blimp/accuracy/causative": 0.705, "blimp/accuracy/group_average": 0.7731044776119402, "blimp/accuracy/seq_average": 0.7731044776119403, "boolq/accuracy/dev": 0.6131498470948012, "boolq/accuracy/group_average": 0.6131498470948012, "boolq/accuracy/seq_average": 0.6131498470948012}
|
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_standard_lb/tmp/result-model-90000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.6191866435701887, "val/accuracy": 0.4788663349454365, "val/perplexity": 13.72455608817525, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "mean_accuracy": 0.4788663349454365, "mean_loss": 2.6191866435701887, "blimp/accuracy/passive_2": 0.89, "blimp/accuracy/determiner_noun_agreement_2": 0.976, "blimp/accuracy/ellipsis_n_bar_1": 0.797, "blimp/accuracy/tough_vs_raising_2": 0.886, "blimp/accuracy/tough_vs_raising_1": 0.589, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.914, "blimp/accuracy/principle_A_reconstruction": 0.322, "blimp/accuracy/wh_vs_that_with_gap": 0.5, "blimp/accuracy/principle_A_domain_2": 0.783, "blimp/accuracy/determiner_noun_agreement_1": 0.986, "blimp/accuracy/ellipsis_n_bar_2": 0.904, "blimp/accuracy/principle_A_domain_3": 0.535, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.928, "blimp/accuracy/animate_subject_trans": 0.895, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.896, "blimp/accuracy/distractor_agreement_relative_clause": 0.68, "blimp/accuracy/transitive": 0.879, "blimp/accuracy/sentential_subject_island": 0.292, "blimp/accuracy/adjunct_island": 0.775, "blimp/accuracy/intransitive": 0.767, "blimp/accuracy/existential_there_subject_raising": 0.846, "blimp/accuracy/irregular_past_participle_adjectives": 0.922, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.402, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.236, "blimp/accuracy/only_npi_scope": 0.713, "blimp/accuracy/superlative_quantifiers_2": 0.635, "blimp/accuracy/passive_1": 0.885, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.913, "blimp/accuracy/inchoative": 0.625, "blimp/accuracy/anaphor_gender_agreement": 0.958, "blimp/accuracy/principle_A_c_command": 0.657, "blimp/accuracy/only_npi_licensor_present": 0.848, "blimp/accuracy/expletive_it_object_raising": 0.788, "blimp/accuracy/left_branch_island_simple_question": 0.504, "blimp/accuracy/wh_questions_subject_gap": 0.923, "blimp/accuracy/existential_there_quantifiers_2": 0.466, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.928, "blimp/accuracy/sentential_negation_npi_scope": 0.614, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.815, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.85, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.923, "blimp/accuracy/principle_A_case_2": 0.957, "blimp/accuracy/distractor_agreement_relational_noun": 0.833, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.989, "blimp/accuracy/superlative_quantifiers_1": 0.821, "blimp/accuracy/wh_island": 0.767, "blimp/accuracy/principle_A_domain_1": 0.976, "blimp/accuracy/complex_NP_island": 0.55, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.962, "blimp/accuracy/irregular_past_participle_verbs": 0.888, "blimp/accuracy/drop_argument": 0.774, "blimp/accuracy/wh_questions_object_gap": 0.806, "blimp/accuracy/animate_subject_passive": 0.793, "blimp/accuracy/existential_there_quantifiers_1": 0.991, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.871, "blimp/accuracy/npi_present_2": 0.66, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.927, "blimp/accuracy/anaphor_number_agreement": 0.992, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.945, "blimp/accuracy/existential_there_object_raising": 0.83, "blimp/accuracy/matrix_question_npi_licensor_present": 0.273, "blimp/accuracy/npi_present_1": 0.626, "blimp/accuracy/wh_vs_that_no_gap": 0.969, "blimp/accuracy/left_branch_island_echo_question": 0.47, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.97, "blimp/accuracy/causative": 0.71, "blimp/accuracy/group_average": 0.7760447761194029, "blimp/accuracy/seq_average": 0.776044776119403, "boolq/accuracy/dev": 0.5877675840978593, "boolq/accuracy/group_average": 0.5877675840978593, "boolq/accuracy/seq_average": 0.5877675840978593}
|