47aefa5f3614711e3301a09d2ae771a963d50916d4eb90e5801032d210697295
Browse files
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_deepseek_sigmoidonly/tmp/result-model-100000.pth.json
CHANGED
|
@@ -1,15 +1,81 @@
|
|
| 1 |
{
|
| 2 |
-
"val/loss":
|
| 3 |
-
"val/accuracy": 0.
|
| 4 |
-
"val/perplexity":
|
| 5 |
"val/time_since_best_loss": 0,
|
| 6 |
"val/time_since_best_accuracy": 0,
|
| 7 |
-
"mean_accuracy": 0.
|
| 8 |
-
"mean_loss":
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"
|
| 12 |
-
"
|
| 13 |
-
"
|
| 14 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"val/loss": 5.210205562531002,
|
| 3 |
+
"val/accuracy": 0.23550463479662698,
|
| 4 |
+
"val/perplexity": 183.13169934040414,
|
| 5 |
"val/time_since_best_loss": 0,
|
| 6 |
"val/time_since_best_accuracy": 0,
|
| 7 |
+
"mean_accuracy": 0.23550463479662698,
|
| 8 |
+
"mean_loss": 5.210205562531002,
|
| 9 |
+
"blimp/accuracy/passive_2": 0.552,
|
| 10 |
+
"blimp/accuracy/determiner_noun_agreement_2": 0.697,
|
| 11 |
+
"blimp/accuracy/ellipsis_n_bar_1": 0.395,
|
| 12 |
+
"blimp/accuracy/tough_vs_raising_2": 0.67,
|
| 13 |
+
"blimp/accuracy/tough_vs_raising_1": 0.351,
|
| 14 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.729,
|
| 15 |
+
"blimp/accuracy/principle_A_reconstruction": 0.32,
|
| 16 |
+
"blimp/accuracy/wh_vs_that_with_gap": 0.168,
|
| 17 |
+
"blimp/accuracy/principle_A_domain_2": 0.642,
|
| 18 |
+
"blimp/accuracy/determiner_noun_agreement_1": 0.706,
|
| 19 |
+
"blimp/accuracy/ellipsis_n_bar_2": 0.782,
|
| 20 |
+
"blimp/accuracy/principle_A_domain_3": 0.504,
|
| 21 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.604,
|
| 22 |
+
"blimp/accuracy/animate_subject_trans": 0.695,
|
| 23 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.6,
|
| 24 |
+
"blimp/accuracy/distractor_agreement_relative_clause": 0.472,
|
| 25 |
+
"blimp/accuracy/transitive": 0.586,
|
| 26 |
+
"blimp/accuracy/sentential_subject_island": 0.325,
|
| 27 |
+
"blimp/accuracy/adjunct_island": 0.432,
|
| 28 |
+
"blimp/accuracy/intransitive": 0.515,
|
| 29 |
+
"blimp/accuracy/existential_there_subject_raising": 0.553,
|
| 30 |
+
"blimp/accuracy/irregular_past_participle_adjectives": 0.689,
|
| 31 |
+
"blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.432,
|
| 32 |
+
"blimp/accuracy/principle_A_case_1": 0.989,
|
| 33 |
+
"blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.196,
|
| 34 |
+
"blimp/accuracy/only_npi_scope": 0.304,
|
| 35 |
+
"blimp/accuracy/superlative_quantifiers_2": 0.85,
|
| 36 |
+
"blimp/accuracy/passive_1": 0.606,
|
| 37 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.635,
|
| 38 |
+
"blimp/accuracy/inchoative": 0.401,
|
| 39 |
+
"blimp/accuracy/anaphor_gender_agreement": 0.728,
|
| 40 |
+
"blimp/accuracy/principle_A_c_command": 0.592,
|
| 41 |
+
"blimp/accuracy/only_npi_licensor_present": 0.032,
|
| 42 |
+
"blimp/accuracy/expletive_it_object_raising": 0.64,
|
| 43 |
+
"blimp/accuracy/left_branch_island_simple_question": 0.438,
|
| 44 |
+
"blimp/accuracy/wh_questions_subject_gap": 0.831,
|
| 45 |
+
"blimp/accuracy/existential_there_quantifiers_2": 0.583,
|
| 46 |
+
"blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.625,
|
| 47 |
+
"blimp/accuracy/sentential_negation_npi_scope": 0.589,
|
| 48 |
+
"blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.609,
|
| 49 |
+
"blimp/accuracy/wh_questions_subject_gap_long_distance": 0.83,
|
| 50 |
+
"blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.616,
|
| 51 |
+
"blimp/accuracy/principle_A_case_2": 0.561,
|
| 52 |
+
"blimp/accuracy/distractor_agreement_relational_noun": 0.508,
|
| 53 |
+
"blimp/accuracy/sentential_negation_npi_licensor_present": 0.976,
|
| 54 |
+
"blimp/accuracy/superlative_quantifiers_1": 0.501,
|
| 55 |
+
"blimp/accuracy/wh_island": 0.519,
|
| 56 |
+
"blimp/accuracy/principle_A_domain_1": 0.83,
|
| 57 |
+
"blimp/accuracy/complex_NP_island": 0.342,
|
| 58 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_2": 0.63,
|
| 59 |
+
"blimp/accuracy/irregular_past_participle_verbs": 0.648,
|
| 60 |
+
"blimp/accuracy/drop_argument": 0.688,
|
| 61 |
+
"blimp/accuracy/wh_questions_object_gap": 0.568,
|
| 62 |
+
"blimp/accuracy/animate_subject_passive": 0.657,
|
| 63 |
+
"blimp/accuracy/existential_there_quantifiers_1": 0.919,
|
| 64 |
+
"blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.676,
|
| 65 |
+
"blimp/accuracy/npi_present_2": 0.702,
|
| 66 |
+
"blimp/accuracy/determiner_noun_agreement_irregular_1": 0.587,
|
| 67 |
+
"blimp/accuracy/anaphor_number_agreement": 0.941,
|
| 68 |
+
"blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.62,
|
| 69 |
+
"blimp/accuracy/existential_there_object_raising": 0.71,
|
| 70 |
+
"blimp/accuracy/matrix_question_npi_licensor_present": 0.174,
|
| 71 |
+
"blimp/accuracy/npi_present_1": 0.668,
|
| 72 |
+
"blimp/accuracy/wh_vs_that_no_gap": 0.803,
|
| 73 |
+
"blimp/accuracy/left_branch_island_echo_question": 0.709,
|
| 74 |
+
"blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.802,
|
| 75 |
+
"blimp/accuracy/causative": 0.469,
|
| 76 |
+
"blimp/accuracy/group_average": 0.5928507462686566,
|
| 77 |
+
"blimp/accuracy/seq_average": 0.5928507462686567,
|
| 78 |
+
"boolq/accuracy/dev": 0.39327217125382263,
|
| 79 |
+
"boolq/accuracy/group_average": 0.39327217125382263,
|
| 80 |
+
"boolq/accuracy/seq_average": 0.39327217125382263
|
| 81 |
}
|