DavidNguyen commited on
Commit
e2de8e1
·
verified ·
1 Parent(s): e25ff14

47aefa5f3614711e3301a09d2ae771a963d50916d4eb90e5801032d210697295

Browse files
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_deepseek_sigmoidonly/tmp/result-model-100000.pth.json CHANGED
@@ -1,15 +1,81 @@
1
  {
2
- "val/loss": 2.5970938546316966,
3
- "val/accuracy": 0.4811004154265873,
4
- "val/perplexity": 13.424667255252297,
5
  "val/time_since_best_loss": 0,
6
  "val/time_since_best_accuracy": 0,
7
- "mean_accuracy": 0.4811004154265873,
8
- "mean_loss": 2.5970938546316966,
9
- "squad/accuracy/SQuAD-v1.1": 0.8738883632923368,
10
- "squad/accuracy/group_average": 0.8738883632923368,
11
- "squad/accuracy/seq_average": 0.8738883632923368,
12
- "mmlu/accuracy/MMLU": 0.2622810153736146,
13
- "mmlu/accuracy/group_average": 0.2622810153736146,
14
- "mmlu/accuracy/seq_average": 0.2622810153736146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  }
 
1
  {
2
+ "val/loss": 5.210205562531002,
3
+ "val/accuracy": 0.23550463479662698,
4
+ "val/perplexity": 183.13169934040414,
5
  "val/time_since_best_loss": 0,
6
  "val/time_since_best_accuracy": 0,
7
+ "mean_accuracy": 0.23550463479662698,
8
+ "mean_loss": 5.210205562531002,
9
+ "blimp/accuracy/passive_2": 0.552,
10
+ "blimp/accuracy/determiner_noun_agreement_2": 0.697,
11
+ "blimp/accuracy/ellipsis_n_bar_1": 0.395,
12
+ "blimp/accuracy/tough_vs_raising_2": 0.67,
13
+ "blimp/accuracy/tough_vs_raising_1": 0.351,
14
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.729,
15
+ "blimp/accuracy/principle_A_reconstruction": 0.32,
16
+ "blimp/accuracy/wh_vs_that_with_gap": 0.168,
17
+ "blimp/accuracy/principle_A_domain_2": 0.642,
18
+ "blimp/accuracy/determiner_noun_agreement_1": 0.706,
19
+ "blimp/accuracy/ellipsis_n_bar_2": 0.782,
20
+ "blimp/accuracy/principle_A_domain_3": 0.504,
21
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.604,
22
+ "blimp/accuracy/animate_subject_trans": 0.695,
23
+ "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.6,
24
+ "blimp/accuracy/distractor_agreement_relative_clause": 0.472,
25
+ "blimp/accuracy/transitive": 0.586,
26
+ "blimp/accuracy/sentential_subject_island": 0.325,
27
+ "blimp/accuracy/adjunct_island": 0.432,
28
+ "blimp/accuracy/intransitive": 0.515,
29
+ "blimp/accuracy/existential_there_subject_raising": 0.553,
30
+ "blimp/accuracy/irregular_past_participle_adjectives": 0.689,
31
+ "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.432,
32
+ "blimp/accuracy/principle_A_case_1": 0.989,
33
+ "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.196,
34
+ "blimp/accuracy/only_npi_scope": 0.304,
35
+ "blimp/accuracy/superlative_quantifiers_2": 0.85,
36
+ "blimp/accuracy/passive_1": 0.606,
37
+ "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.635,
38
+ "blimp/accuracy/inchoative": 0.401,
39
+ "blimp/accuracy/anaphor_gender_agreement": 0.728,
40
+ "blimp/accuracy/principle_A_c_command": 0.592,
41
+ "blimp/accuracy/only_npi_licensor_present": 0.032,
42
+ "blimp/accuracy/expletive_it_object_raising": 0.64,
43
+ "blimp/accuracy/left_branch_island_simple_question": 0.438,
44
+ "blimp/accuracy/wh_questions_subject_gap": 0.831,
45
+ "blimp/accuracy/existential_there_quantifiers_2": 0.583,
46
+ "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.625,
47
+ "blimp/accuracy/sentential_negation_npi_scope": 0.589,
48
+ "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.609,
49
+ "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.83,
50
+ "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.616,
51
+ "blimp/accuracy/principle_A_case_2": 0.561,
52
+ "blimp/accuracy/distractor_agreement_relational_noun": 0.508,
53
+ "blimp/accuracy/sentential_negation_npi_licensor_present": 0.976,
54
+ "blimp/accuracy/superlative_quantifiers_1": 0.501,
55
+ "blimp/accuracy/wh_island": 0.519,
56
+ "blimp/accuracy/principle_A_domain_1": 0.83,
57
+ "blimp/accuracy/complex_NP_island": 0.342,
58
+ "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.63,
59
+ "blimp/accuracy/irregular_past_participle_verbs": 0.648,
60
+ "blimp/accuracy/drop_argument": 0.688,
61
+ "blimp/accuracy/wh_questions_object_gap": 0.568,
62
+ "blimp/accuracy/animate_subject_passive": 0.657,
63
+ "blimp/accuracy/existential_there_quantifiers_1": 0.919,
64
+ "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.676,
65
+ "blimp/accuracy/npi_present_2": 0.702,
66
+ "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.587,
67
+ "blimp/accuracy/anaphor_number_agreement": 0.941,
68
+ "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.62,
69
+ "blimp/accuracy/existential_there_object_raising": 0.71,
70
+ "blimp/accuracy/matrix_question_npi_licensor_present": 0.174,
71
+ "blimp/accuracy/npi_present_1": 0.668,
72
+ "blimp/accuracy/wh_vs_that_no_gap": 0.803,
73
+ "blimp/accuracy/left_branch_island_echo_question": 0.709,
74
+ "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.802,
75
+ "blimp/accuracy/causative": 0.469,
76
+ "blimp/accuracy/group_average": 0.5928507462686566,
77
+ "blimp/accuracy/seq_average": 0.5928507462686567,
78
+ "boolq/accuracy/dev": 0.39327217125382263,
79
+ "boolq/accuracy/group_average": 0.39327217125382263,
80
+ "boolq/accuracy/seq_average": 0.39327217125382263
81
  }