enoriega committed on
Commit
435c11f
·
verified ·
1 Parent(s): 4076971

Upload best checkpoint (F1=0.8245)

Browse files
Files changed (4) hide show
  1. model.safetensors +1 -1
  2. tokenizer.json +1 -1
  3. trainer_state.json +130 -56
  4. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63150d81d41cc50619fbf414eb368c0478f21176c8aacc1ca5daa571277c9c5
3
  size 598449012
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d19cdbd7d9e728b2c39a3515ff9740b1bf05da6dd5290c962ee98f8e1a61234
3
  size 598449012
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 8192,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
trainer_state.json CHANGED
@@ -1,90 +1,164 @@
1
  {
2
- "best_global_step": 2646,
3
- "best_metric": 0.7086840484500103,
4
- "best_model_checkpoint": "./data/coarser_labels/checkpoint-2646",
5
- "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 2646,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.5672149744753261,
14
- "grad_norm": 0.3870491087436676,
15
- "learning_rate": 1.6228269085411943e-05,
16
- "loss": 0.10851879119873047,
 
 
 
 
 
 
 
 
 
 
 
 
17
  "step": 500
18
  },
19
  {
20
- "epoch": 1.0,
21
- "eval_accuracy": 0.9402714932126697,
22
- "eval_f1": 0.49097052312711176,
23
- "eval_loss": 0.01582016609609127,
24
- "eval_precision": 0.5857659160411454,
25
- "eval_recall": 0.42258323305254714,
26
- "eval_runtime": 43.5666,
27
- "eval_samples_per_second": 64.269,
28
- "eval_steps_per_second": 16.067,
29
  "step": 882
30
  },
31
  {
32
- "epoch": 1.1338627339761769,
33
- "grad_norm": 0.2627342939376831,
34
- "learning_rate": 1.2448979591836736e-05,
35
- "loss": 0.03889805603027344,
36
  "step": 1000
37
  },
38
  {
39
- "epoch": 1.701077708451503,
40
- "grad_norm": 0.1561833620071411,
41
- "learning_rate": 8.669690098261528e-06,
42
- "loss": 0.026378578186035156,
 
 
 
 
 
 
 
 
 
 
 
 
43
  "step": 1500
44
  },
45
  {
46
- "epoch": 2.0,
47
- "eval_accuracy": 0.9582323748357904,
48
- "eval_f1": 0.6174682734350007,
49
- "eval_loss": 0.014365866780281067,
50
- "eval_precision": 0.6593031200182191,
51
- "eval_recall": 0.5806257521058965,
52
- "eval_runtime": 23.0661,
53
- "eval_samples_per_second": 121.39,
54
- "eval_steps_per_second": 30.348,
55
  "step": 1764
56
  },
57
  {
58
- "epoch": 2.2677254679523537,
59
- "grad_norm": 0.4613684117794037,
60
- "learning_rate": 4.890400604686319e-06,
61
- "loss": 0.020320886611938475,
62
  "step": 2000
63
  },
64
  {
65
- "epoch": 2.83494044242768,
66
- "grad_norm": 0.21372494101524353,
67
- "learning_rate": 1.111111111111111e-06,
68
- "loss": 0.01625680351257324,
 
 
 
 
 
 
 
 
 
 
 
 
69
  "step": 2500
70
  },
71
  {
72
- "epoch": 3.0,
73
- "eval_accuracy": 0.97002627353671,
74
- "eval_f1": 0.7086840484500103,
75
- "eval_loss": 0.01741624064743519,
76
- "eval_precision": 0.7258200168208578,
77
- "eval_recall": 0.6923385479342158,
78
- "eval_runtime": 23.0668,
79
- "eval_samples_per_second": 121.386,
80
- "eval_steps_per_second": 30.347,
81
  "step": 2646
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
  ],
84
  "logging_steps": 500,
85
- "max_steps": 2646,
86
  "num_input_tokens_seen": 0,
87
- "num_train_epochs": 3,
88
  "save_steps": 500,
89
  "stateful_callbacks": {
90
  "TrainerControl": {
@@ -93,12 +167,12 @@
93
  "should_evaluate": false,
94
  "should_log": false,
95
  "should_save": true,
96
- "should_training_stop": true
97
  },
98
  "attributes": {}
99
  }
100
  },
101
- "total_flos": 4644546171644400.0,
102
  "train_batch_size": 8,
103
  "trial_name": null,
104
  "trial_params": null
 
1
  {
2
+ "best_global_step": 3528,
3
+ "best_metric": 0.8244905994684517,
4
+ "best_model_checkpoint": "/projects/gyorilab/variants_ner_coarser/checkpoint-3528",
5
+ "epoch": 8.0,
6
  "eval_steps": 500,
7
+ "global_step": 3528,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.9403182734357377,
15
+ "eval_f1": 0.47378233512001866,
16
+ "eval_loss": 0.01702982187271118,
17
+ "eval_precision": 0.5675600223338917,
18
+ "eval_recall": 0.4066,
19
+ "eval_runtime": 8.8591,
20
+ "eval_samples_per_second": 316.058,
21
+ "eval_steps_per_second": 39.507,
22
+ "step": 441
23
+ },
24
+ {
25
+ "epoch": 1.1338627339761769,
26
+ "grad_norm": 0.19258293509483337,
27
+ "learning_rate": 1.7736961451247167e-05,
28
+ "loss": 0.1472896270751953,
29
  "step": 500
30
  },
31
  {
32
+ "epoch": 2.0,
33
+ "eval_accuracy": 0.9491082668141367,
34
+ "eval_f1": 0.5545414069456812,
35
+ "eval_loss": 0.014995112083852291,
36
+ "eval_precision": 0.6252510040160643,
37
+ "eval_recall": 0.4982,
38
+ "eval_runtime": 8.4202,
39
+ "eval_samples_per_second": 332.534,
40
+ "eval_steps_per_second": 41.567,
41
  "step": 882
42
  },
43
  {
44
+ "epoch": 2.2677254679523537,
45
+ "grad_norm": 0.7697210907936096,
46
+ "learning_rate": 1.546938775510204e-05,
47
+ "loss": 0.052211280822753905,
48
  "step": 1000
49
  },
50
  {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.9703556018016576,
53
+ "eval_f1": 0.7212371134020619,
54
+ "eval_loss": 0.017216848209500313,
55
+ "eval_precision": 0.7442553191489362,
56
+ "eval_recall": 0.6996,
57
+ "eval_runtime": 8.3974,
58
+ "eval_samples_per_second": 333.438,
59
+ "eval_steps_per_second": 41.68,
60
+ "step": 1323
61
+ },
62
+ {
63
+ "epoch": 3.401588201928531,
64
+ "grad_norm": 0.342649906873703,
65
+ "learning_rate": 1.3201814058956916e-05,
66
+ "loss": 0.03277603530883789,
67
  "step": 1500
68
  },
69
  {
70
+ "epoch": 4.0,
71
+ "eval_accuracy": 0.9722474877936971,
72
+ "eval_f1": 0.7500510516642842,
73
+ "eval_loss": 0.01973508670926094,
74
+ "eval_precision": 0.7661660408844388,
75
+ "eval_recall": 0.7346,
76
+ "eval_runtime": 8.5095,
77
+ "eval_samples_per_second": 329.042,
78
+ "eval_steps_per_second": 41.13,
79
  "step": 1764
80
  },
81
  {
82
+ "epoch": 4.5354509359047075,
83
+ "grad_norm": 0.06497496366500854,
84
+ "learning_rate": 1.0934240362811793e-05,
85
+ "loss": 0.019792444229125978,
86
  "step": 2000
87
  },
88
  {
89
+ "epoch": 5.0,
90
+ "eval_accuracy": 0.9771954973113389,
91
+ "eval_f1": 0.7891332470892627,
92
+ "eval_loss": 0.03587024286389351,
93
+ "eval_precision": 0.7853040205981382,
94
+ "eval_recall": 0.793,
95
+ "eval_runtime": 8.4468,
96
+ "eval_samples_per_second": 331.488,
97
+ "eval_steps_per_second": 41.436,
98
+ "step": 2205
99
+ },
100
+ {
101
+ "epoch": 5.669313669880885,
102
+ "grad_norm": 0.02606065385043621,
103
+ "learning_rate": 8.666666666666668e-06,
104
+ "loss": 0.013434083938598632,
105
  "step": 2500
106
  },
107
  {
108
+ "epoch": 6.0,
109
+ "eval_accuracy": 0.9781268873381892,
110
+ "eval_f1": 0.8013205282112845,
111
+ "eval_loss": 0.03571802005171776,
112
+ "eval_precision": 0.8016413130504404,
113
+ "eval_recall": 0.801,
114
+ "eval_runtime": 8.4407,
115
+ "eval_samples_per_second": 331.725,
116
+ "eval_steps_per_second": 41.466,
117
  "step": 2646
118
+ },
119
+ {
120
+ "epoch": 6.803176403857062,
121
+ "grad_norm": 0.06468810141086578,
122
+ "learning_rate": 6.399092970521542e-06,
123
+ "loss": 0.009369298934936524,
124
+ "step": 3000
125
+ },
126
+ {
127
+ "epoch": 7.0,
128
+ "eval_accuracy": 0.9796622255855751,
129
+ "eval_f1": 0.8202601497832086,
130
+ "eval_loss": 0.06079654023051262,
131
+ "eval_precision": 0.8084693084693084,
132
+ "eval_recall": 0.8324,
133
+ "eval_runtime": 8.4309,
134
+ "eval_samples_per_second": 332.113,
135
+ "eval_steps_per_second": 41.514,
136
+ "step": 3087
137
+ },
138
+ {
139
+ "epoch": 7.9370391378332386,
140
+ "grad_norm": 0.018788253888487816,
141
+ "learning_rate": 4.131519274376418e-06,
142
+ "loss": 0.005781527042388916,
143
+ "step": 3500
144
+ },
145
+ {
146
+ "epoch": 8.0,
147
+ "eval_accuracy": 0.9798732436385333,
148
+ "eval_f1": 0.8244905994684517,
149
+ "eval_loss": 0.06755472719669342,
150
+ "eval_precision": 0.8117852296956775,
151
+ "eval_recall": 0.8376,
152
+ "eval_runtime": 8.4796,
153
+ "eval_samples_per_second": 330.204,
154
+ "eval_steps_per_second": 41.276,
155
+ "step": 3528
156
  }
157
  ],
158
  "logging_steps": 500,
159
+ "max_steps": 4410,
160
  "num_input_tokens_seen": 0,
161
+ "num_train_epochs": 10,
162
  "save_steps": 500,
163
  "stateful_callbacks": {
164
  "TrainerControl": {
 
167
  "should_evaluate": false,
168
  "should_log": false,
169
  "should_save": true,
170
+ "should_training_stop": false
171
  },
172
  "attributes": {}
173
  }
174
  },
175
+ "total_flos": 1.254969894362148e+16,
176
  "train_batch_size": 8,
177
  "trial_name": null,
178
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c276bd9da05a8ab8c9d402db0a83e16f25adaa446bf1b0d1ec82e117b00d6f3
3
  size 5201
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2801ea6fc0f1d1a5848157d1ae39e7e2bfc9a91f89e678b21ce55c99c950f598
3
  size 5201