ShengdingHu commited on
Commit
5ab3f00
·
1 Parent(s): 7d28d2e

Training in progress, step 200

Browse files
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 88.23529411764706,
4
- "eval_average_metrics": 89.89229494614747,
5
  "eval_combined_score": 0.7652279521674141,
6
- "eval_f1": 91.54929577464787,
7
- "eval_loss": 0.16003645956516266,
8
- "eval_runtime": 0.6995,
9
  "eval_samples": 408,
10
- "eval_samples_per_second": 291.653,
11
  "eval_steps_per_second": 14.442,
12
- "test_accuracy": 89.2156862745098,
13
- "test_average_metrics": 90.86634653861545,
14
- "test_f1": 92.51700680272108,
15
- "test_loss": 0.14261329174041748,
16
- "test_runtime": 0.7757,
17
- "test_samples_per_second": 262.979,
18
- "train_loss": 0.17493162812857793,
19
- "train_runtime": 418.5318,
20
  "train_samples": 3668,
21
- "train_samples_per_second": 175.279,
22
- "train_steps_per_second": 2.772
23
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.0,
4
+ "eval_average_metrics": 0.0,
5
  "eval_combined_score": 0.7652279521674141,
6
+ "eval_f1": 0.0,
7
+ "eval_loss": 0.20416393876075745,
8
+ "eval_runtime": 1.8778,
9
  "eval_samples": 408,
10
+ "eval_samples_per_second": 108.639,
11
  "eval_steps_per_second": 14.442,
12
+ "test_accuracy": 0.0,
13
+ "test_average_metrics": 0.0,
14
+ "test_f1": 0.0,
15
+ "test_loss": 0.1770762801170349,
16
+ "test_runtime": 1.5085,
17
+ "test_samples_per_second": 135.238,
18
+ "train_loss": 0.03386630766055595,
19
+ "train_runtime": 583.9583,
20
  "train_samples": 3668,
21
+ "train_samples_per_second": 125.625,
22
+ "train_steps_per_second": 1.986
23
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 88.23529411764706,
4
- "eval_average_metrics": 89.89229494614747,
5
- "eval_f1": 91.54929577464787,
6
- "eval_loss": 0.16003645956516266,
7
- "eval_runtime": 0.6995,
8
- "eval_samples_per_second": 291.653
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.0,
4
+ "eval_average_metrics": 0.0,
5
+ "eval_f1": 0.0,
6
+ "eval_loss": 0.20416393876075745,
7
+ "eval_runtime": 1.8778,
8
+ "eval_samples_per_second": 108.639
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d5f6b8983d397ca16e82e2f585aa3796899c599cfe0f4687a7bd4fdabefcaf4
3
  size 7551621
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09e19d290866a921a7cc2616939f5dd9656a5f74c49c3237fddda3484eeed7f
3
  size 7551621
runs/Feb13_22-56-31_node1/events.out.tfevents.1644764371.node1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69a956498141efe723ceaf01e5ecb1576ceb782f0259821ac15cc44c79dee5e1
3
- size 23268
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4cd309cdec289c22d17acf2860f18494f01944764852783d3cb158f00c0165c
3
+ size 26870
runs/Feb13_22-56-31_node1/events.out.tfevents.1644764957.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bf1b50853f805eb0176a67e04c230c52c43f5ceaf93c00714284789985ca342
3
+ size 776
runs/Feb13_23-17-27_node1/1644765604.990227/events.out.tfevents.1644765605.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36f1cfd03c4861da5e4d99c214c91c07d5cbb8c3384f884a29a59df68df33498
3
+ size 5011
runs/Feb13_23-17-27_node1/events.out.tfevents.1644765604.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58d4c2d9ed6ec309e7ed50089f7c1741fd3aef3127c0264aacdab67b2a453a48
3
+ size 5839
runs/Feb13_23-22-36_node1/1644765898.9699917/events.out.tfevents.1644765898.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f92d5e16e1da9d913a0b12e83162007c5b98744ea2aca8114165390461b5eafd
3
+ size 5011
runs/Feb13_23-22-36_node1/events.out.tfevents.1644765898.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eade3c061dc82bd40aa8e6a09a2862a505334def00d6ccd0879556d8db16628d
3
+ size 7764
test_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "test_accuracy": 89.2156862745098,
4
- "test_average_metrics": 90.86634653861545,
5
- "test_f1": 92.51700680272108,
6
- "test_loss": 0.14261329174041748,
7
- "test_runtime": 0.7757,
8
- "test_samples_per_second": 262.979
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "test_accuracy": 0.0,
4
+ "test_average_metrics": 0.0,
5
+ "test_f1": 0.0,
6
+ "test_loss": 0.1770762801170349,
7
+ "test_runtime": 1.5085,
8
+ "test_samples_per_second": 135.238
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.17493162812857793,
4
- "train_runtime": 418.5318,
5
  "train_samples": 3668,
6
- "train_samples_per_second": 175.279,
7
- "train_steps_per_second": 2.772
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.03386630766055595,
4
+ "train_runtime": 583.9583,
5
  "train_samples": 3668,
6
+ "train_samples_per_second": 125.625,
7
+ "train_steps_per_second": 1.986
8
  }
trainer_state.json CHANGED
@@ -1,87 +1,831 @@
1
  {
2
- "best_metric": 89.89229494614747,
3
- "best_model_checkpoint": "outputs/bitfit/t5-base/mrpc/checkpoint-600",
4
  "epoch": 20.0,
5
  "global_step": 1160,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 3.45,
12
- "eval_accuracy": 87.25490196078431,
13
- "eval_average_metrics": 89.04998619165977,
14
- "eval_f1": 90.84507042253522,
15
- "eval_loss": 0.15426486730575562,
16
- "eval_runtime": 1.9622,
17
- "eval_samples_per_second": 103.964,
18
  "step": 200
19
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  {
21
  "epoch": 6.9,
22
- "eval_accuracy": 88.23529411764706,
23
- "eval_average_metrics": 89.86232790988736,
24
- "eval_f1": 91.48936170212765,
25
- "eval_loss": 0.15000468492507935,
26
- "eval_runtime": 0.758,
27
- "eval_samples_per_second": 269.133,
28
  "step": 400
29
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  {
31
  "epoch": 8.62,
32
  "learning_rate": 0.0001706896551724138,
33
- "loss": 0.3075,
 
 
 
 
 
 
 
 
 
 
34
  "step": 500
35
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  {
37
  "epoch": 10.34,
38
- "eval_accuracy": 88.23529411764706,
39
- "eval_average_metrics": 89.89229494614747,
40
- "eval_f1": 91.54929577464787,
41
- "eval_loss": 0.16003645956516266,
42
- "eval_runtime": 0.7968,
43
- "eval_samples_per_second": 256.036,
44
  "step": 600
45
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  {
47
  "epoch": 13.79,
48
- "eval_accuracy": 88.23529411764706,
49
- "eval_average_metrics": 89.89229494614747,
50
- "eval_f1": 91.54929577464787,
51
- "eval_loss": 0.17383529245853424,
52
- "eval_runtime": 0.718,
53
- "eval_samples_per_second": 284.125,
54
  "step": 800
55
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  {
57
  "epoch": 17.24,
58
  "learning_rate": 4.137931034482758e-05,
59
- "loss": 0.0775,
60
  "step": 1000
61
  },
62
  {
63
  "epoch": 17.24,
64
- "eval_accuracy": 86.27450980392157,
65
- "eval_average_metrics": 88.13725490196079,
66
- "eval_f1": 90.0,
67
- "eval_loss": 0.18229342997074127,
68
- "eval_runtime": 0.9163,
69
- "eval_samples_per_second": 222.63,
70
  "step": 1000
71
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  {
73
  "epoch": 20.0,
74
  "step": 1160,
75
- "total_flos": 9593848425259008.0,
76
- "train_loss": 0.17493162812857793,
77
- "train_runtime": 418.5318,
78
- "train_samples_per_second": 175.279,
79
- "train_steps_per_second": 2.772
80
  }
81
  ],
82
  "max_steps": 1160,
83
  "num_train_epochs": 20,
84
- "total_flos": 9593848425259008.0,
85
  "trial_name": null,
86
  "trial_params": null
87
  }
 
1
  {
2
+ "best_metric": 0.0,
3
+ "best_model_checkpoint": "outputs/bitfit/t5-base/mrpc/checkpoint-200",
4
  "epoch": 20.0,
5
  "global_step": 1160,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.17,
12
+ "learning_rate": 0.00029741379310344827,
13
+ "loss": 2.2211,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.34,
18
+ "learning_rate": 0.0002948275862068965,
19
+ "loss": 0.194,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.52,
24
+ "learning_rate": 0.0002922413793103448,
25
+ "loss": 0.1031,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.69,
30
+ "learning_rate": 0.0002896551724137931,
31
+ "loss": 0.121,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.86,
36
+ "learning_rate": 0.0002870689655172413,
37
+ "loss": 0.1258,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 1.03,
42
+ "learning_rate": 0.0002844827586206896,
43
+ "loss": 0.0988,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 1.21,
48
+ "learning_rate": 0.0002818965517241379,
49
+ "loss": 0.0688,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 1.38,
54
+ "learning_rate": 0.0002793103448275862,
55
+ "loss": 0.081,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 1.55,
60
+ "learning_rate": 0.0002767241379310345,
61
+ "loss": 0.0719,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 1.72,
66
+ "learning_rate": 0.0002741379310344827,
67
+ "loss": 0.0606,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 1.72,
72
+ "eval_accuracy": 0.0,
73
+ "eval_average_metrics": 0.0,
74
+ "eval_f1": 0.0,
75
+ "eval_loss": 0.157722607254982,
76
+ "eval_runtime": 1.6787,
77
+ "eval_samples_per_second": 121.522,
78
+ "step": 100
79
+ },
80
+ {
81
+ "epoch": 1.9,
82
+ "learning_rate": 0.000271551724137931,
83
+ "loss": 0.0726,
84
+ "step": 110
85
+ },
86
+ {
87
+ "epoch": 2.07,
88
+ "learning_rate": 0.0002689655172413793,
89
+ "loss": 0.0686,
90
+ "step": 120
91
+ },
92
+ {
93
+ "epoch": 2.24,
94
+ "learning_rate": 0.0002663793103448276,
95
+ "loss": 0.0445,
96
+ "step": 130
97
+ },
98
+ {
99
+ "epoch": 2.41,
100
+ "learning_rate": 0.00026379310344827584,
101
+ "loss": 0.0414,
102
+ "step": 140
103
+ },
104
+ {
105
+ "epoch": 2.59,
106
+ "learning_rate": 0.00026120689655172413,
107
+ "loss": 0.0431,
108
+ "step": 150
109
+ },
110
+ {
111
+ "epoch": 2.76,
112
+ "learning_rate": 0.00025862068965517237,
113
+ "loss": 0.0274,
114
+ "step": 160
115
+ },
116
+ {
117
+ "epoch": 2.93,
118
+ "learning_rate": 0.00025603448275862066,
119
+ "loss": 0.0399,
120
+ "step": 170
121
+ },
122
+ {
123
+ "epoch": 3.1,
124
+ "learning_rate": 0.00025344827586206895,
125
+ "loss": 0.032,
126
+ "step": 180
127
+ },
128
+ {
129
+ "epoch": 3.28,
130
+ "learning_rate": 0.0002508620689655172,
131
+ "loss": 0.0163,
132
+ "step": 190
133
+ },
134
  {
135
  "epoch": 3.45,
136
+ "learning_rate": 0.0002482758620689655,
137
+ "loss": 0.0196,
 
 
 
 
138
  "step": 200
139
  },
140
+ {
141
+ "epoch": 3.45,
142
+ "eval_accuracy": 0.0,
143
+ "eval_average_metrics": 0.0,
144
+ "eval_f1": 0.0,
145
+ "eval_loss": 0.20416393876075745,
146
+ "eval_runtime": 1.3232,
147
+ "eval_samples_per_second": 154.176,
148
+ "step": 200
149
+ },
150
+ {
151
+ "epoch": 3.62,
152
+ "learning_rate": 0.00024568965517241377,
153
+ "loss": 0.0236,
154
+ "step": 210
155
+ },
156
+ {
157
+ "epoch": 3.79,
158
+ "learning_rate": 0.00024310344827586203,
159
+ "loss": 0.0354,
160
+ "step": 220
161
+ },
162
+ {
163
+ "epoch": 3.97,
164
+ "learning_rate": 0.00024051724137931033,
165
+ "loss": 0.0277,
166
+ "step": 230
167
+ },
168
+ {
169
+ "epoch": 4.14,
170
+ "learning_rate": 0.00023793103448275862,
171
+ "loss": 0.0194,
172
+ "step": 240
173
+ },
174
+ {
175
+ "epoch": 4.31,
176
+ "learning_rate": 0.00023534482758620685,
177
+ "loss": 0.0153,
178
+ "step": 250
179
+ },
180
+ {
181
+ "epoch": 4.48,
182
+ "learning_rate": 0.00023275862068965515,
183
+ "loss": 0.0129,
184
+ "step": 260
185
+ },
186
+ {
187
+ "epoch": 4.66,
188
+ "learning_rate": 0.00023017241379310344,
189
+ "loss": 0.0112,
190
+ "step": 270
191
+ },
192
+ {
193
+ "epoch": 4.83,
194
+ "learning_rate": 0.0002275862068965517,
195
+ "loss": 0.0083,
196
+ "step": 280
197
+ },
198
+ {
199
+ "epoch": 5.0,
200
+ "learning_rate": 0.000225,
201
+ "loss": 0.0167,
202
+ "step": 290
203
+ },
204
+ {
205
+ "epoch": 5.17,
206
+ "learning_rate": 0.00022241379310344826,
207
+ "loss": 0.0184,
208
+ "step": 300
209
+ },
210
+ {
211
+ "epoch": 5.17,
212
+ "eval_accuracy": 0.0,
213
+ "eval_average_metrics": 0.0,
214
+ "eval_f1": 0.0,
215
+ "eval_loss": 0.2221006453037262,
216
+ "eval_runtime": 1.4478,
217
+ "eval_samples_per_second": 140.908,
218
+ "step": 300
219
+ },
220
+ {
221
+ "epoch": 5.34,
222
+ "learning_rate": 0.00021982758620689652,
223
+ "loss": 0.0053,
224
+ "step": 310
225
+ },
226
+ {
227
+ "epoch": 5.52,
228
+ "learning_rate": 0.00021724137931034481,
229
+ "loss": 0.0049,
230
+ "step": 320
231
+ },
232
+ {
233
+ "epoch": 5.69,
234
+ "learning_rate": 0.0002146551724137931,
235
+ "loss": 0.0096,
236
+ "step": 330
237
+ },
238
+ {
239
+ "epoch": 5.86,
240
+ "learning_rate": 0.00021206896551724134,
241
+ "loss": 0.008,
242
+ "step": 340
243
+ },
244
+ {
245
+ "epoch": 6.03,
246
+ "learning_rate": 0.00020948275862068963,
247
+ "loss": 0.0099,
248
+ "step": 350
249
+ },
250
+ {
251
+ "epoch": 6.21,
252
+ "learning_rate": 0.00020689655172413793,
253
+ "loss": 0.0128,
254
+ "step": 360
255
+ },
256
+ {
257
+ "epoch": 6.38,
258
+ "learning_rate": 0.0002043103448275862,
259
+ "loss": 0.0026,
260
+ "step": 370
261
+ },
262
+ {
263
+ "epoch": 6.55,
264
+ "learning_rate": 0.00020172413793103448,
265
+ "loss": 0.002,
266
+ "step": 380
267
+ },
268
+ {
269
+ "epoch": 6.72,
270
+ "learning_rate": 0.00019913793103448275,
271
+ "loss": 0.0062,
272
+ "step": 390
273
+ },
274
+ {
275
+ "epoch": 6.9,
276
+ "learning_rate": 0.000196551724137931,
277
+ "loss": 0.0099,
278
+ "step": 400
279
+ },
280
  {
281
  "epoch": 6.9,
282
+ "eval_accuracy": 0.0,
283
+ "eval_average_metrics": 0.0,
284
+ "eval_f1": 0.0,
285
+ "eval_loss": 0.24928051233291626,
286
+ "eval_runtime": 1.0803,
287
+ "eval_samples_per_second": 188.836,
288
  "step": 400
289
  },
290
+ {
291
+ "epoch": 7.07,
292
+ "learning_rate": 0.0001939655172413793,
293
+ "loss": 0.0138,
294
+ "step": 410
295
+ },
296
+ {
297
+ "epoch": 7.24,
298
+ "learning_rate": 0.0001913793103448276,
299
+ "loss": 0.0016,
300
+ "step": 420
301
+ },
302
+ {
303
+ "epoch": 7.41,
304
+ "learning_rate": 0.00018879310344827583,
305
+ "loss": 0.0044,
306
+ "step": 430
307
+ },
308
+ {
309
+ "epoch": 7.59,
310
+ "learning_rate": 0.00018620689655172412,
311
+ "loss": 0.0039,
312
+ "step": 440
313
+ },
314
+ {
315
+ "epoch": 7.76,
316
+ "learning_rate": 0.0001836206896551724,
317
+ "loss": 0.0063,
318
+ "step": 450
319
+ },
320
+ {
321
+ "epoch": 7.93,
322
+ "learning_rate": 0.00018103448275862068,
323
+ "loss": 0.0076,
324
+ "step": 460
325
+ },
326
+ {
327
+ "epoch": 8.1,
328
+ "learning_rate": 0.00017844827586206897,
329
+ "loss": 0.005,
330
+ "step": 470
331
+ },
332
+ {
333
+ "epoch": 8.28,
334
+ "learning_rate": 0.0001758620689655172,
335
+ "loss": 0.0018,
336
+ "step": 480
337
+ },
338
+ {
339
+ "epoch": 8.45,
340
+ "learning_rate": 0.0001732758620689655,
341
+ "loss": 0.0033,
342
+ "step": 490
343
+ },
344
  {
345
  "epoch": 8.62,
346
  "learning_rate": 0.0001706896551724138,
347
+ "loss": 0.0049,
348
+ "step": 500
349
+ },
350
+ {
351
+ "epoch": 8.62,
352
+ "eval_accuracy": 0.0,
353
+ "eval_average_metrics": 0.0,
354
+ "eval_f1": 0.0,
355
+ "eval_loss": 0.26071295142173767,
356
+ "eval_runtime": 1.6002,
357
+ "eval_samples_per_second": 127.481,
358
  "step": 500
359
  },
360
+ {
361
+ "epoch": 8.79,
362
+ "learning_rate": 0.00016810344827586203,
363
+ "loss": 0.0027,
364
+ "step": 510
365
+ },
366
+ {
367
+ "epoch": 8.97,
368
+ "learning_rate": 0.00016551724137931032,
369
+ "loss": 0.0052,
370
+ "step": 520
371
+ },
372
+ {
373
+ "epoch": 9.14,
374
+ "learning_rate": 0.0001629310344827586,
375
+ "loss": 0.0003,
376
+ "step": 530
377
+ },
378
+ {
379
+ "epoch": 9.31,
380
+ "learning_rate": 0.00016034482758620688,
381
+ "loss": 0.0017,
382
+ "step": 540
383
+ },
384
+ {
385
+ "epoch": 9.48,
386
+ "learning_rate": 0.00015775862068965517,
387
+ "loss": 0.0016,
388
+ "step": 550
389
+ },
390
+ {
391
+ "epoch": 9.66,
392
+ "learning_rate": 0.00015517241379310346,
393
+ "loss": 0.0037,
394
+ "step": 560
395
+ },
396
+ {
397
+ "epoch": 9.83,
398
+ "learning_rate": 0.0001525862068965517,
399
+ "loss": 0.0008,
400
+ "step": 570
401
+ },
402
+ {
403
+ "epoch": 10.0,
404
+ "learning_rate": 0.00015,
405
+ "loss": 0.0065,
406
+ "step": 580
407
+ },
408
+ {
409
+ "epoch": 10.17,
410
+ "learning_rate": 0.00014741379310344825,
411
+ "loss": 0.0005,
412
+ "step": 590
413
+ },
414
+ {
415
+ "epoch": 10.34,
416
+ "learning_rate": 0.00014482758620689654,
417
+ "loss": 0.0012,
418
+ "step": 600
419
+ },
420
  {
421
  "epoch": 10.34,
422
+ "eval_accuracy": 0.0,
423
+ "eval_average_metrics": 0.0,
424
+ "eval_f1": 0.0,
425
+ "eval_loss": 0.3229135274887085,
426
+ "eval_runtime": 1.6354,
427
+ "eval_samples_per_second": 124.74,
428
  "step": 600
429
  },
430
+ {
431
+ "epoch": 10.52,
432
+ "learning_rate": 0.0001422413793103448,
433
+ "loss": 0.0015,
434
+ "step": 610
435
+ },
436
+ {
437
+ "epoch": 10.69,
438
+ "learning_rate": 0.0001396551724137931,
439
+ "loss": 0.0031,
440
+ "step": 620
441
+ },
442
+ {
443
+ "epoch": 10.86,
444
+ "learning_rate": 0.00013706896551724136,
445
+ "loss": 0.009,
446
+ "step": 630
447
+ },
448
+ {
449
+ "epoch": 11.03,
450
+ "learning_rate": 0.00013448275862068965,
451
+ "loss": 0.0008,
452
+ "step": 640
453
+ },
454
+ {
455
+ "epoch": 11.21,
456
+ "learning_rate": 0.00013189655172413792,
457
+ "loss": 0.0002,
458
+ "step": 650
459
+ },
460
+ {
461
+ "epoch": 11.38,
462
+ "learning_rate": 0.00012931034482758618,
463
+ "loss": 0.0002,
464
+ "step": 660
465
+ },
466
+ {
467
+ "epoch": 11.55,
468
+ "learning_rate": 0.00012672413793103447,
469
+ "loss": 0.004,
470
+ "step": 670
471
+ },
472
+ {
473
+ "epoch": 11.72,
474
+ "learning_rate": 0.00012413793103448274,
475
+ "loss": 0.004,
476
+ "step": 680
477
+ },
478
+ {
479
+ "epoch": 11.9,
480
+ "learning_rate": 0.00012155172413793102,
481
+ "loss": 0.0005,
482
+ "step": 690
483
+ },
484
+ {
485
+ "epoch": 12.07,
486
+ "learning_rate": 0.00011896551724137931,
487
+ "loss": 0.003,
488
+ "step": 700
489
+ },
490
+ {
491
+ "epoch": 12.07,
492
+ "eval_accuracy": 0.0,
493
+ "eval_average_metrics": 0.0,
494
+ "eval_f1": 0.0,
495
+ "eval_loss": 0.3359452784061432,
496
+ "eval_runtime": 1.9034,
497
+ "eval_samples_per_second": 107.177,
498
+ "step": 700
499
+ },
500
+ {
501
+ "epoch": 12.24,
502
+ "learning_rate": 0.00011637931034482757,
503
+ "loss": 0.0005,
504
+ "step": 710
505
+ },
506
+ {
507
+ "epoch": 12.41,
508
+ "learning_rate": 0.00011379310344827585,
509
+ "loss": 0.0002,
510
+ "step": 720
511
+ },
512
+ {
513
+ "epoch": 12.59,
514
+ "learning_rate": 0.00011120689655172413,
515
+ "loss": 0.0001,
516
+ "step": 730
517
+ },
518
+ {
519
+ "epoch": 12.76,
520
+ "learning_rate": 0.00010862068965517241,
521
+ "loss": 0.0024,
522
+ "step": 740
523
+ },
524
+ {
525
+ "epoch": 12.93,
526
+ "learning_rate": 0.00010603448275862067,
527
+ "loss": 0.0002,
528
+ "step": 750
529
+ },
530
+ {
531
+ "epoch": 13.1,
532
+ "learning_rate": 0.00010344827586206896,
533
+ "loss": 0.0003,
534
+ "step": 760
535
+ },
536
+ {
537
+ "epoch": 13.28,
538
+ "learning_rate": 0.00010086206896551724,
539
+ "loss": 0.0007,
540
+ "step": 770
541
+ },
542
+ {
543
+ "epoch": 13.45,
544
+ "learning_rate": 9.82758620689655e-05,
545
+ "loss": 0.0,
546
+ "step": 780
547
+ },
548
+ {
549
+ "epoch": 13.62,
550
+ "learning_rate": 9.56896551724138e-05,
551
+ "loss": 0.0001,
552
+ "step": 790
553
+ },
554
  {
555
  "epoch": 13.79,
556
+ "learning_rate": 9.310344827586206e-05,
557
+ "loss": 0.0003,
 
 
 
 
558
  "step": 800
559
  },
560
+ {
561
+ "epoch": 13.79,
562
+ "eval_accuracy": 0.0,
563
+ "eval_average_metrics": 0.0,
564
+ "eval_f1": 0.0,
565
+ "eval_loss": 0.40144264698028564,
566
+ "eval_runtime": 1.2399,
567
+ "eval_samples_per_second": 164.525,
568
+ "step": 800
569
+ },
570
+ {
571
+ "epoch": 13.97,
572
+ "learning_rate": 9.051724137931034e-05,
573
+ "loss": 0.003,
574
+ "step": 810
575
+ },
576
+ {
577
+ "epoch": 14.14,
578
+ "learning_rate": 8.79310344827586e-05,
579
+ "loss": 0.0,
580
+ "step": 820
581
+ },
582
+ {
583
+ "epoch": 14.31,
584
+ "learning_rate": 8.53448275862069e-05,
585
+ "loss": 0.0004,
586
+ "step": 830
587
+ },
588
+ {
589
+ "epoch": 14.48,
590
+ "learning_rate": 8.275862068965516e-05,
591
+ "loss": 0.0001,
592
+ "step": 840
593
+ },
594
+ {
595
+ "epoch": 14.66,
596
+ "learning_rate": 8.017241379310344e-05,
597
+ "loss": 0.0001,
598
+ "step": 850
599
+ },
600
+ {
601
+ "epoch": 14.83,
602
+ "learning_rate": 7.758620689655173e-05,
603
+ "loss": 0.0008,
604
+ "step": 860
605
+ },
606
+ {
607
+ "epoch": 15.0,
608
+ "learning_rate": 7.5e-05,
609
+ "loss": 0.0001,
610
+ "step": 870
611
+ },
612
+ {
613
+ "epoch": 15.17,
614
+ "learning_rate": 7.241379310344827e-05,
615
+ "loss": 0.0001,
616
+ "step": 880
617
+ },
618
+ {
619
+ "epoch": 15.34,
620
+ "learning_rate": 6.982758620689655e-05,
621
+ "loss": 0.0018,
622
+ "step": 890
623
+ },
624
+ {
625
+ "epoch": 15.52,
626
+ "learning_rate": 6.724137931034483e-05,
627
+ "loss": 0.0001,
628
+ "step": 900
629
+ },
630
+ {
631
+ "epoch": 15.52,
632
+ "eval_accuracy": 0.0,
633
+ "eval_average_metrics": 0.0,
634
+ "eval_f1": 0.0,
635
+ "eval_loss": 0.37248921394348145,
636
+ "eval_runtime": 1.2617,
637
+ "eval_samples_per_second": 161.691,
638
+ "step": 900
639
+ },
640
+ {
641
+ "epoch": 15.69,
642
+ "learning_rate": 6.465517241379309e-05,
643
+ "loss": 0.0,
644
+ "step": 910
645
+ },
646
+ {
647
+ "epoch": 15.86,
648
+ "learning_rate": 6.206896551724137e-05,
649
+ "loss": 0.0,
650
+ "step": 920
651
+ },
652
+ {
653
+ "epoch": 16.03,
654
+ "learning_rate": 5.9482758620689654e-05,
655
+ "loss": 0.0001,
656
+ "step": 930
657
+ },
658
+ {
659
+ "epoch": 16.21,
660
+ "learning_rate": 5.6896551724137926e-05,
661
+ "loss": 0.0007,
662
+ "step": 940
663
+ },
664
+ {
665
+ "epoch": 16.38,
666
+ "learning_rate": 5.4310344827586204e-05,
667
+ "loss": 0.0,
668
+ "step": 950
669
+ },
670
+ {
671
+ "epoch": 16.55,
672
+ "learning_rate": 5.172413793103448e-05,
673
+ "loss": 0.0,
674
+ "step": 960
675
+ },
676
+ {
677
+ "epoch": 16.72,
678
+ "learning_rate": 4.913793103448275e-05,
679
+ "loss": 0.0,
680
+ "step": 970
681
+ },
682
+ {
683
+ "epoch": 16.9,
684
+ "learning_rate": 4.655172413793103e-05,
685
+ "loss": 0.0001,
686
+ "step": 980
687
+ },
688
+ {
689
+ "epoch": 17.07,
690
+ "learning_rate": 4.39655172413793e-05,
691
+ "loss": 0.0,
692
+ "step": 990
693
+ },
694
  {
695
  "epoch": 17.24,
696
  "learning_rate": 4.137931034482758e-05,
697
+ "loss": 0.0001,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 17.24,
702
+ "eval_accuracy": 0.0,
703
+ "eval_average_metrics": 0.0,
704
+ "eval_f1": 0.0,
705
+ "eval_loss": 0.38377830386161804,
706
+ "eval_runtime": 1.3412,
707
+ "eval_samples_per_second": 152.099,
708
  "step": 1000
709
  },
710
+ {
711
+ "epoch": 17.41,
712
+ "learning_rate": 3.8793103448275865e-05,
713
+ "loss": 0.0,
714
+ "step": 1010
715
+ },
716
+ {
717
+ "epoch": 17.59,
718
+ "learning_rate": 3.6206896551724136e-05,
719
+ "loss": 0.0,
720
+ "step": 1020
721
+ },
722
+ {
723
+ "epoch": 17.76,
724
+ "learning_rate": 3.3620689655172414e-05,
725
+ "loss": 0.0001,
726
+ "step": 1030
727
+ },
728
+ {
729
+ "epoch": 17.93,
730
+ "learning_rate": 3.1034482758620685e-05,
731
+ "loss": 0.0004,
732
+ "step": 1040
733
+ },
734
+ {
735
+ "epoch": 18.1,
736
+ "learning_rate": 2.8448275862068963e-05,
737
+ "loss": 0.0,
738
+ "step": 1050
739
+ },
740
+ {
741
+ "epoch": 18.28,
742
+ "learning_rate": 2.586206896551724e-05,
743
+ "loss": 0.0001,
744
+ "step": 1060
745
+ },
746
+ {
747
+ "epoch": 18.45,
748
+ "learning_rate": 2.3275862068965515e-05,
749
+ "loss": 0.0002,
750
+ "step": 1070
751
+ },
752
+ {
753
+ "epoch": 18.62,
754
+ "learning_rate": 2.068965517241379e-05,
755
+ "loss": 0.0,
756
+ "step": 1080
757
+ },
758
+ {
759
+ "epoch": 18.79,
760
+ "learning_rate": 1.8103448275862068e-05,
761
+ "loss": 0.0,
762
+ "step": 1090
763
+ },
764
+ {
765
+ "epoch": 18.97,
766
+ "learning_rate": 1.5517241379310342e-05,
767
+ "loss": 0.0,
768
+ "step": 1100
769
+ },
770
+ {
771
+ "epoch": 18.97,
772
+ "eval_accuracy": 0.0,
773
+ "eval_average_metrics": 0.0,
774
+ "eval_f1": 0.0,
775
+ "eval_loss": 0.4036322236061096,
776
+ "eval_runtime": 1.3625,
777
+ "eval_samples_per_second": 149.728,
778
+ "step": 1100
779
+ },
780
+ {
781
+ "epoch": 19.14,
782
+ "learning_rate": 1.293103448275862e-05,
783
+ "loss": 0.0,
784
+ "step": 1110
785
+ },
786
+ {
787
+ "epoch": 19.31,
788
+ "learning_rate": 1.0344827586206895e-05,
789
+ "loss": 0.0,
790
+ "step": 1120
791
+ },
792
+ {
793
+ "epoch": 19.48,
794
+ "learning_rate": 7.758620689655171e-06,
795
+ "loss": 0.0,
796
+ "step": 1130
797
+ },
798
+ {
799
+ "epoch": 19.66,
800
+ "learning_rate": 5.1724137931034475e-06,
801
+ "loss": 0.0,
802
+ "step": 1140
803
+ },
804
+ {
805
+ "epoch": 19.83,
806
+ "learning_rate": 2.5862068965517237e-06,
807
+ "loss": 0.0001,
808
+ "step": 1150
809
+ },
810
+ {
811
+ "epoch": 20.0,
812
+ "learning_rate": 0.0,
813
+ "loss": 0.0001,
814
+ "step": 1160
815
+ },
816
  {
817
  "epoch": 20.0,
818
  "step": 1160,
819
+ "total_flos": 9652606012532736.0,
820
+ "train_loss": 0.03386630766055595,
821
+ "train_runtime": 583.9583,
822
+ "train_samples_per_second": 125.625,
823
+ "train_steps_per_second": 1.986
824
  }
825
  ],
826
  "max_steps": 1160,
827
  "num_train_epochs": 20,
828
+ "total_flos": 9652606012532736.0,
829
  "trial_name": null,
830
  "trial_params": null
831
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daee28c9156f543f7f6829e2a789d057b49a2aeffe3d2a274fec45557718c61c
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fbe4afb7644cd203f43cc1a7b0a7c5af3dfc2635d1ea70a03c3deaa68c6a6e2
3
  size 3183