samitizerxu commited on
Commit
28d9b91
·
1 Parent(s): f52df2a

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.6226525821596244,
4
+ "eval_loss": 0.9150213599205017,
5
+ "eval_runtime": 22.1612,
6
+ "eval_samples_per_second": 76.891,
7
+ "eval_steps_per_second": 2.437,
8
+ "total_flos": 2.3761445690374963e+19,
9
+ "train_loss": 0.9564308677117029,
10
+ "train_runtime": 12406.5537,
11
+ "train_samples_per_second": 24.714,
12
+ "train_steps_per_second": 0.193
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.6226525821596244,
4
+ "eval_loss": 0.9150213599205017,
5
+ "eval_runtime": 22.1612,
6
+ "eval_samples_per_second": 76.891,
7
+ "eval_steps_per_second": 2.437
8
+ }
runs/Feb16_23-57-35_bea9d7f6d280/events.out.tfevents.1676604391.bea9d7f6d280.283.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5b4b8fa130d20c4a7941fcc8f3c0dd0e463f42ee017f766b2c976f7c437140
3
+ size 363
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "total_flos": 2.3761445690374963e+19,
4
+ "train_loss": 0.9564308677117029,
5
+ "train_runtime": 12406.5537,
6
+ "train_samples_per_second": 24.714,
7
+ "train_steps_per_second": 0.193
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6226525821596244,
3
+ "best_model_checkpoint": "large-algae-vit-rgb/checkpoint-2280",
4
+ "epoch": 20.0,
5
+ "global_step": 2400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 2.0833333333333334e-06,
13
+ "loss": 1.7796,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.17,
18
+ "learning_rate": 4.166666666666667e-06,
19
+ "loss": 1.6643,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.25,
24
+ "learning_rate": 6.25e-06,
25
+ "loss": 1.4798,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.33,
30
+ "learning_rate": 8.333333333333334e-06,
31
+ "loss": 1.3417,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.42,
36
+ "learning_rate": 1.0416666666666668e-05,
37
+ "loss": 1.2473,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.5,
42
+ "learning_rate": 1.25e-05,
43
+ "loss": 1.1897,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.58,
48
+ "learning_rate": 1.4583333333333335e-05,
49
+ "loss": 1.1835,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.67,
54
+ "learning_rate": 1.6666666666666667e-05,
55
+ "loss": 1.1931,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.75,
60
+ "learning_rate": 1.8750000000000002e-05,
61
+ "loss": 1.1324,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.83,
66
+ "learning_rate": 2.0833333333333336e-05,
67
+ "loss": 1.1343,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.92,
72
+ "learning_rate": 2.2916666666666667e-05,
73
+ "loss": 1.124,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 1.0,
78
+ "learning_rate": 2.5e-05,
79
+ "loss": 1.1433,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 1.0,
84
+ "eval_accuracy": 0.5575117370892019,
85
+ "eval_loss": 1.0966269969940186,
86
+ "eval_runtime": 22.9114,
87
+ "eval_samples_per_second": 74.374,
88
+ "eval_steps_per_second": 2.357,
89
+ "step": 120
90
+ },
91
+ {
92
+ "epoch": 1.08,
93
+ "learning_rate": 2.7083333333333332e-05,
94
+ "loss": 1.1211,
95
+ "step": 130
96
+ },
97
+ {
98
+ "epoch": 1.17,
99
+ "learning_rate": 2.916666666666667e-05,
100
+ "loss": 1.0712,
101
+ "step": 140
102
+ },
103
+ {
104
+ "epoch": 1.25,
105
+ "learning_rate": 3.125e-05,
106
+ "loss": 1.0919,
107
+ "step": 150
108
+ },
109
+ {
110
+ "epoch": 1.33,
111
+ "learning_rate": 3.3333333333333335e-05,
112
+ "loss": 1.0908,
113
+ "step": 160
114
+ },
115
+ {
116
+ "epoch": 1.42,
117
+ "learning_rate": 3.541666666666667e-05,
118
+ "loss": 1.0912,
119
+ "step": 170
120
+ },
121
+ {
122
+ "epoch": 1.5,
123
+ "learning_rate": 3.7500000000000003e-05,
124
+ "loss": 1.1271,
125
+ "step": 180
126
+ },
127
+ {
128
+ "epoch": 1.58,
129
+ "learning_rate": 3.958333333333333e-05,
130
+ "loss": 1.0785,
131
+ "step": 190
132
+ },
133
+ {
134
+ "epoch": 1.67,
135
+ "learning_rate": 4.166666666666667e-05,
136
+ "loss": 1.0361,
137
+ "step": 200
138
+ },
139
+ {
140
+ "epoch": 1.75,
141
+ "learning_rate": 4.375e-05,
142
+ "loss": 1.1093,
143
+ "step": 210
144
+ },
145
+ {
146
+ "epoch": 1.83,
147
+ "learning_rate": 4.5833333333333334e-05,
148
+ "loss": 1.0827,
149
+ "step": 220
150
+ },
151
+ {
152
+ "epoch": 1.92,
153
+ "learning_rate": 4.791666666666667e-05,
154
+ "loss": 1.0748,
155
+ "step": 230
156
+ },
157
+ {
158
+ "epoch": 2.0,
159
+ "learning_rate": 5e-05,
160
+ "loss": 1.0507,
161
+ "step": 240
162
+ },
163
+ {
164
+ "epoch": 2.0,
165
+ "eval_accuracy": 0.5856807511737089,
166
+ "eval_loss": 1.0356614589691162,
167
+ "eval_runtime": 22.7063,
168
+ "eval_samples_per_second": 75.045,
169
+ "eval_steps_per_second": 2.378,
170
+ "step": 240
171
+ },
172
+ {
173
+ "epoch": 2.08,
174
+ "learning_rate": 4.976851851851852e-05,
175
+ "loss": 1.0658,
176
+ "step": 250
177
+ },
178
+ {
179
+ "epoch": 2.17,
180
+ "learning_rate": 4.9537037037037035e-05,
181
+ "loss": 1.0552,
182
+ "step": 260
183
+ },
184
+ {
185
+ "epoch": 2.25,
186
+ "learning_rate": 4.930555555555556e-05,
187
+ "loss": 1.0831,
188
+ "step": 270
189
+ },
190
+ {
191
+ "epoch": 2.33,
192
+ "learning_rate": 4.9074074074074075e-05,
193
+ "loss": 1.0378,
194
+ "step": 280
195
+ },
196
+ {
197
+ "epoch": 2.42,
198
+ "learning_rate": 4.8842592592592595e-05,
199
+ "loss": 1.0336,
200
+ "step": 290
201
+ },
202
+ {
203
+ "epoch": 2.5,
204
+ "learning_rate": 4.8611111111111115e-05,
205
+ "loss": 1.0519,
206
+ "step": 300
207
+ },
208
+ {
209
+ "epoch": 2.58,
210
+ "learning_rate": 4.837962962962963e-05,
211
+ "loss": 1.0065,
212
+ "step": 310
213
+ },
214
+ {
215
+ "epoch": 2.67,
216
+ "learning_rate": 4.814814814814815e-05,
217
+ "loss": 1.0432,
218
+ "step": 320
219
+ },
220
+ {
221
+ "epoch": 2.75,
222
+ "learning_rate": 4.791666666666667e-05,
223
+ "loss": 1.0496,
224
+ "step": 330
225
+ },
226
+ {
227
+ "epoch": 2.83,
228
+ "learning_rate": 4.768518518518519e-05,
229
+ "loss": 1.0635,
230
+ "step": 340
231
+ },
232
+ {
233
+ "epoch": 2.92,
234
+ "learning_rate": 4.745370370370371e-05,
235
+ "loss": 1.0402,
236
+ "step": 350
237
+ },
238
+ {
239
+ "epoch": 3.0,
240
+ "learning_rate": 4.722222222222222e-05,
241
+ "loss": 1.0104,
242
+ "step": 360
243
+ },
244
+ {
245
+ "epoch": 3.0,
246
+ "eval_accuracy": 0.5921361502347418,
247
+ "eval_loss": 1.0168485641479492,
248
+ "eval_runtime": 22.8587,
249
+ "eval_samples_per_second": 74.545,
250
+ "eval_steps_per_second": 2.362,
251
+ "step": 360
252
+ },
253
+ {
254
+ "epoch": 3.08,
255
+ "learning_rate": 4.699074074074074e-05,
256
+ "loss": 0.9874,
257
+ "step": 370
258
+ },
259
+ {
260
+ "epoch": 3.17,
261
+ "learning_rate": 4.675925925925926e-05,
262
+ "loss": 1.0564,
263
+ "step": 380
264
+ },
265
+ {
266
+ "epoch": 3.25,
267
+ "learning_rate": 4.652777777777778e-05,
268
+ "loss": 1.002,
269
+ "step": 390
270
+ },
271
+ {
272
+ "epoch": 3.33,
273
+ "learning_rate": 4.62962962962963e-05,
274
+ "loss": 1.0306,
275
+ "step": 400
276
+ },
277
+ {
278
+ "epoch": 3.42,
279
+ "learning_rate": 4.6064814814814814e-05,
280
+ "loss": 1.0384,
281
+ "step": 410
282
+ },
283
+ {
284
+ "epoch": 3.5,
285
+ "learning_rate": 4.5833333333333334e-05,
286
+ "loss": 1.0109,
287
+ "step": 420
288
+ },
289
+ {
290
+ "epoch": 3.58,
291
+ "learning_rate": 4.5601851851851854e-05,
292
+ "loss": 1.0797,
293
+ "step": 430
294
+ },
295
+ {
296
+ "epoch": 3.67,
297
+ "learning_rate": 4.5370370370370374e-05,
298
+ "loss": 1.0382,
299
+ "step": 440
300
+ },
301
+ {
302
+ "epoch": 3.75,
303
+ "learning_rate": 4.5138888888888894e-05,
304
+ "loss": 1.0057,
305
+ "step": 450
306
+ },
307
+ {
308
+ "epoch": 3.83,
309
+ "learning_rate": 4.490740740740741e-05,
310
+ "loss": 1.0405,
311
+ "step": 460
312
+ },
313
+ {
314
+ "epoch": 3.92,
315
+ "learning_rate": 4.467592592592593e-05,
316
+ "loss": 1.015,
317
+ "step": 470
318
+ },
319
+ {
320
+ "epoch": 4.0,
321
+ "learning_rate": 4.4444444444444447e-05,
322
+ "loss": 1.0353,
323
+ "step": 480
324
+ },
325
+ {
326
+ "epoch": 4.0,
327
+ "eval_accuracy": 0.5856807511737089,
328
+ "eval_loss": 1.0344651937484741,
329
+ "eval_runtime": 22.8799,
330
+ "eval_samples_per_second": 74.476,
331
+ "eval_steps_per_second": 2.36,
332
+ "step": 480
333
+ },
334
+ {
335
+ "epoch": 4.08,
336
+ "learning_rate": 4.4212962962962966e-05,
337
+ "loss": 1.0024,
338
+ "step": 490
339
+ },
340
+ {
341
+ "epoch": 4.17,
342
+ "learning_rate": 4.3981481481481486e-05,
343
+ "loss": 0.9605,
344
+ "step": 500
345
+ },
346
+ {
347
+ "epoch": 4.25,
348
+ "learning_rate": 4.375e-05,
349
+ "loss": 1.0132,
350
+ "step": 510
351
+ },
352
+ {
353
+ "epoch": 4.33,
354
+ "learning_rate": 4.351851851851852e-05,
355
+ "loss": 0.9964,
356
+ "step": 520
357
+ },
358
+ {
359
+ "epoch": 4.42,
360
+ "learning_rate": 4.328703703703704e-05,
361
+ "loss": 1.0063,
362
+ "step": 530
363
+ },
364
+ {
365
+ "epoch": 4.5,
366
+ "learning_rate": 4.305555555555556e-05,
367
+ "loss": 1.0355,
368
+ "step": 540
369
+ },
370
+ {
371
+ "epoch": 4.58,
372
+ "learning_rate": 4.282407407407408e-05,
373
+ "loss": 1.0556,
374
+ "step": 550
375
+ },
376
+ {
377
+ "epoch": 4.67,
378
+ "learning_rate": 4.259259259259259e-05,
379
+ "loss": 0.9885,
380
+ "step": 560
381
+ },
382
+ {
383
+ "epoch": 4.75,
384
+ "learning_rate": 4.236111111111111e-05,
385
+ "loss": 0.9804,
386
+ "step": 570
387
+ },
388
+ {
389
+ "epoch": 4.83,
390
+ "learning_rate": 4.212962962962963e-05,
391
+ "loss": 1.0036,
392
+ "step": 580
393
+ },
394
+ {
395
+ "epoch": 4.92,
396
+ "learning_rate": 4.1898148148148145e-05,
397
+ "loss": 1.0063,
398
+ "step": 590
399
+ },
400
+ {
401
+ "epoch": 5.0,
402
+ "learning_rate": 4.166666666666667e-05,
403
+ "loss": 0.9629,
404
+ "step": 600
405
+ },
406
+ {
407
+ "epoch": 5.0,
408
+ "eval_accuracy": 0.6015258215962441,
409
+ "eval_loss": 0.983875036239624,
410
+ "eval_runtime": 22.9086,
411
+ "eval_samples_per_second": 74.383,
412
+ "eval_steps_per_second": 2.357,
413
+ "step": 600
414
+ },
415
+ {
416
+ "epoch": 5.08,
417
+ "learning_rate": 4.1435185185185185e-05,
418
+ "loss": 0.9983,
419
+ "step": 610
420
+ },
421
+ {
422
+ "epoch": 5.17,
423
+ "learning_rate": 4.1203703703703705e-05,
424
+ "loss": 1.0425,
425
+ "step": 620
426
+ },
427
+ {
428
+ "epoch": 5.25,
429
+ "learning_rate": 4.0972222222222225e-05,
430
+ "loss": 1.0025,
431
+ "step": 630
432
+ },
433
+ {
434
+ "epoch": 5.33,
435
+ "learning_rate": 4.074074074074074e-05,
436
+ "loss": 0.9785,
437
+ "step": 640
438
+ },
439
+ {
440
+ "epoch": 5.42,
441
+ "learning_rate": 4.0509259259259265e-05,
442
+ "loss": 1.0057,
443
+ "step": 650
444
+ },
445
+ {
446
+ "epoch": 5.5,
447
+ "learning_rate": 4.027777777777778e-05,
448
+ "loss": 0.9692,
449
+ "step": 660
450
+ },
451
+ {
452
+ "epoch": 5.58,
453
+ "learning_rate": 4.00462962962963e-05,
454
+ "loss": 0.9976,
455
+ "step": 670
456
+ },
457
+ {
458
+ "epoch": 5.67,
459
+ "learning_rate": 3.981481481481482e-05,
460
+ "loss": 0.9917,
461
+ "step": 680
462
+ },
463
+ {
464
+ "epoch": 5.75,
465
+ "learning_rate": 3.958333333333333e-05,
466
+ "loss": 0.9818,
467
+ "step": 690
468
+ },
469
+ {
470
+ "epoch": 5.83,
471
+ "learning_rate": 3.935185185185186e-05,
472
+ "loss": 0.996,
473
+ "step": 700
474
+ },
475
+ {
476
+ "epoch": 5.92,
477
+ "learning_rate": 3.912037037037037e-05,
478
+ "loss": 0.9892,
479
+ "step": 710
480
+ },
481
+ {
482
+ "epoch": 6.0,
483
+ "learning_rate": 3.888888888888889e-05,
484
+ "loss": 0.9684,
485
+ "step": 720
486
+ },
487
+ {
488
+ "epoch": 6.0,
489
+ "eval_accuracy": 0.6068075117370892,
490
+ "eval_loss": 0.9672098159790039,
491
+ "eval_runtime": 22.7827,
492
+ "eval_samples_per_second": 74.794,
493
+ "eval_steps_per_second": 2.37,
494
+ "step": 720
495
+ },
496
+ {
497
+ "epoch": 6.08,
498
+ "learning_rate": 3.865740740740741e-05,
499
+ "loss": 0.9977,
500
+ "step": 730
501
+ },
502
+ {
503
+ "epoch": 6.17,
504
+ "learning_rate": 3.8425925925925924e-05,
505
+ "loss": 0.9858,
506
+ "step": 740
507
+ },
508
+ {
509
+ "epoch": 6.25,
510
+ "learning_rate": 3.8194444444444444e-05,
511
+ "loss": 1.0029,
512
+ "step": 750
513
+ },
514
+ {
515
+ "epoch": 6.33,
516
+ "learning_rate": 3.7962962962962964e-05,
517
+ "loss": 0.9715,
518
+ "step": 760
519
+ },
520
+ {
521
+ "epoch": 6.42,
522
+ "learning_rate": 3.7731481481481484e-05,
523
+ "loss": 0.9484,
524
+ "step": 770
525
+ },
526
+ {
527
+ "epoch": 6.5,
528
+ "learning_rate": 3.7500000000000003e-05,
529
+ "loss": 0.9948,
530
+ "step": 780
531
+ },
532
+ {
533
+ "epoch": 6.58,
534
+ "learning_rate": 3.726851851851852e-05,
535
+ "loss": 0.9935,
536
+ "step": 790
537
+ },
538
+ {
539
+ "epoch": 6.67,
540
+ "learning_rate": 3.7037037037037037e-05,
541
+ "loss": 1.0129,
542
+ "step": 800
543
+ },
544
+ {
545
+ "epoch": 6.75,
546
+ "learning_rate": 3.6805555555555556e-05,
547
+ "loss": 0.94,
548
+ "step": 810
549
+ },
550
+ {
551
+ "epoch": 6.83,
552
+ "learning_rate": 3.6574074074074076e-05,
553
+ "loss": 0.9886,
554
+ "step": 820
555
+ },
556
+ {
557
+ "epoch": 6.92,
558
+ "learning_rate": 3.6342592592592596e-05,
559
+ "loss": 0.9441,
560
+ "step": 830
561
+ },
562
+ {
563
+ "epoch": 7.0,
564
+ "learning_rate": 3.611111111111111e-05,
565
+ "loss": 0.9727,
566
+ "step": 840
567
+ },
568
+ {
569
+ "epoch": 7.0,
570
+ "eval_accuracy": 0.6132629107981221,
571
+ "eval_loss": 0.9590299129486084,
572
+ "eval_runtime": 22.8896,
573
+ "eval_samples_per_second": 74.444,
574
+ "eval_steps_per_second": 2.359,
575
+ "step": 840
576
+ },
577
+ {
578
+ "epoch": 7.08,
579
+ "learning_rate": 3.587962962962963e-05,
580
+ "loss": 0.9784,
581
+ "step": 850
582
+ },
583
+ {
584
+ "epoch": 7.17,
585
+ "learning_rate": 3.564814814814815e-05,
586
+ "loss": 0.9698,
587
+ "step": 860
588
+ },
589
+ {
590
+ "epoch": 7.25,
591
+ "learning_rate": 3.541666666666667e-05,
592
+ "loss": 0.9599,
593
+ "step": 870
594
+ },
595
+ {
596
+ "epoch": 7.33,
597
+ "learning_rate": 3.518518518518519e-05,
598
+ "loss": 0.961,
599
+ "step": 880
600
+ },
601
+ {
602
+ "epoch": 7.42,
603
+ "learning_rate": 3.49537037037037e-05,
604
+ "loss": 0.9919,
605
+ "step": 890
606
+ },
607
+ {
608
+ "epoch": 7.5,
609
+ "learning_rate": 3.472222222222222e-05,
610
+ "loss": 0.9915,
611
+ "step": 900
612
+ },
613
+ {
614
+ "epoch": 7.58,
615
+ "learning_rate": 3.449074074074074e-05,
616
+ "loss": 0.9777,
617
+ "step": 910
618
+ },
619
+ {
620
+ "epoch": 7.67,
621
+ "learning_rate": 3.425925925925926e-05,
622
+ "loss": 0.9584,
623
+ "step": 920
624
+ },
625
+ {
626
+ "epoch": 7.75,
627
+ "learning_rate": 3.402777777777778e-05,
628
+ "loss": 0.9627,
629
+ "step": 930
630
+ },
631
+ {
632
+ "epoch": 7.83,
633
+ "learning_rate": 3.3796296296296295e-05,
634
+ "loss": 0.9634,
635
+ "step": 940
636
+ },
637
+ {
638
+ "epoch": 7.92,
639
+ "learning_rate": 3.3564814814814815e-05,
640
+ "loss": 0.9518,
641
+ "step": 950
642
+ },
643
+ {
644
+ "epoch": 8.0,
645
+ "learning_rate": 3.3333333333333335e-05,
646
+ "loss": 0.9626,
647
+ "step": 960
648
+ },
649
+ {
650
+ "epoch": 8.0,
651
+ "eval_accuracy": 0.6126760563380281,
652
+ "eval_loss": 0.9426460266113281,
653
+ "eval_runtime": 22.8408,
654
+ "eval_samples_per_second": 74.603,
655
+ "eval_steps_per_second": 2.364,
656
+ "step": 960
657
+ },
658
+ {
659
+ "epoch": 8.08,
660
+ "learning_rate": 3.3101851851851855e-05,
661
+ "loss": 0.9318,
662
+ "step": 970
663
+ },
664
+ {
665
+ "epoch": 8.17,
666
+ "learning_rate": 3.2870370370370375e-05,
667
+ "loss": 0.993,
668
+ "step": 980
669
+ },
670
+ {
671
+ "epoch": 8.25,
672
+ "learning_rate": 3.263888888888889e-05,
673
+ "loss": 0.9247,
674
+ "step": 990
675
+ },
676
+ {
677
+ "epoch": 8.33,
678
+ "learning_rate": 3.240740740740741e-05,
679
+ "loss": 0.9803,
680
+ "step": 1000
681
+ },
682
+ {
683
+ "epoch": 8.42,
684
+ "learning_rate": 3.217592592592593e-05,
685
+ "loss": 0.9353,
686
+ "step": 1010
687
+ },
688
+ {
689
+ "epoch": 8.5,
690
+ "learning_rate": 3.194444444444444e-05,
691
+ "loss": 0.9683,
692
+ "step": 1020
693
+ },
694
+ {
695
+ "epoch": 8.58,
696
+ "learning_rate": 3.171296296296297e-05,
697
+ "loss": 0.9368,
698
+ "step": 1030
699
+ },
700
+ {
701
+ "epoch": 8.67,
702
+ "learning_rate": 3.148148148148148e-05,
703
+ "loss": 0.9811,
704
+ "step": 1040
705
+ },
706
+ {
707
+ "epoch": 8.75,
708
+ "learning_rate": 3.125e-05,
709
+ "loss": 0.9497,
710
+ "step": 1050
711
+ },
712
+ {
713
+ "epoch": 8.83,
714
+ "learning_rate": 3.101851851851852e-05,
715
+ "loss": 0.9227,
716
+ "step": 1060
717
+ },
718
+ {
719
+ "epoch": 8.92,
720
+ "learning_rate": 3.0787037037037034e-05,
721
+ "loss": 0.9876,
722
+ "step": 1070
723
+ },
724
+ {
725
+ "epoch": 9.0,
726
+ "learning_rate": 3.055555555555556e-05,
727
+ "loss": 0.9857,
728
+ "step": 1080
729
+ },
730
+ {
731
+ "epoch": 9.0,
732
+ "eval_accuracy": 0.607981220657277,
733
+ "eval_loss": 0.9669484496116638,
734
+ "eval_runtime": 22.8277,
735
+ "eval_samples_per_second": 74.646,
736
+ "eval_steps_per_second": 2.366,
737
+ "step": 1080
738
+ },
739
+ {
740
+ "epoch": 9.08,
741
+ "learning_rate": 3.0324074074074077e-05,
742
+ "loss": 0.8884,
743
+ "step": 1090
744
+ },
745
+ {
746
+ "epoch": 9.17,
747
+ "learning_rate": 3.0092592592592593e-05,
748
+ "loss": 0.9424,
749
+ "step": 1100
750
+ },
751
+ {
752
+ "epoch": 9.25,
753
+ "learning_rate": 2.9861111111111113e-05,
754
+ "loss": 0.9701,
755
+ "step": 1110
756
+ },
757
+ {
758
+ "epoch": 9.33,
759
+ "learning_rate": 2.962962962962963e-05,
760
+ "loss": 0.9577,
761
+ "step": 1120
762
+ },
763
+ {
764
+ "epoch": 9.42,
765
+ "learning_rate": 2.9398148148148146e-05,
766
+ "loss": 0.9542,
767
+ "step": 1130
768
+ },
769
+ {
770
+ "epoch": 9.5,
771
+ "learning_rate": 2.916666666666667e-05,
772
+ "loss": 0.9483,
773
+ "step": 1140
774
+ },
775
+ {
776
+ "epoch": 9.58,
777
+ "learning_rate": 2.8935185185185186e-05,
778
+ "loss": 0.9406,
779
+ "step": 1150
780
+ },
781
+ {
782
+ "epoch": 9.67,
783
+ "learning_rate": 2.8703703703703706e-05,
784
+ "loss": 0.9335,
785
+ "step": 1160
786
+ },
787
+ {
788
+ "epoch": 9.75,
789
+ "learning_rate": 2.8472222222222223e-05,
790
+ "loss": 0.9205,
791
+ "step": 1170
792
+ },
793
+ {
794
+ "epoch": 9.83,
795
+ "learning_rate": 2.824074074074074e-05,
796
+ "loss": 0.9193,
797
+ "step": 1180
798
+ },
799
+ {
800
+ "epoch": 9.92,
801
+ "learning_rate": 2.8009259259259263e-05,
802
+ "loss": 0.956,
803
+ "step": 1190
804
+ },
805
+ {
806
+ "epoch": 10.0,
807
+ "learning_rate": 2.777777777777778e-05,
808
+ "loss": 0.9321,
809
+ "step": 1200
810
+ },
811
+ {
812
+ "epoch": 10.0,
813
+ "eval_accuracy": 0.6109154929577465,
814
+ "eval_loss": 0.9396657347679138,
815
+ "eval_runtime": 22.8434,
816
+ "eval_samples_per_second": 74.595,
817
+ "eval_steps_per_second": 2.364,
818
+ "step": 1200
819
+ },
820
+ {
821
+ "epoch": 10.08,
822
+ "learning_rate": 2.75462962962963e-05,
823
+ "loss": 0.9324,
824
+ "step": 1210
825
+ },
826
+ {
827
+ "epoch": 10.17,
828
+ "learning_rate": 2.7314814814814816e-05,
829
+ "loss": 0.9112,
830
+ "step": 1220
831
+ },
832
+ {
833
+ "epoch": 10.25,
834
+ "learning_rate": 2.7083333333333332e-05,
835
+ "loss": 0.9613,
836
+ "step": 1230
837
+ },
838
+ {
839
+ "epoch": 10.33,
840
+ "learning_rate": 2.6851851851851855e-05,
841
+ "loss": 0.9083,
842
+ "step": 1240
843
+ },
844
+ {
845
+ "epoch": 10.42,
846
+ "learning_rate": 2.6620370370370372e-05,
847
+ "loss": 0.9691,
848
+ "step": 1250
849
+ },
850
+ {
851
+ "epoch": 10.5,
852
+ "learning_rate": 2.6388888888888892e-05,
853
+ "loss": 0.9256,
854
+ "step": 1260
855
+ },
856
+ {
857
+ "epoch": 10.58,
858
+ "learning_rate": 2.615740740740741e-05,
859
+ "loss": 0.929,
860
+ "step": 1270
861
+ },
862
+ {
863
+ "epoch": 10.67,
864
+ "learning_rate": 2.5925925925925925e-05,
865
+ "loss": 0.9056,
866
+ "step": 1280
867
+ },
868
+ {
869
+ "epoch": 10.75,
870
+ "learning_rate": 2.5694444444444445e-05,
871
+ "loss": 0.979,
872
+ "step": 1290
873
+ },
874
+ {
875
+ "epoch": 10.83,
876
+ "learning_rate": 2.5462962962962965e-05,
877
+ "loss": 0.9242,
878
+ "step": 1300
879
+ },
880
+ {
881
+ "epoch": 10.92,
882
+ "learning_rate": 2.5231481481481485e-05,
883
+ "loss": 0.9118,
884
+ "step": 1310
885
+ },
886
+ {
887
+ "epoch": 11.0,
888
+ "learning_rate": 2.5e-05,
889
+ "loss": 0.9052,
890
+ "step": 1320
891
+ },
892
+ {
893
+ "epoch": 11.0,
894
+ "eval_accuracy": 0.602112676056338,
895
+ "eval_loss": 0.9402132034301758,
896
+ "eval_runtime": 23.1769,
897
+ "eval_samples_per_second": 73.522,
898
+ "eval_steps_per_second": 2.33,
899
+ "step": 1320
900
+ },
901
+ {
902
+ "epoch": 11.08,
903
+ "learning_rate": 2.4768518518518518e-05,
904
+ "loss": 0.9046,
905
+ "step": 1330
906
+ },
907
+ {
908
+ "epoch": 11.17,
909
+ "learning_rate": 2.4537037037037038e-05,
910
+ "loss": 0.9401,
911
+ "step": 1340
912
+ },
913
+ {
914
+ "epoch": 11.25,
915
+ "learning_rate": 2.4305555555555558e-05,
916
+ "loss": 0.8835,
917
+ "step": 1350
918
+ },
919
+ {
920
+ "epoch": 11.33,
921
+ "learning_rate": 2.4074074074074074e-05,
922
+ "loss": 0.8835,
923
+ "step": 1360
924
+ },
925
+ {
926
+ "epoch": 11.42,
927
+ "learning_rate": 2.3842592592592594e-05,
928
+ "loss": 0.8896,
929
+ "step": 1370
930
+ },
931
+ {
932
+ "epoch": 11.5,
933
+ "learning_rate": 2.361111111111111e-05,
934
+ "loss": 0.9183,
935
+ "step": 1380
936
+ },
937
+ {
938
+ "epoch": 11.58,
939
+ "learning_rate": 2.337962962962963e-05,
940
+ "loss": 0.9436,
941
+ "step": 1390
942
+ },
943
+ {
944
+ "epoch": 11.67,
945
+ "learning_rate": 2.314814814814815e-05,
946
+ "loss": 0.8846,
947
+ "step": 1400
948
+ },
949
+ {
950
+ "epoch": 11.75,
951
+ "learning_rate": 2.2916666666666667e-05,
952
+ "loss": 0.9532,
953
+ "step": 1410
954
+ },
955
+ {
956
+ "epoch": 11.83,
957
+ "learning_rate": 2.2685185185185187e-05,
958
+ "loss": 0.9215,
959
+ "step": 1420
960
+ },
961
+ {
962
+ "epoch": 11.92,
963
+ "learning_rate": 2.2453703703703703e-05,
964
+ "loss": 0.933,
965
+ "step": 1430
966
+ },
967
+ {
968
+ "epoch": 12.0,
969
+ "learning_rate": 2.2222222222222223e-05,
970
+ "loss": 0.9457,
971
+ "step": 1440
972
+ },
973
+ {
974
+ "epoch": 12.0,
975
+ "eval_accuracy": 0.6214788732394366,
976
+ "eval_loss": 0.9181148409843445,
977
+ "eval_runtime": 22.9806,
978
+ "eval_samples_per_second": 74.15,
979
+ "eval_steps_per_second": 2.35,
980
+ "step": 1440
981
+ },
982
+ {
983
+ "epoch": 12.08,
984
+ "learning_rate": 2.1990740740740743e-05,
985
+ "loss": 0.8826,
986
+ "step": 1450
987
+ },
988
+ {
989
+ "epoch": 12.17,
990
+ "learning_rate": 2.175925925925926e-05,
991
+ "loss": 0.908,
992
+ "step": 1460
993
+ },
994
+ {
995
+ "epoch": 12.25,
996
+ "learning_rate": 2.152777777777778e-05,
997
+ "loss": 0.916,
998
+ "step": 1470
999
+ },
1000
+ {
1001
+ "epoch": 12.33,
1002
+ "learning_rate": 2.1296296296296296e-05,
1003
+ "loss": 0.8875,
1004
+ "step": 1480
1005
+ },
1006
+ {
1007
+ "epoch": 12.42,
1008
+ "learning_rate": 2.1064814814814816e-05,
1009
+ "loss": 0.897,
1010
+ "step": 1490
1011
+ },
1012
+ {
1013
+ "epoch": 12.5,
1014
+ "learning_rate": 2.0833333333333336e-05,
1015
+ "loss": 0.9158,
1016
+ "step": 1500
1017
+ },
1018
+ {
1019
+ "epoch": 12.58,
1020
+ "learning_rate": 2.0601851851851853e-05,
1021
+ "loss": 0.9398,
1022
+ "step": 1510
1023
+ },
1024
+ {
1025
+ "epoch": 12.67,
1026
+ "learning_rate": 2.037037037037037e-05,
1027
+ "loss": 0.9385,
1028
+ "step": 1520
1029
+ },
1030
+ {
1031
+ "epoch": 12.75,
1032
+ "learning_rate": 2.013888888888889e-05,
1033
+ "loss": 0.8998,
1034
+ "step": 1530
1035
+ },
1036
+ {
1037
+ "epoch": 12.83,
1038
+ "learning_rate": 1.990740740740741e-05,
1039
+ "loss": 0.9058,
1040
+ "step": 1540
1041
+ },
1042
+ {
1043
+ "epoch": 12.92,
1044
+ "learning_rate": 1.967592592592593e-05,
1045
+ "loss": 0.8997,
1046
+ "step": 1550
1047
+ },
1048
+ {
1049
+ "epoch": 13.0,
1050
+ "learning_rate": 1.9444444444444445e-05,
1051
+ "loss": 0.9101,
1052
+ "step": 1560
1053
+ },
1054
+ {
1055
+ "epoch": 13.0,
1056
+ "eval_accuracy": 0.6185446009389671,
1057
+ "eval_loss": 0.9350312352180481,
1058
+ "eval_runtime": 23.0031,
1059
+ "eval_samples_per_second": 74.077,
1060
+ "eval_steps_per_second": 2.348,
1061
+ "step": 1560
1062
+ },
1063
+ {
1064
+ "epoch": 13.08,
1065
+ "learning_rate": 1.9212962962962962e-05,
1066
+ "loss": 0.9129,
1067
+ "step": 1570
1068
+ },
1069
+ {
1070
+ "epoch": 13.17,
1071
+ "learning_rate": 1.8981481481481482e-05,
1072
+ "loss": 0.8878,
1073
+ "step": 1580
1074
+ },
1075
+ {
1076
+ "epoch": 13.25,
1077
+ "learning_rate": 1.8750000000000002e-05,
1078
+ "loss": 0.8675,
1079
+ "step": 1590
1080
+ },
1081
+ {
1082
+ "epoch": 13.33,
1083
+ "learning_rate": 1.8518518518518518e-05,
1084
+ "loss": 0.8817,
1085
+ "step": 1600
1086
+ },
1087
+ {
1088
+ "epoch": 13.42,
1089
+ "learning_rate": 1.8287037037037038e-05,
1090
+ "loss": 0.8981,
1091
+ "step": 1610
1092
+ },
1093
+ {
1094
+ "epoch": 13.5,
1095
+ "learning_rate": 1.8055555555555555e-05,
1096
+ "loss": 0.9016,
1097
+ "step": 1620
1098
+ },
1099
+ {
1100
+ "epoch": 13.58,
1101
+ "learning_rate": 1.7824074074074075e-05,
1102
+ "loss": 0.8743,
1103
+ "step": 1630
1104
+ },
1105
+ {
1106
+ "epoch": 13.67,
1107
+ "learning_rate": 1.7592592592592595e-05,
1108
+ "loss": 0.8778,
1109
+ "step": 1640
1110
+ },
1111
+ {
1112
+ "epoch": 13.75,
1113
+ "learning_rate": 1.736111111111111e-05,
1114
+ "loss": 0.8631,
1115
+ "step": 1650
1116
+ },
1117
+ {
1118
+ "epoch": 13.83,
1119
+ "learning_rate": 1.712962962962963e-05,
1120
+ "loss": 0.9453,
1121
+ "step": 1660
1122
+ },
1123
+ {
1124
+ "epoch": 13.92,
1125
+ "learning_rate": 1.6898148148148148e-05,
1126
+ "loss": 0.9266,
1127
+ "step": 1670
1128
+ },
1129
+ {
1130
+ "epoch": 14.0,
1131
+ "learning_rate": 1.6666666666666667e-05,
1132
+ "loss": 0.8772,
1133
+ "step": 1680
1134
+ },
1135
+ {
1136
+ "epoch": 14.0,
1137
+ "eval_accuracy": 0.6050469483568075,
1138
+ "eval_loss": 0.9537418484687805,
1139
+ "eval_runtime": 22.909,
1140
+ "eval_samples_per_second": 74.381,
1141
+ "eval_steps_per_second": 2.357,
1142
+ "step": 1680
1143
+ },
1144
+ {
1145
+ "epoch": 14.08,
1146
+ "learning_rate": 1.6435185185185187e-05,
1147
+ "loss": 0.8748,
1148
+ "step": 1690
1149
+ },
1150
+ {
1151
+ "epoch": 14.17,
1152
+ "learning_rate": 1.6203703703703704e-05,
1153
+ "loss": 0.8794,
1154
+ "step": 1700
1155
+ },
1156
+ {
1157
+ "epoch": 14.25,
1158
+ "learning_rate": 1.597222222222222e-05,
1159
+ "loss": 0.8645,
1160
+ "step": 1710
1161
+ },
1162
+ {
1163
+ "epoch": 14.33,
1164
+ "learning_rate": 1.574074074074074e-05,
1165
+ "loss": 0.902,
1166
+ "step": 1720
1167
+ },
1168
+ {
1169
+ "epoch": 14.42,
1170
+ "learning_rate": 1.550925925925926e-05,
1171
+ "loss": 0.9094,
1172
+ "step": 1730
1173
+ },
1174
+ {
1175
+ "epoch": 14.5,
1176
+ "learning_rate": 1.527777777777778e-05,
1177
+ "loss": 0.9081,
1178
+ "step": 1740
1179
+ },
1180
+ {
1181
+ "epoch": 14.58,
1182
+ "learning_rate": 1.5046296296296297e-05,
1183
+ "loss": 0.8869,
1184
+ "step": 1750
1185
+ },
1186
+ {
1187
+ "epoch": 14.67,
1188
+ "learning_rate": 1.4814814814814815e-05,
1189
+ "loss": 0.8552,
1190
+ "step": 1760
1191
+ },
1192
+ {
1193
+ "epoch": 14.75,
1194
+ "learning_rate": 1.4583333333333335e-05,
1195
+ "loss": 0.9357,
1196
+ "step": 1770
1197
+ },
1198
+ {
1199
+ "epoch": 14.83,
1200
+ "learning_rate": 1.4351851851851853e-05,
1201
+ "loss": 0.8996,
1202
+ "step": 1780
1203
+ },
1204
+ {
1205
+ "epoch": 14.92,
1206
+ "learning_rate": 1.412037037037037e-05,
1207
+ "loss": 0.9254,
1208
+ "step": 1790
1209
+ },
1210
+ {
1211
+ "epoch": 15.0,
1212
+ "learning_rate": 1.388888888888889e-05,
1213
+ "loss": 0.8865,
1214
+ "step": 1800
1215
+ },
1216
+ {
1217
+ "epoch": 15.0,
1218
+ "eval_accuracy": 0.6126760563380281,
1219
+ "eval_loss": 0.9256580471992493,
1220
+ "eval_runtime": 22.8459,
1221
+ "eval_samples_per_second": 74.587,
1222
+ "eval_steps_per_second": 2.364,
1223
+ "step": 1800
1224
+ },
1225
+ {
1226
+ "epoch": 15.08,
1227
+ "learning_rate": 1.3657407407407408e-05,
1228
+ "loss": 0.8656,
1229
+ "step": 1810
1230
+ },
1231
+ {
1232
+ "epoch": 15.17,
1233
+ "learning_rate": 1.3425925925925928e-05,
1234
+ "loss": 0.8727,
1235
+ "step": 1820
1236
+ },
1237
+ {
1238
+ "epoch": 15.25,
1239
+ "learning_rate": 1.3194444444444446e-05,
1240
+ "loss": 0.8891,
1241
+ "step": 1830
1242
+ },
1243
+ {
1244
+ "epoch": 15.33,
1245
+ "learning_rate": 1.2962962962962962e-05,
1246
+ "loss": 0.8763,
1247
+ "step": 1840
1248
+ },
1249
+ {
1250
+ "epoch": 15.42,
1251
+ "learning_rate": 1.2731481481481482e-05,
1252
+ "loss": 0.883,
1253
+ "step": 1850
1254
+ },
1255
+ {
1256
+ "epoch": 15.5,
1257
+ "learning_rate": 1.25e-05,
1258
+ "loss": 0.9007,
1259
+ "step": 1860
1260
+ },
1261
+ {
1262
+ "epoch": 15.58,
1263
+ "learning_rate": 1.2268518518518519e-05,
1264
+ "loss": 0.8868,
1265
+ "step": 1870
1266
+ },
1267
+ {
1268
+ "epoch": 15.67,
1269
+ "learning_rate": 1.2037037037037037e-05,
1270
+ "loss": 0.8687,
1271
+ "step": 1880
1272
+ },
1273
+ {
1274
+ "epoch": 15.75,
1275
+ "learning_rate": 1.1805555555555555e-05,
1276
+ "loss": 0.8334,
1277
+ "step": 1890
1278
+ },
1279
+ {
1280
+ "epoch": 15.83,
1281
+ "learning_rate": 1.1574074074074075e-05,
1282
+ "loss": 0.8756,
1283
+ "step": 1900
1284
+ },
1285
+ {
1286
+ "epoch": 15.92,
1287
+ "learning_rate": 1.1342592592592593e-05,
1288
+ "loss": 0.8799,
1289
+ "step": 1910
1290
+ },
1291
+ {
1292
+ "epoch": 16.0,
1293
+ "learning_rate": 1.1111111111111112e-05,
1294
+ "loss": 0.8454,
1295
+ "step": 1920
1296
+ },
1297
+ {
1298
+ "epoch": 16.0,
1299
+ "eval_accuracy": 0.6214788732394366,
1300
+ "eval_loss": 0.9159719347953796,
1301
+ "eval_runtime": 22.7638,
1302
+ "eval_samples_per_second": 74.856,
1303
+ "eval_steps_per_second": 2.372,
1304
+ "step": 1920
1305
+ },
1306
+ {
1307
+ "epoch": 16.08,
1308
+ "learning_rate": 1.087962962962963e-05,
1309
+ "loss": 0.9168,
1310
+ "step": 1930
1311
+ },
1312
+ {
1313
+ "epoch": 16.17,
1314
+ "learning_rate": 1.0648148148148148e-05,
1315
+ "loss": 0.8289,
1316
+ "step": 1940
1317
+ },
1318
+ {
1319
+ "epoch": 16.25,
1320
+ "learning_rate": 1.0416666666666668e-05,
1321
+ "loss": 0.8544,
1322
+ "step": 1950
1323
+ },
1324
+ {
1325
+ "epoch": 16.33,
1326
+ "learning_rate": 1.0185185185185185e-05,
1327
+ "loss": 0.8349,
1328
+ "step": 1960
1329
+ },
1330
+ {
1331
+ "epoch": 16.42,
1332
+ "learning_rate": 9.953703703703704e-06,
1333
+ "loss": 0.9044,
1334
+ "step": 1970
1335
+ },
1336
+ {
1337
+ "epoch": 16.5,
1338
+ "learning_rate": 9.722222222222223e-06,
1339
+ "loss": 0.8508,
1340
+ "step": 1980
1341
+ },
1342
+ {
1343
+ "epoch": 16.58,
1344
+ "learning_rate": 9.490740740740741e-06,
1345
+ "loss": 0.8482,
1346
+ "step": 1990
1347
+ },
1348
+ {
1349
+ "epoch": 16.67,
1350
+ "learning_rate": 9.259259259259259e-06,
1351
+ "loss": 0.8412,
1352
+ "step": 2000
1353
+ },
1354
+ {
1355
+ "epoch": 16.75,
1356
+ "learning_rate": 9.027777777777777e-06,
1357
+ "loss": 0.8827,
1358
+ "step": 2010
1359
+ },
1360
+ {
1361
+ "epoch": 16.83,
1362
+ "learning_rate": 8.796296296296297e-06,
1363
+ "loss": 0.9122,
1364
+ "step": 2020
1365
+ },
1366
+ {
1367
+ "epoch": 16.92,
1368
+ "learning_rate": 8.564814814814816e-06,
1369
+ "loss": 0.8558,
1370
+ "step": 2030
1371
+ },
1372
+ {
1373
+ "epoch": 17.0,
1374
+ "learning_rate": 8.333333333333334e-06,
1375
+ "loss": 0.8909,
1376
+ "step": 2040
1377
+ },
1378
+ {
1379
+ "epoch": 17.0,
1380
+ "eval_accuracy": 0.613849765258216,
1381
+ "eval_loss": 0.9154210090637207,
1382
+ "eval_runtime": 22.6154,
1383
+ "eval_samples_per_second": 75.347,
1384
+ "eval_steps_per_second": 2.388,
1385
+ "step": 2040
1386
+ },
1387
+ {
1388
+ "epoch": 17.08,
1389
+ "learning_rate": 8.101851851851852e-06,
1390
+ "loss": 0.8449,
1391
+ "step": 2050
1392
+ },
1393
+ {
1394
+ "epoch": 17.17,
1395
+ "learning_rate": 7.87037037037037e-06,
1396
+ "loss": 0.8799,
1397
+ "step": 2060
1398
+ },
1399
+ {
1400
+ "epoch": 17.25,
1401
+ "learning_rate": 7.63888888888889e-06,
1402
+ "loss": 0.8713,
1403
+ "step": 2070
1404
+ },
1405
+ {
1406
+ "epoch": 17.33,
1407
+ "learning_rate": 7.4074074074074075e-06,
1408
+ "loss": 0.8708,
1409
+ "step": 2080
1410
+ },
1411
+ {
1412
+ "epoch": 17.42,
1413
+ "learning_rate": 7.1759259259259266e-06,
1414
+ "loss": 0.8545,
1415
+ "step": 2090
1416
+ },
1417
+ {
1418
+ "epoch": 17.5,
1419
+ "learning_rate": 6.944444444444445e-06,
1420
+ "loss": 0.853,
1421
+ "step": 2100
1422
+ },
1423
+ {
1424
+ "epoch": 17.58,
1425
+ "learning_rate": 6.712962962962964e-06,
1426
+ "loss": 0.8293,
1427
+ "step": 2110
1428
+ },
1429
+ {
1430
+ "epoch": 17.67,
1431
+ "learning_rate": 6.481481481481481e-06,
1432
+ "loss": 0.8409,
1433
+ "step": 2120
1434
+ },
1435
+ {
1436
+ "epoch": 17.75,
1437
+ "learning_rate": 6.25e-06,
1438
+ "loss": 0.8404,
1439
+ "step": 2130
1440
+ },
1441
+ {
1442
+ "epoch": 17.83,
1443
+ "learning_rate": 6.0185185185185185e-06,
1444
+ "loss": 0.836,
1445
+ "step": 2140
1446
+ },
1447
+ {
1448
+ "epoch": 17.92,
1449
+ "learning_rate": 5.787037037037038e-06,
1450
+ "loss": 0.8787,
1451
+ "step": 2150
1452
+ },
1453
+ {
1454
+ "epoch": 18.0,
1455
+ "learning_rate": 5.555555555555556e-06,
1456
+ "loss": 0.8473,
1457
+ "step": 2160
1458
+ },
1459
+ {
1460
+ "epoch": 18.0,
1461
+ "eval_accuracy": 0.6185446009389671,
1462
+ "eval_loss": 0.9096030592918396,
1463
+ "eval_runtime": 22.3019,
1464
+ "eval_samples_per_second": 76.406,
1465
+ "eval_steps_per_second": 2.421,
1466
+ "step": 2160
1467
+ },
1468
+ {
1469
+ "epoch": 18.08,
1470
+ "learning_rate": 5.324074074074074e-06,
1471
+ "loss": 0.8474,
1472
+ "step": 2170
1473
+ },
1474
+ {
1475
+ "epoch": 18.17,
1476
+ "learning_rate": 5.092592592592592e-06,
1477
+ "loss": 0.8388,
1478
+ "step": 2180
1479
+ },
1480
+ {
1481
+ "epoch": 18.25,
1482
+ "learning_rate": 4.861111111111111e-06,
1483
+ "loss": 0.8502,
1484
+ "step": 2190
1485
+ },
1486
+ {
1487
+ "epoch": 18.33,
1488
+ "learning_rate": 4.6296296296296296e-06,
1489
+ "loss": 0.8149,
1490
+ "step": 2200
1491
+ },
1492
+ {
1493
+ "epoch": 18.42,
1494
+ "learning_rate": 4.398148148148149e-06,
1495
+ "loss": 0.8442,
1496
+ "step": 2210
1497
+ },
1498
+ {
1499
+ "epoch": 18.5,
1500
+ "learning_rate": 4.166666666666667e-06,
1501
+ "loss": 0.8348,
1502
+ "step": 2220
1503
+ },
1504
+ {
1505
+ "epoch": 18.58,
1506
+ "learning_rate": 3.935185185185185e-06,
1507
+ "loss": 0.8841,
1508
+ "step": 2230
1509
+ },
1510
+ {
1511
+ "epoch": 18.67,
1512
+ "learning_rate": 3.7037037037037037e-06,
1513
+ "loss": 0.8669,
1514
+ "step": 2240
1515
+ },
1516
+ {
1517
+ "epoch": 18.75,
1518
+ "learning_rate": 3.4722222222222224e-06,
1519
+ "loss": 0.8684,
1520
+ "step": 2250
1521
+ },
1522
+ {
1523
+ "epoch": 18.83,
1524
+ "learning_rate": 3.2407407407407406e-06,
1525
+ "loss": 0.8048,
1526
+ "step": 2260
1527
+ },
1528
+ {
1529
+ "epoch": 18.92,
1530
+ "learning_rate": 3.0092592592592593e-06,
1531
+ "loss": 0.8316,
1532
+ "step": 2270
1533
+ },
1534
+ {
1535
+ "epoch": 19.0,
1536
+ "learning_rate": 2.777777777777778e-06,
1537
+ "loss": 0.8979,
1538
+ "step": 2280
1539
+ },
1540
+ {
1541
+ "epoch": 19.0,
1542
+ "eval_accuracy": 0.6226525821596244,
1543
+ "eval_loss": 0.9150213599205017,
1544
+ "eval_runtime": 22.6094,
1545
+ "eval_samples_per_second": 75.367,
1546
+ "eval_steps_per_second": 2.388,
1547
+ "step": 2280
1548
+ },
1549
+ {
1550
+ "epoch": 19.08,
1551
+ "learning_rate": 2.546296296296296e-06,
1552
+ "loss": 0.8163,
1553
+ "step": 2290
1554
+ },
1555
+ {
1556
+ "epoch": 19.17,
1557
+ "learning_rate": 2.3148148148148148e-06,
1558
+ "loss": 0.823,
1559
+ "step": 2300
1560
+ },
1561
+ {
1562
+ "epoch": 19.25,
1563
+ "learning_rate": 2.0833333333333334e-06,
1564
+ "loss": 0.8712,
1565
+ "step": 2310
1566
+ },
1567
+ {
1568
+ "epoch": 19.33,
1569
+ "learning_rate": 1.8518518518518519e-06,
1570
+ "loss": 0.8384,
1571
+ "step": 2320
1572
+ },
1573
+ {
1574
+ "epoch": 19.42,
1575
+ "learning_rate": 1.6203703703703703e-06,
1576
+ "loss": 0.855,
1577
+ "step": 2330
1578
+ },
1579
+ {
1580
+ "epoch": 19.5,
1581
+ "learning_rate": 1.388888888888889e-06,
1582
+ "loss": 0.8418,
1583
+ "step": 2340
1584
+ },
1585
+ {
1586
+ "epoch": 19.58,
1587
+ "learning_rate": 1.1574074074074074e-06,
1588
+ "loss": 0.8504,
1589
+ "step": 2350
1590
+ },
1591
+ {
1592
+ "epoch": 19.67,
1593
+ "learning_rate": 9.259259259259259e-07,
1594
+ "loss": 0.8736,
1595
+ "step": 2360
1596
+ },
1597
+ {
1598
+ "epoch": 19.75,
1599
+ "learning_rate": 6.944444444444445e-07,
1600
+ "loss": 0.7959,
1601
+ "step": 2370
1602
+ },
1603
+ {
1604
+ "epoch": 19.83,
1605
+ "learning_rate": 4.6296296296296297e-07,
1606
+ "loss": 0.8431,
1607
+ "step": 2380
1608
+ },
1609
+ {
1610
+ "epoch": 19.92,
1611
+ "learning_rate": 2.3148148148148148e-07,
1612
+ "loss": 0.8712,
1613
+ "step": 2390
1614
+ },
1615
+ {
1616
+ "epoch": 20.0,
1617
+ "learning_rate": 0.0,
1618
+ "loss": 0.8337,
1619
+ "step": 2400
1620
+ },
1621
+ {
1622
+ "epoch": 20.0,
1623
+ "eval_accuracy": 0.6220657276995305,
1624
+ "eval_loss": 0.9112741351127625,
1625
+ "eval_runtime": 23.2097,
1626
+ "eval_samples_per_second": 73.418,
1627
+ "eval_steps_per_second": 2.327,
1628
+ "step": 2400
1629
+ },
1630
+ {
1631
+ "epoch": 20.0,
1632
+ "step": 2400,
1633
+ "total_flos": 2.3761445690374963e+19,
1634
+ "train_loss": 0.9564308677117029,
1635
+ "train_runtime": 12406.5537,
1636
+ "train_samples_per_second": 24.714,
1637
+ "train_steps_per_second": 0.193
1638
+ }
1639
+ ],
1640
+ "max_steps": 2400,
1641
+ "num_train_epochs": 20,
1642
+ "total_flos": 2.3761445690374963e+19,
1643
+ "trial_name": null,
1644
+ "trial_params": null
1645
+ }