pthpth commited on
Commit
84ee137
·
1 Parent(s): fe3f66e

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.99,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.007474538870155811,
5
+ "eval_runtime": 6.689,
6
+ "eval_samples_per_second": 62.341,
7
+ "eval_steps_per_second": 4.036,
8
+ "total_flos": 5.035680667331113e+18,
9
+ "train_loss": 0.13053628388996147,
10
+ "train_runtime": 2520.4596,
11
+ "train_samples_per_second": 25.799,
12
+ "train_steps_per_second": 0.399
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.99,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.007474538870155811,
5
+ "eval_runtime": 6.689,
6
+ "eval_samples_per_second": 62.341,
7
+ "eval_steps_per_second": 4.036
8
+ }
runs/Jul15_08-20-36_69b1e7626d29/events.out.tfevents.1657876260.69b1e7626d29.72.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3046375eca418249b13fec1f241595ab18dfa6dc24f54a96f5ca5324a1438516
3
+ size 363
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 14.99,
3
+ "total_flos": 5.035680667331113e+18,
4
+ "train_loss": 0.13053628388996147,
5
+ "train_runtime": 2520.4596,
6
+ "train_samples_per_second": 25.799,
7
+ "train_steps_per_second": 0.399
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,760 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "ViTFineTuned/checkpoint-603",
4
+ "epoch": 14.988929889298893,
5
+ "global_step": 1005,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.15,
12
+ "learning_rate": 4.950495049504951e-05,
13
+ "loss": 2.3113,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.3,
18
+ "learning_rate": 9.900990099009902e-05,
19
+ "loss": 1.8782,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.44,
24
+ "learning_rate": 0.0001485148514851485,
25
+ "loss": 1.2023,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.59,
30
+ "learning_rate": 0.00019801980198019803,
31
+ "loss": 0.6597,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.74,
36
+ "learning_rate": 0.00024752475247524753,
37
+ "loss": 0.4134,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.89,
42
+ "learning_rate": 0.000297029702970297,
43
+ "loss": 0.2859,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.99,
48
+ "eval_accuracy": 0.9784172661870504,
49
+ "eval_loss": 0.21802514791488647,
50
+ "eval_runtime": 6.2997,
51
+ "eval_samples_per_second": 66.193,
52
+ "eval_steps_per_second": 4.286,
53
+ "step": 67
54
+ },
55
+ {
56
+ "epoch": 1.04,
57
+ "learning_rate": 0.0003465346534653465,
58
+ "loss": 0.285,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 1.19,
63
+ "learning_rate": 0.00039603960396039607,
64
+ "loss": 0.261,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 1.34,
69
+ "learning_rate": 0.00044554455445544556,
70
+ "loss": 0.2659,
71
+ "step": 90
72
+ },
73
+ {
74
+ "epoch": 1.49,
75
+ "learning_rate": 0.0004950495049504951,
76
+ "loss": 0.2812,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 1.63,
81
+ "learning_rate": 0.0004950221238938053,
82
+ "loss": 0.2512,
83
+ "step": 110
84
+ },
85
+ {
86
+ "epoch": 1.78,
87
+ "learning_rate": 0.0004894911504424779,
88
+ "loss": 0.2033,
89
+ "step": 120
90
+ },
91
+ {
92
+ "epoch": 1.93,
93
+ "learning_rate": 0.0004839601769911505,
94
+ "loss": 0.293,
95
+ "step": 130
96
+ },
97
+ {
98
+ "epoch": 1.99,
99
+ "eval_accuracy": 0.9184652278177458,
100
+ "eval_loss": 0.3308344781398773,
101
+ "eval_runtime": 6.9556,
102
+ "eval_samples_per_second": 59.952,
103
+ "eval_steps_per_second": 3.882,
104
+ "step": 134
105
+ },
106
+ {
107
+ "epoch": 2.09,
108
+ "learning_rate": 0.000478429203539823,
109
+ "loss": 0.2754,
110
+ "step": 140
111
+ },
112
+ {
113
+ "epoch": 2.24,
114
+ "learning_rate": 0.0004728982300884956,
115
+ "loss": 0.2746,
116
+ "step": 150
117
+ },
118
+ {
119
+ "epoch": 2.38,
120
+ "learning_rate": 0.00046736725663716817,
121
+ "loss": 0.1477,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 2.53,
126
+ "learning_rate": 0.0004618362831858407,
127
+ "loss": 0.1984,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 2.68,
132
+ "learning_rate": 0.00045630530973451327,
133
+ "loss": 0.279,
134
+ "step": 180
135
+ },
136
+ {
137
+ "epoch": 2.83,
138
+ "learning_rate": 0.00045077433628318585,
139
+ "loss": 0.1846,
140
+ "step": 190
141
+ },
142
+ {
143
+ "epoch": 2.97,
144
+ "learning_rate": 0.0004452433628318584,
145
+ "loss": 0.1444,
146
+ "step": 200
147
+ },
148
+ {
149
+ "epoch": 2.99,
150
+ "eval_accuracy": 0.9568345323741008,
151
+ "eval_loss": 0.15321111679077148,
152
+ "eval_runtime": 6.3227,
153
+ "eval_samples_per_second": 65.953,
154
+ "eval_steps_per_second": 4.27,
155
+ "step": 201
156
+ },
157
+ {
158
+ "epoch": 3.13,
159
+ "learning_rate": 0.000439712389380531,
160
+ "loss": 0.1136,
161
+ "step": 210
162
+ },
163
+ {
164
+ "epoch": 3.28,
165
+ "learning_rate": 0.0004341814159292036,
166
+ "loss": 0.1482,
167
+ "step": 220
168
+ },
169
+ {
170
+ "epoch": 3.43,
171
+ "learning_rate": 0.0004286504424778761,
172
+ "loss": 0.1124,
173
+ "step": 230
174
+ },
175
+ {
176
+ "epoch": 3.58,
177
+ "learning_rate": 0.0004231194690265487,
178
+ "loss": 0.2128,
179
+ "step": 240
180
+ },
181
+ {
182
+ "epoch": 3.72,
183
+ "learning_rate": 0.00041758849557522125,
184
+ "loss": 0.0859,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 3.87,
189
+ "learning_rate": 0.0004120575221238938,
190
+ "loss": 0.0833,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 3.99,
195
+ "eval_accuracy": 0.9856115107913669,
196
+ "eval_loss": 0.051548197865486145,
197
+ "eval_runtime": 6.2776,
198
+ "eval_samples_per_second": 66.426,
199
+ "eval_steps_per_second": 4.301,
200
+ "step": 268
201
+ },
202
+ {
203
+ "epoch": 4.03,
204
+ "learning_rate": 0.0004065265486725664,
205
+ "loss": 0.1025,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 4.18,
210
+ "learning_rate": 0.000400995575221239,
211
+ "loss": 0.0917,
212
+ "step": 280
213
+ },
214
+ {
215
+ "epoch": 4.32,
216
+ "learning_rate": 0.0003954646017699115,
217
+ "loss": 0.0812,
218
+ "step": 290
219
+ },
220
+ {
221
+ "epoch": 4.47,
222
+ "learning_rate": 0.0003899336283185841,
223
+ "loss": 0.0879,
224
+ "step": 300
225
+ },
226
+ {
227
+ "epoch": 4.62,
228
+ "learning_rate": 0.0003844026548672566,
229
+ "loss": 0.0763,
230
+ "step": 310
231
+ },
232
+ {
233
+ "epoch": 4.77,
234
+ "learning_rate": 0.0003788716814159292,
235
+ "loss": 0.0582,
236
+ "step": 320
237
+ },
238
+ {
239
+ "epoch": 4.92,
240
+ "learning_rate": 0.00037334070796460176,
241
+ "loss": 0.1007,
242
+ "step": 330
243
+ },
244
+ {
245
+ "epoch": 4.99,
246
+ "eval_accuracy": 0.9904076738609112,
247
+ "eval_loss": 0.029486695304512978,
248
+ "eval_runtime": 6.2974,
249
+ "eval_samples_per_second": 66.218,
250
+ "eval_steps_per_second": 4.288,
251
+ "step": 335
252
+ },
253
+ {
254
+ "epoch": 5.07,
255
+ "learning_rate": 0.00036780973451327434,
256
+ "loss": 0.0564,
257
+ "step": 340
258
+ },
259
+ {
260
+ "epoch": 5.22,
261
+ "learning_rate": 0.0003622787610619469,
262
+ "loss": 0.0506,
263
+ "step": 350
264
+ },
265
+ {
266
+ "epoch": 5.37,
267
+ "learning_rate": 0.0003567477876106195,
268
+ "loss": 0.0611,
269
+ "step": 360
270
+ },
271
+ {
272
+ "epoch": 5.52,
273
+ "learning_rate": 0.000351216814159292,
274
+ "loss": 0.0576,
275
+ "step": 370
276
+ },
277
+ {
278
+ "epoch": 5.66,
279
+ "learning_rate": 0.0003456858407079646,
280
+ "loss": 0.0832,
281
+ "step": 380
282
+ },
283
+ {
284
+ "epoch": 5.81,
285
+ "learning_rate": 0.0003401548672566372,
286
+ "loss": 0.0561,
287
+ "step": 390
288
+ },
289
+ {
290
+ "epoch": 5.96,
291
+ "learning_rate": 0.0003346238938053097,
292
+ "loss": 0.0372,
293
+ "step": 400
294
+ },
295
+ {
296
+ "epoch": 5.99,
297
+ "eval_accuracy": 0.9808153477218226,
298
+ "eval_loss": 0.05741920694708824,
299
+ "eval_runtime": 6.2799,
300
+ "eval_samples_per_second": 66.402,
301
+ "eval_steps_per_second": 4.299,
302
+ "step": 402
303
+ },
304
+ {
305
+ "epoch": 6.12,
306
+ "learning_rate": 0.00032909292035398233,
307
+ "loss": 0.0749,
308
+ "step": 410
309
+ },
310
+ {
311
+ "epoch": 6.27,
312
+ "learning_rate": 0.0003235619469026549,
313
+ "loss": 0.0464,
314
+ "step": 420
315
+ },
316
+ {
317
+ "epoch": 6.41,
318
+ "learning_rate": 0.00031803097345132743,
319
+ "loss": 0.0538,
320
+ "step": 430
321
+ },
322
+ {
323
+ "epoch": 6.56,
324
+ "learning_rate": 0.0003125,
325
+ "loss": 0.073,
326
+ "step": 440
327
+ },
328
+ {
329
+ "epoch": 6.71,
330
+ "learning_rate": 0.0003069690265486726,
331
+ "loss": 0.0533,
332
+ "step": 450
333
+ },
334
+ {
335
+ "epoch": 6.86,
336
+ "learning_rate": 0.0003014380530973451,
337
+ "loss": 0.0919,
338
+ "step": 460
339
+ },
340
+ {
341
+ "epoch": 6.99,
342
+ "eval_accuracy": 0.988009592326139,
343
+ "eval_loss": 0.05372486636042595,
344
+ "eval_runtime": 6.3165,
345
+ "eval_samples_per_second": 66.017,
346
+ "eval_steps_per_second": 4.275,
347
+ "step": 469
348
+ },
349
+ {
350
+ "epoch": 7.01,
351
+ "learning_rate": 0.0002959070796460177,
352
+ "loss": 0.0518,
353
+ "step": 470
354
+ },
355
+ {
356
+ "epoch": 7.16,
357
+ "learning_rate": 0.0002903761061946903,
358
+ "loss": 0.0233,
359
+ "step": 480
360
+ },
361
+ {
362
+ "epoch": 7.31,
363
+ "learning_rate": 0.00028484513274336284,
364
+ "loss": 0.0501,
365
+ "step": 490
366
+ },
367
+ {
368
+ "epoch": 7.46,
369
+ "learning_rate": 0.0002793141592920354,
370
+ "loss": 0.0685,
371
+ "step": 500
372
+ },
373
+ {
374
+ "epoch": 7.61,
375
+ "learning_rate": 0.000273783185840708,
376
+ "loss": 0.0116,
377
+ "step": 510
378
+ },
379
+ {
380
+ "epoch": 7.75,
381
+ "learning_rate": 0.0002682522123893805,
382
+ "loss": 0.0228,
383
+ "step": 520
384
+ },
385
+ {
386
+ "epoch": 7.9,
387
+ "learning_rate": 0.0002627212389380531,
388
+ "loss": 0.0135,
389
+ "step": 530
390
+ },
391
+ {
392
+ "epoch": 7.99,
393
+ "eval_accuracy": 0.9952038369304557,
394
+ "eval_loss": 0.011652274057269096,
395
+ "eval_runtime": 6.3118,
396
+ "eval_samples_per_second": 66.067,
397
+ "eval_steps_per_second": 4.278,
398
+ "step": 536
399
+ },
400
+ {
401
+ "epoch": 8.06,
402
+ "learning_rate": 0.00025719026548672567,
403
+ "loss": 0.0091,
404
+ "step": 540
405
+ },
406
+ {
407
+ "epoch": 8.21,
408
+ "learning_rate": 0.00025165929203539825,
409
+ "loss": 0.0469,
410
+ "step": 550
411
+ },
412
+ {
413
+ "epoch": 8.35,
414
+ "learning_rate": 0.0002461283185840708,
415
+ "loss": 0.0157,
416
+ "step": 560
417
+ },
418
+ {
419
+ "epoch": 8.5,
420
+ "learning_rate": 0.00024059734513274338,
421
+ "loss": 0.0172,
422
+ "step": 570
423
+ },
424
+ {
425
+ "epoch": 8.65,
426
+ "learning_rate": 0.00023506637168141593,
427
+ "loss": 0.0202,
428
+ "step": 580
429
+ },
430
+ {
431
+ "epoch": 8.8,
432
+ "learning_rate": 0.0002295353982300885,
433
+ "loss": 0.0303,
434
+ "step": 590
435
+ },
436
+ {
437
+ "epoch": 8.94,
438
+ "learning_rate": 0.00022400442477876108,
439
+ "loss": 0.0472,
440
+ "step": 600
441
+ },
442
+ {
443
+ "epoch": 8.99,
444
+ "eval_accuracy": 1.0,
445
+ "eval_loss": 0.007474538870155811,
446
+ "eval_runtime": 6.2502,
447
+ "eval_samples_per_second": 66.718,
448
+ "eval_steps_per_second": 4.32,
449
+ "step": 603
450
+ },
451
+ {
452
+ "epoch": 9.1,
453
+ "learning_rate": 0.00021847345132743363,
454
+ "loss": 0.0147,
455
+ "step": 610
456
+ },
457
+ {
458
+ "epoch": 9.25,
459
+ "learning_rate": 0.0002129424778761062,
460
+ "loss": 0.0133,
461
+ "step": 620
462
+ },
463
+ {
464
+ "epoch": 9.4,
465
+ "learning_rate": 0.00020741150442477876,
466
+ "loss": 0.0264,
467
+ "step": 630
468
+ },
469
+ {
470
+ "epoch": 9.55,
471
+ "learning_rate": 0.00020188053097345134,
472
+ "loss": 0.053,
473
+ "step": 640
474
+ },
475
+ {
476
+ "epoch": 9.69,
477
+ "learning_rate": 0.0001963495575221239,
478
+ "loss": 0.0056,
479
+ "step": 650
480
+ },
481
+ {
482
+ "epoch": 9.84,
483
+ "learning_rate": 0.00019081858407079646,
484
+ "loss": 0.0128,
485
+ "step": 660
486
+ },
487
+ {
488
+ "epoch": 9.99,
489
+ "learning_rate": 0.00018528761061946904,
490
+ "loss": 0.0151,
491
+ "step": 670
492
+ },
493
+ {
494
+ "epoch": 9.99,
495
+ "eval_accuracy": 1.0,
496
+ "eval_loss": 0.004758651368319988,
497
+ "eval_runtime": 6.2595,
498
+ "eval_samples_per_second": 66.619,
499
+ "eval_steps_per_second": 4.313,
500
+ "step": 670
501
+ },
502
+ {
503
+ "epoch": 10.15,
504
+ "learning_rate": 0.0001797566371681416,
505
+ "loss": 0.0135,
506
+ "step": 680
507
+ },
508
+ {
509
+ "epoch": 10.3,
510
+ "learning_rate": 0.00017422566371681417,
511
+ "loss": 0.0394,
512
+ "step": 690
513
+ },
514
+ {
515
+ "epoch": 10.44,
516
+ "learning_rate": 0.00016869469026548672,
517
+ "loss": 0.0122,
518
+ "step": 700
519
+ },
520
+ {
521
+ "epoch": 10.59,
522
+ "learning_rate": 0.0001631637168141593,
523
+ "loss": 0.0136,
524
+ "step": 710
525
+ },
526
+ {
527
+ "epoch": 10.74,
528
+ "learning_rate": 0.00015763274336283187,
529
+ "loss": 0.01,
530
+ "step": 720
531
+ },
532
+ {
533
+ "epoch": 10.89,
534
+ "learning_rate": 0.00015210176991150442,
535
+ "loss": 0.0052,
536
+ "step": 730
537
+ },
538
+ {
539
+ "epoch": 10.99,
540
+ "eval_accuracy": 0.9976019184652278,
541
+ "eval_loss": 0.007309095934033394,
542
+ "eval_runtime": 6.2533,
543
+ "eval_samples_per_second": 66.685,
544
+ "eval_steps_per_second": 4.318,
545
+ "step": 737
546
+ },
547
+ {
548
+ "epoch": 11.04,
549
+ "learning_rate": 0.00014657079646017697,
550
+ "loss": 0.0113,
551
+ "step": 740
552
+ },
553
+ {
554
+ "epoch": 11.19,
555
+ "learning_rate": 0.00014103982300884958,
556
+ "loss": 0.0325,
557
+ "step": 750
558
+ },
559
+ {
560
+ "epoch": 11.34,
561
+ "learning_rate": 0.00013550884955752213,
562
+ "loss": 0.012,
563
+ "step": 760
564
+ },
565
+ {
566
+ "epoch": 11.49,
567
+ "learning_rate": 0.00012997787610619468,
568
+ "loss": 0.0185,
569
+ "step": 770
570
+ },
571
+ {
572
+ "epoch": 11.63,
573
+ "learning_rate": 0.00012444690265486725,
574
+ "loss": 0.0049,
575
+ "step": 780
576
+ },
577
+ {
578
+ "epoch": 11.78,
579
+ "learning_rate": 0.00011891592920353983,
580
+ "loss": 0.0038,
581
+ "step": 790
582
+ },
583
+ {
584
+ "epoch": 11.93,
585
+ "learning_rate": 0.00011338495575221238,
586
+ "loss": 0.0109,
587
+ "step": 800
588
+ },
589
+ {
590
+ "epoch": 11.99,
591
+ "eval_accuracy": 0.9952038369304557,
592
+ "eval_loss": 0.01980188488960266,
593
+ "eval_runtime": 6.279,
594
+ "eval_samples_per_second": 66.411,
595
+ "eval_steps_per_second": 4.3,
596
+ "step": 804
597
+ },
598
+ {
599
+ "epoch": 12.09,
600
+ "learning_rate": 0.00010785398230088496,
601
+ "loss": 0.0096,
602
+ "step": 810
603
+ },
604
+ {
605
+ "epoch": 12.24,
606
+ "learning_rate": 0.00010232300884955754,
607
+ "loss": 0.0115,
608
+ "step": 820
609
+ },
610
+ {
611
+ "epoch": 12.38,
612
+ "learning_rate": 9.679203539823009e-05,
613
+ "loss": 0.0171,
614
+ "step": 830
615
+ },
616
+ {
617
+ "epoch": 12.53,
618
+ "learning_rate": 9.126106194690266e-05,
619
+ "loss": 0.004,
620
+ "step": 840
621
+ },
622
+ {
623
+ "epoch": 12.68,
624
+ "learning_rate": 8.573008849557521e-05,
625
+ "loss": 0.0073,
626
+ "step": 850
627
+ },
628
+ {
629
+ "epoch": 12.83,
630
+ "learning_rate": 8.019911504424779e-05,
631
+ "loss": 0.0045,
632
+ "step": 860
633
+ },
634
+ {
635
+ "epoch": 12.97,
636
+ "learning_rate": 7.466814159292036e-05,
637
+ "loss": 0.0033,
638
+ "step": 870
639
+ },
640
+ {
641
+ "epoch": 12.99,
642
+ "eval_accuracy": 0.9976019184652278,
643
+ "eval_loss": 0.006568592973053455,
644
+ "eval_runtime": 6.2861,
645
+ "eval_samples_per_second": 66.336,
646
+ "eval_steps_per_second": 4.295,
647
+ "step": 871
648
+ },
649
+ {
650
+ "epoch": 13.13,
651
+ "learning_rate": 6.913716814159292e-05,
652
+ "loss": 0.0077,
653
+ "step": 880
654
+ },
655
+ {
656
+ "epoch": 13.28,
657
+ "learning_rate": 6.360619469026548e-05,
658
+ "loss": 0.0027,
659
+ "step": 890
660
+ },
661
+ {
662
+ "epoch": 13.43,
663
+ "learning_rate": 5.8075221238938054e-05,
664
+ "loss": 0.0028,
665
+ "step": 900
666
+ },
667
+ {
668
+ "epoch": 13.58,
669
+ "learning_rate": 5.254424778761062e-05,
670
+ "loss": 0.0025,
671
+ "step": 910
672
+ },
673
+ {
674
+ "epoch": 13.72,
675
+ "learning_rate": 4.701327433628318e-05,
676
+ "loss": 0.0024,
677
+ "step": 920
678
+ },
679
+ {
680
+ "epoch": 13.87,
681
+ "learning_rate": 4.148230088495576e-05,
682
+ "loss": 0.011,
683
+ "step": 930
684
+ },
685
+ {
686
+ "epoch": 13.99,
687
+ "eval_accuracy": 0.9976019184652278,
688
+ "eval_loss": 0.006684896536171436,
689
+ "eval_runtime": 6.258,
690
+ "eval_samples_per_second": 66.635,
691
+ "eval_steps_per_second": 4.314,
692
+ "step": 938
693
+ },
694
+ {
695
+ "epoch": 14.03,
696
+ "learning_rate": 3.595132743362832e-05,
697
+ "loss": 0.0027,
698
+ "step": 940
699
+ },
700
+ {
701
+ "epoch": 14.18,
702
+ "learning_rate": 3.0420353982300886e-05,
703
+ "loss": 0.0092,
704
+ "step": 950
705
+ },
706
+ {
707
+ "epoch": 14.32,
708
+ "learning_rate": 2.4889380530973453e-05,
709
+ "loss": 0.0025,
710
+ "step": 960
711
+ },
712
+ {
713
+ "epoch": 14.47,
714
+ "learning_rate": 1.9358407079646017e-05,
715
+ "loss": 0.0024,
716
+ "step": 970
717
+ },
718
+ {
719
+ "epoch": 14.62,
720
+ "learning_rate": 1.3827433628318586e-05,
721
+ "loss": 0.0061,
722
+ "step": 980
723
+ },
724
+ {
725
+ "epoch": 14.77,
726
+ "learning_rate": 8.296460176991151e-06,
727
+ "loss": 0.0028,
728
+ "step": 990
729
+ },
730
+ {
731
+ "epoch": 14.92,
732
+ "learning_rate": 2.765486725663717e-06,
733
+ "loss": 0.0032,
734
+ "step": 1000
735
+ },
736
+ {
737
+ "epoch": 14.99,
738
+ "eval_accuracy": 0.9976019184652278,
739
+ "eval_loss": 0.00603157514706254,
740
+ "eval_runtime": 6.2764,
741
+ "eval_samples_per_second": 66.44,
742
+ "eval_steps_per_second": 4.302,
743
+ "step": 1005
744
+ },
745
+ {
746
+ "epoch": 14.99,
747
+ "step": 1005,
748
+ "total_flos": 5.035680667331113e+18,
749
+ "train_loss": 0.13053628388996147,
750
+ "train_runtime": 2520.4596,
751
+ "train_samples_per_second": 25.799,
752
+ "train_steps_per_second": 0.399
753
+ }
754
+ ],
755
+ "max_steps": 1005,
756
+ "num_train_epochs": 15,
757
+ "total_flos": 5.035680667331113e+18,
758
+ "trial_name": null,
759
+ "trial_params": null
760
+ }