Soulaimen commited on
Commit
6cc4279
·
1 Parent(s): b547148

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 4.99,
3
- "eval_accuracy": 0.9259259259259259,
4
- "eval_loss": 0.20305776596069336,
5
- "eval_runtime": 8.7135,
6
- "eval_samples_per_second": 71.269,
7
- "eval_steps_per_second": 8.952,
8
- "total_flos": 6.917311166047027e+17,
9
- "train_loss": 0.5214220342964961,
10
- "train_runtime": 714.1939,
11
- "train_samples_per_second": 39.072,
12
- "train_steps_per_second": 1.218
13
  }
 
1
  {
2
+ "epoch": 9.97,
3
+ "eval_accuracy": 0.9726247987117552,
4
+ "eval_loss": 0.08394750207662582,
5
+ "eval_runtime": 8.4058,
6
+ "eval_samples_per_second": 73.878,
7
+ "eval_steps_per_second": 9.279,
8
+ "total_flos": 1.3833876610752307e+18,
9
+ "train_loss": 0.24886192696532983,
10
+ "train_runtime": 1438.0658,
11
+ "train_samples_per_second": 38.809,
12
+ "train_steps_per_second": 1.21
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.99,
3
- "eval_accuracy": 0.9259259259259259,
4
- "eval_loss": 0.20305776596069336,
5
- "eval_runtime": 8.7135,
6
- "eval_samples_per_second": 71.269,
7
- "eval_steps_per_second": 8.952
8
  }
 
1
  {
2
+ "epoch": 9.97,
3
+ "eval_accuracy": 0.9726247987117552,
4
+ "eval_loss": 0.08394750207662582,
5
+ "eval_runtime": 8.4058,
6
+ "eval_samples_per_second": 73.878,
7
+ "eval_steps_per_second": 9.279
8
  }
runs/Apr17_12-51-18_5910a653ef0c/events.out.tfevents.1681737349.5910a653ef0c.229.10 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f2d2e04027a81787cd460c45f4a5670b80215e417fa8f3114638ba29475e32a
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.99,
3
- "total_flos": 6.917311166047027e+17,
4
- "train_loss": 0.5214220342964961,
5
- "train_runtime": 714.1939,
6
- "train_samples_per_second": 39.072,
7
- "train_steps_per_second": 1.218
8
  }
 
1
  {
2
+ "epoch": 9.97,
3
+ "total_flos": 1.3833876610752307e+18,
4
+ "train_loss": 0.24886192696532983,
5
+ "train_runtime": 1438.0658,
6
+ "train_samples_per_second": 38.809,
7
+ "train_steps_per_second": 1.21
8
  }
trainer_state.json CHANGED
@@ -1,592 +1,1159 @@
1
  {
2
- "best_metric": 0.9259259259259259,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-bottom_cleaned_data/checkpoint-870",
4
- "epoch": 4.98567335243553,
5
- "global_step": 870,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.06,
12
- "learning_rate": 1.1494252873563219e-06,
13
- "loss": 1.4361,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.11,
18
- "learning_rate": 2.2988505747126437e-06,
19
- "loss": 1.3935,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.17,
24
- "learning_rate": 3.448275862068966e-06,
25
- "loss": 1.3852,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.23,
30
- "learning_rate": 4.5977011494252875e-06,
31
- "loss": 1.3455,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.29,
36
- "learning_rate": 5.747126436781609e-06,
37
- "loss": 1.2788,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.34,
42
- "learning_rate": 6.896551724137932e-06,
43
- "loss": 1.2361,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.4,
48
- "learning_rate": 8.045977011494253e-06,
49
- "loss": 1.1974,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.46,
54
- "learning_rate": 9.195402298850575e-06,
55
- "loss": 1.1278,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.52,
60
- "learning_rate": 9.96168582375479e-06,
61
- "loss": 1.0761,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.57,
66
- "learning_rate": 9.833971902937422e-06,
67
- "loss": 1.0022,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.63,
72
- "learning_rate": 9.706257982120052e-06,
73
- "loss": 0.9669,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.69,
78
- "learning_rate": 9.578544061302683e-06,
79
- "loss": 0.8852,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.74,
84
- "learning_rate": 9.450830140485315e-06,
85
- "loss": 0.8496,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.8,
90
- "learning_rate": 9.323116219667945e-06,
91
- "loss": 0.804,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.86,
96
- "learning_rate": 9.195402298850575e-06,
97
- "loss": 0.7712,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.92,
102
- "learning_rate": 9.067688378033207e-06,
103
- "loss": 0.6497,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.97,
108
- "learning_rate": 8.939974457215838e-06,
109
- "loss": 0.6591,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 1.0,
114
- "eval_accuracy": 0.8067632850241546,
115
- "eval_loss": 0.5231599807739258,
116
- "eval_runtime": 9.148,
117
- "eval_samples_per_second": 67.884,
118
- "eval_steps_per_second": 8.526,
119
  "step": 174
120
  },
121
  {
122
  "epoch": 1.03,
123
- "learning_rate": 8.812260536398468e-06,
124
- "loss": 0.6781,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.09,
129
- "learning_rate": 8.684546615581098e-06,
130
- "loss": 0.6175,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.15,
135
- "learning_rate": 8.55683269476373e-06,
136
- "loss": 0.5585,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.2,
141
- "learning_rate": 8.429118773946362e-06,
142
- "loss": 0.5456,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.26,
147
- "learning_rate": 8.301404853128992e-06,
148
- "loss": 0.5256,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.32,
153
- "learning_rate": 8.173690932311623e-06,
154
- "loss": 0.5278,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.38,
159
- "learning_rate": 8.045977011494253e-06,
160
- "loss": 0.4722,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.43,
165
- "learning_rate": 7.918263090676885e-06,
166
- "loss": 0.509,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.49,
171
- "learning_rate": 7.790549169859515e-06,
172
- "loss": 0.4425,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.55,
177
- "learning_rate": 7.662835249042147e-06,
178
- "loss": 0.4515,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.6,
183
- "learning_rate": 7.535121328224777e-06,
184
- "loss": 0.477,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.66,
189
- "learning_rate": 7.4074074074074075e-06,
190
- "loss": 0.5036,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.72,
195
- "learning_rate": 7.279693486590039e-06,
196
- "loss": 0.5159,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.78,
201
- "learning_rate": 7.15197956577267e-06,
202
- "loss": 0.5527,
203
  "step": 310
204
  },
205
  {
206
  "epoch": 1.83,
207
- "learning_rate": 7.0242656449553e-06,
208
- "loss": 0.4243,
209
  "step": 320
210
  },
211
  {
212
  "epoch": 1.89,
213
- "learning_rate": 6.896551724137932e-06,
214
- "loss": 0.4485,
215
  "step": 330
216
  },
217
  {
218
  "epoch": 1.95,
219
- "learning_rate": 6.7688378033205625e-06,
220
- "loss": 0.4104,
221
  "step": 340
222
  },
223
  {
224
  "epoch": 2.0,
225
- "eval_accuracy": 0.8888888888888888,
226
- "eval_loss": 0.31606417894363403,
227
- "eval_runtime": 9.5164,
228
- "eval_samples_per_second": 65.255,
229
- "eval_steps_per_second": 8.196,
230
  "step": 349
231
  },
232
  {
233
  "epoch": 2.01,
234
- "learning_rate": 6.641123882503193e-06,
235
- "loss": 0.3783,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.06,
240
- "learning_rate": 6.513409961685824e-06,
241
- "loss": 0.372,
242
  "step": 360
243
  },
244
  {
245
  "epoch": 2.12,
246
- "learning_rate": 6.385696040868455e-06,
247
- "loss": 0.4664,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.18,
252
- "learning_rate": 6.257982120051086e-06,
253
- "loss": 0.4034,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.23,
258
- "learning_rate": 6.130268199233717e-06,
259
- "loss": 0.3513,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 2.29,
264
- "learning_rate": 6.002554278416348e-06,
265
- "loss": 0.3816,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.35,
270
- "learning_rate": 5.874840357598979e-06,
271
- "loss": 0.4518,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.41,
276
- "learning_rate": 5.747126436781609e-06,
277
- "loss": 0.4686,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.46,
282
- "learning_rate": 5.619412515964241e-06,
283
- "loss": 0.3879,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.52,
288
- "learning_rate": 5.491698595146872e-06,
289
- "loss": 0.3775,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.58,
294
- "learning_rate": 5.3639846743295025e-06,
295
- "loss": 0.457,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.64,
300
- "learning_rate": 5.236270753512134e-06,
301
- "loss": 0.3692,
302
  "step": 460
303
  },
304
  {
305
  "epoch": 2.69,
306
- "learning_rate": 5.108556832694764e-06,
307
- "loss": 0.4145,
308
  "step": 470
309
  },
310
  {
311
  "epoch": 2.75,
312
- "learning_rate": 4.980842911877395e-06,
313
- "loss": 0.3576,
314
  "step": 480
315
  },
316
  {
317
  "epoch": 2.81,
318
- "learning_rate": 4.853128991060026e-06,
319
- "loss": 0.3297,
320
  "step": 490
321
  },
322
  {
323
  "epoch": 2.87,
324
- "learning_rate": 4.7254150702426575e-06,
325
- "loss": 0.4084,
326
  "step": 500
327
  },
328
  {
329
  "epoch": 2.92,
330
- "learning_rate": 4.5977011494252875e-06,
331
- "loss": 0.3237,
332
  "step": 510
333
  },
334
  {
335
  "epoch": 2.98,
336
- "learning_rate": 4.469987228607919e-06,
337
- "loss": 0.3559,
338
  "step": 520
339
  },
340
  {
341
  "epoch": 3.0,
342
- "eval_accuracy": 0.9162640901771336,
343
- "eval_loss": 0.2237011194229126,
344
- "eval_runtime": 9.3284,
345
- "eval_samples_per_second": 66.571,
346
- "eval_steps_per_second": 8.362,
347
  "step": 523
348
  },
349
  {
350
  "epoch": 3.04,
351
- "learning_rate": 4.342273307790549e-06,
352
- "loss": 0.3295,
353
  "step": 530
354
  },
355
  {
356
  "epoch": 3.09,
357
- "learning_rate": 4.214559386973181e-06,
358
- "loss": 0.3902,
359
  "step": 540
360
  },
361
  {
362
  "epoch": 3.15,
363
- "learning_rate": 4.086845466155812e-06,
364
- "loss": 0.2753,
365
  "step": 550
366
  },
367
  {
368
  "epoch": 3.21,
369
- "learning_rate": 3.9591315453384425e-06,
370
- "loss": 0.3705,
371
  "step": 560
372
  },
373
  {
374
  "epoch": 3.27,
375
- "learning_rate": 3.831417624521073e-06,
376
- "loss": 0.3797,
377
  "step": 570
378
  },
379
  {
380
  "epoch": 3.32,
381
- "learning_rate": 3.7037037037037037e-06,
382
- "loss": 0.3163,
383
  "step": 580
384
  },
385
  {
386
  "epoch": 3.38,
387
- "learning_rate": 3.575989782886335e-06,
388
- "loss": 0.3488,
389
  "step": 590
390
  },
391
  {
392
  "epoch": 3.44,
393
- "learning_rate": 3.448275862068966e-06,
394
- "loss": 0.3248,
395
  "step": 600
396
  },
397
  {
398
  "epoch": 3.5,
399
- "learning_rate": 3.3205619412515967e-06,
400
- "loss": 0.3806,
401
  "step": 610
402
  },
403
  {
404
  "epoch": 3.55,
405
- "learning_rate": 3.1928480204342275e-06,
406
- "loss": 0.2763,
407
  "step": 620
408
  },
409
  {
410
  "epoch": 3.61,
411
- "learning_rate": 3.0651340996168583e-06,
412
- "loss": 0.3623,
413
  "step": 630
414
  },
415
  {
416
  "epoch": 3.67,
417
- "learning_rate": 2.9374201787994896e-06,
418
- "loss": 0.3353,
419
  "step": 640
420
  },
421
  {
422
  "epoch": 3.72,
423
- "learning_rate": 2.8097062579821204e-06,
424
- "loss": 0.3229,
425
  "step": 650
426
  },
427
  {
428
  "epoch": 3.78,
429
- "learning_rate": 2.6819923371647512e-06,
430
- "loss": 0.3127,
431
  "step": 660
432
  },
433
  {
434
  "epoch": 3.84,
435
- "learning_rate": 2.554278416347382e-06,
436
- "loss": 0.28,
437
  "step": 670
438
  },
439
  {
440
  "epoch": 3.9,
441
- "learning_rate": 2.426564495530013e-06,
442
- "loss": 0.4105,
443
  "step": 680
444
  },
445
  {
446
  "epoch": 3.95,
447
- "learning_rate": 2.2988505747126437e-06,
448
- "loss": 0.3487,
449
  "step": 690
450
  },
451
  {
452
  "epoch": 4.0,
453
- "eval_accuracy": 0.9194847020933977,
454
- "eval_loss": 0.19985385239124298,
455
- "eval_runtime": 8.2263,
456
- "eval_samples_per_second": 75.49,
457
- "eval_steps_per_second": 9.482,
458
  "step": 698
459
  },
460
  {
461
  "epoch": 4.01,
462
- "learning_rate": 2.1711366538952746e-06,
463
- "loss": 0.333,
464
  "step": 700
465
  },
466
  {
467
  "epoch": 4.07,
468
- "learning_rate": 2.043422733077906e-06,
469
- "loss": 0.3239,
470
  "step": 710
471
  },
472
  {
473
  "epoch": 4.13,
474
- "learning_rate": 1.9157088122605367e-06,
475
- "loss": 0.256,
476
  "step": 720
477
  },
478
  {
479
  "epoch": 4.18,
480
- "learning_rate": 1.7879948914431675e-06,
481
- "loss": 0.3252,
482
  "step": 730
483
  },
484
  {
485
  "epoch": 4.24,
486
- "learning_rate": 1.6602809706257983e-06,
487
- "loss": 0.2693,
488
  "step": 740
489
  },
490
  {
491
  "epoch": 4.3,
492
- "learning_rate": 1.5325670498084292e-06,
493
- "loss": 0.3099,
494
  "step": 750
495
  },
496
  {
497
  "epoch": 4.36,
498
- "learning_rate": 1.4048531289910602e-06,
499
- "loss": 0.3115,
500
  "step": 760
501
  },
502
  {
503
  "epoch": 4.41,
504
- "learning_rate": 1.277139208173691e-06,
505
- "loss": 0.3501,
506
  "step": 770
507
  },
508
  {
509
  "epoch": 4.47,
510
- "learning_rate": 1.1494252873563219e-06,
511
- "loss": 0.3733,
512
  "step": 780
513
  },
514
  {
515
  "epoch": 4.53,
516
- "learning_rate": 1.021711366538953e-06,
517
- "loss": 0.3264,
518
  "step": 790
519
  },
520
  {
521
  "epoch": 4.58,
522
- "learning_rate": 8.939974457215837e-07,
523
- "loss": 0.3111,
524
  "step": 800
525
  },
526
  {
527
  "epoch": 4.64,
528
- "learning_rate": 7.662835249042146e-07,
529
- "loss": 0.4114,
530
  "step": 810
531
  },
532
  {
533
  "epoch": 4.7,
534
- "learning_rate": 6.385696040868455e-07,
535
- "loss": 0.2706,
536
  "step": 820
537
  },
538
  {
539
  "epoch": 4.76,
540
- "learning_rate": 5.108556832694765e-07,
541
- "loss": 0.2863,
542
  "step": 830
543
  },
544
  {
545
  "epoch": 4.81,
546
- "learning_rate": 3.831417624521073e-07,
547
- "loss": 0.2813,
548
  "step": 840
549
  },
550
  {
551
  "epoch": 4.87,
552
- "learning_rate": 2.5542784163473823e-07,
553
- "loss": 0.3687,
554
  "step": 850
555
  },
556
  {
557
  "epoch": 4.93,
558
- "learning_rate": 1.2771392081736911e-07,
559
- "loss": 0.3689,
560
  "step": 860
561
  },
562
  {
563
  "epoch": 4.99,
564
- "learning_rate": 0.0,
565
- "loss": 0.3422,
566
  "step": 870
567
  },
568
  {
569
- "epoch": 4.99,
570
- "eval_accuracy": 0.9259259259259259,
571
- "eval_loss": 0.20305776596069336,
572
- "eval_runtime": 9.5579,
573
- "eval_samples_per_second": 64.972,
574
- "eval_steps_per_second": 8.161,
575
- "step": 870
576
  },
577
  {
578
- "epoch": 4.99,
579
- "step": 870,
580
- "total_flos": 6.917311166047027e+17,
581
- "train_loss": 0.5214220342964961,
582
- "train_runtime": 714.1939,
583
- "train_samples_per_second": 39.072,
584
- "train_steps_per_second": 1.218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
585
  }
586
  ],
587
- "max_steps": 870,
588
- "num_train_epochs": 5,
589
- "total_flos": 6.917311166047027e+17,
590
  "trial_name": null,
591
  "trial_params": null
592
  }
 
1
  {
2
+ "best_metric": 0.9726247987117552,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-bottom_cleaned_data/checkpoint-1047",
4
+ "epoch": 9.97134670487106,
5
+ "global_step": 1740,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.06,
12
+ "learning_rate": 2.777777777777778e-05,
13
+ "loss": 1.3661,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.11,
18
+ "learning_rate": 4.994192799070848e-05,
19
+ "loss": 1.1811,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.17,
24
+ "learning_rate": 4.965156794425087e-05,
25
+ "loss": 0.9235,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.23,
30
+ "learning_rate": 4.9361207897793264e-05,
31
+ "loss": 0.84,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.29,
36
+ "learning_rate": 4.907084785133566e-05,
37
+ "loss": 0.6387,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.34,
42
+ "learning_rate": 4.878048780487805e-05,
43
+ "loss": 0.7581,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.4,
48
+ "learning_rate": 4.8490127758420445e-05,
49
+ "loss": 0.6915,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.46,
54
+ "learning_rate": 4.819976771196283e-05,
55
+ "loss": 0.6255,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.52,
60
+ "learning_rate": 4.7909407665505226e-05,
61
+ "loss": 0.527,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.57,
66
+ "learning_rate": 4.761904761904762e-05,
67
+ "loss": 0.5023,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.63,
72
+ "learning_rate": 4.7328687572590014e-05,
73
+ "loss": 0.5157,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "learning_rate": 4.703832752613241e-05,
79
+ "loss": 0.4598,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.74,
84
+ "learning_rate": 4.6747967479674795e-05,
85
+ "loss": 0.4344,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.8,
90
+ "learning_rate": 4.6457607433217196e-05,
91
+ "loss": 0.4745,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.86,
96
+ "learning_rate": 4.616724738675958e-05,
97
+ "loss": 0.4485,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.92,
102
+ "learning_rate": 4.587688734030198e-05,
103
+ "loss": 0.3849,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "learning_rate": 4.5586527293844364e-05,
109
+ "loss": 0.4444,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 1.0,
114
+ "eval_accuracy": 0.9162640901771336,
115
+ "eval_loss": 0.2271285504102707,
116
+ "eval_runtime": 8.1654,
117
+ "eval_samples_per_second": 76.052,
118
+ "eval_steps_per_second": 9.552,
119
  "step": 174
120
  },
121
  {
122
  "epoch": 1.03,
123
+ "learning_rate": 4.529616724738676e-05,
124
+ "loss": 0.4314,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.09,
129
+ "learning_rate": 4.500580720092916e-05,
130
+ "loss": 0.325,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.15,
135
+ "learning_rate": 4.4715447154471546e-05,
136
+ "loss": 0.3656,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.2,
141
+ "learning_rate": 4.442508710801394e-05,
142
+ "loss": 0.3626,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.26,
147
+ "learning_rate": 4.413472706155633e-05,
148
+ "loss": 0.3937,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.32,
153
+ "learning_rate": 4.384436701509873e-05,
154
+ "loss": 0.3097,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.38,
159
+ "learning_rate": 4.3554006968641115e-05,
160
+ "loss": 0.3426,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.43,
165
+ "learning_rate": 4.326364692218351e-05,
166
+ "loss": 0.3182,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.49,
171
+ "learning_rate": 4.29732868757259e-05,
172
+ "loss": 0.3147,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.55,
177
+ "learning_rate": 4.26829268292683e-05,
178
+ "loss": 0.3389,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.6,
183
+ "learning_rate": 4.239256678281069e-05,
184
+ "loss": 0.3291,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.66,
189
+ "learning_rate": 4.210220673635308e-05,
190
+ "loss": 0.3706,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.72,
195
+ "learning_rate": 4.181184668989547e-05,
196
+ "loss": 0.3576,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.78,
201
+ "learning_rate": 4.1521486643437866e-05,
202
+ "loss": 0.3543,
203
  "step": 310
204
  },
205
  {
206
  "epoch": 1.83,
207
+ "learning_rate": 4.123112659698026e-05,
208
+ "loss": 0.305,
209
  "step": 320
210
  },
211
  {
212
  "epoch": 1.89,
213
+ "learning_rate": 4.0940766550522653e-05,
214
+ "loss": 0.3487,
215
  "step": 330
216
  },
217
  {
218
  "epoch": 1.95,
219
+ "learning_rate": 4.065040650406504e-05,
220
+ "loss": 0.3518,
221
  "step": 340
222
  },
223
  {
224
  "epoch": 2.0,
225
+ "eval_accuracy": 0.9033816425120773,
226
+ "eval_loss": 0.24492110311985016,
227
+ "eval_runtime": 9.5576,
228
+ "eval_samples_per_second": 64.975,
229
+ "eval_steps_per_second": 8.161,
230
  "step": 349
231
  },
232
  {
233
  "epoch": 2.01,
234
+ "learning_rate": 4.0360046457607435e-05,
235
+ "loss": 0.2923,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.06,
240
+ "learning_rate": 4.006968641114983e-05,
241
+ "loss": 0.3222,
242
  "step": 360
243
  },
244
  {
245
  "epoch": 2.12,
246
+ "learning_rate": 3.977932636469222e-05,
247
+ "loss": 0.317,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.18,
252
+ "learning_rate": 3.948896631823461e-05,
253
+ "loss": 0.3217,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.23,
258
+ "learning_rate": 3.9198606271777003e-05,
259
+ "loss": 0.2979,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 2.29,
264
+ "learning_rate": 3.89082462253194e-05,
265
+ "loss": 0.259,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.35,
270
+ "learning_rate": 3.861788617886179e-05,
271
+ "loss": 0.3416,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.41,
276
+ "learning_rate": 3.8327526132404185e-05,
277
+ "loss": 0.3103,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.46,
282
+ "learning_rate": 3.803716608594657e-05,
283
+ "loss": 0.2513,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.52,
288
+ "learning_rate": 3.7746806039488966e-05,
289
+ "loss": 0.2599,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.58,
294
+ "learning_rate": 3.745644599303136e-05,
295
+ "loss": 0.3085,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.64,
300
+ "learning_rate": 3.7166085946573754e-05,
301
+ "loss": 0.2228,
302
  "step": 460
303
  },
304
  {
305
  "epoch": 2.69,
306
+ "learning_rate": 3.687572590011615e-05,
307
+ "loss": 0.2387,
308
  "step": 470
309
  },
310
  {
311
  "epoch": 2.75,
312
+ "learning_rate": 3.6585365853658535e-05,
313
+ "loss": 0.2562,
314
  "step": 480
315
  },
316
  {
317
  "epoch": 2.81,
318
+ "learning_rate": 3.629500580720093e-05,
319
+ "loss": 0.2431,
320
  "step": 490
321
  },
322
  {
323
  "epoch": 2.87,
324
+ "learning_rate": 3.600464576074332e-05,
325
+ "loss": 0.2774,
326
  "step": 500
327
  },
328
  {
329
  "epoch": 2.92,
330
+ "learning_rate": 3.571428571428572e-05,
331
+ "loss": 0.2445,
332
  "step": 510
333
  },
334
  {
335
  "epoch": 2.98,
336
+ "learning_rate": 3.5423925667828104e-05,
337
+ "loss": 0.225,
338
  "step": 520
339
  },
340
  {
341
  "epoch": 3.0,
342
+ "eval_accuracy": 0.9500805152979066,
343
+ "eval_loss": 0.13247297704219818,
344
+ "eval_runtime": 8.7139,
345
+ "eval_samples_per_second": 71.265,
346
+ "eval_steps_per_second": 8.951,
347
  "step": 523
348
  },
349
  {
350
  "epoch": 3.04,
351
+ "learning_rate": 3.51335656213705e-05,
352
+ "loss": 0.2198,
353
  "step": 530
354
  },
355
  {
356
  "epoch": 3.09,
357
+ "learning_rate": 3.48432055749129e-05,
358
+ "loss": 0.2398,
359
  "step": 540
360
  },
361
  {
362
  "epoch": 3.15,
363
+ "learning_rate": 3.4552845528455286e-05,
364
+ "loss": 0.1821,
365
  "step": 550
366
  },
367
  {
368
  "epoch": 3.21,
369
+ "learning_rate": 3.426248548199768e-05,
370
+ "loss": 0.2148,
371
  "step": 560
372
  },
373
  {
374
  "epoch": 3.27,
375
+ "learning_rate": 3.397212543554007e-05,
376
+ "loss": 0.3183,
377
  "step": 570
378
  },
379
  {
380
  "epoch": 3.32,
381
+ "learning_rate": 3.368176538908247e-05,
382
+ "loss": 0.2292,
383
  "step": 580
384
  },
385
  {
386
  "epoch": 3.38,
387
+ "learning_rate": 3.3391405342624855e-05,
388
+ "loss": 0.1987,
389
  "step": 590
390
  },
391
  {
392
  "epoch": 3.44,
393
+ "learning_rate": 3.310104529616725e-05,
394
+ "loss": 0.2434,
395
  "step": 600
396
  },
397
  {
398
  "epoch": 3.5,
399
+ "learning_rate": 3.281068524970964e-05,
400
+ "loss": 0.2906,
401
  "step": 610
402
  },
403
  {
404
  "epoch": 3.55,
405
+ "learning_rate": 3.2520325203252037e-05,
406
+ "loss": 0.2117,
407
  "step": 620
408
  },
409
  {
410
  "epoch": 3.61,
411
+ "learning_rate": 3.222996515679443e-05,
412
+ "loss": 0.22,
413
  "step": 630
414
  },
415
  {
416
  "epoch": 3.67,
417
+ "learning_rate": 3.193960511033682e-05,
418
+ "loss": 0.1831,
419
  "step": 640
420
  },
421
  {
422
  "epoch": 3.72,
423
+ "learning_rate": 3.164924506387921e-05,
424
+ "loss": 0.1943,
425
  "step": 650
426
  },
427
  {
428
  "epoch": 3.78,
429
+ "learning_rate": 3.13588850174216e-05,
430
+ "loss": 0.2528,
431
  "step": 660
432
  },
433
  {
434
  "epoch": 3.84,
435
+ "learning_rate": 3.1068524970964e-05,
436
+ "loss": 0.2057,
437
  "step": 670
438
  },
439
  {
440
  "epoch": 3.9,
441
+ "learning_rate": 3.077816492450639e-05,
442
+ "loss": 0.2584,
443
  "step": 680
444
  },
445
  {
446
  "epoch": 3.95,
447
+ "learning_rate": 3.048780487804878e-05,
448
+ "loss": 0.2195,
449
  "step": 690
450
  },
451
  {
452
  "epoch": 4.0,
453
+ "eval_accuracy": 0.9549114331723028,
454
+ "eval_loss": 0.10237770527601242,
455
+ "eval_runtime": 9.1181,
456
+ "eval_samples_per_second": 68.106,
457
+ "eval_steps_per_second": 8.554,
458
  "step": 698
459
  },
460
  {
461
  "epoch": 4.01,
462
+ "learning_rate": 3.0197444831591178e-05,
463
+ "loss": 0.2832,
464
  "step": 700
465
  },
466
  {
467
  "epoch": 4.07,
468
+ "learning_rate": 2.9907084785133565e-05,
469
+ "loss": 0.2087,
470
  "step": 710
471
  },
472
  {
473
  "epoch": 4.13,
474
+ "learning_rate": 2.9616724738675962e-05,
475
+ "loss": 0.1709,
476
  "step": 720
477
  },
478
  {
479
  "epoch": 4.18,
480
+ "learning_rate": 2.932636469221835e-05,
481
+ "loss": 0.1891,
482
  "step": 730
483
  },
484
  {
485
  "epoch": 4.24,
486
+ "learning_rate": 2.9036004645760743e-05,
487
+ "loss": 0.1923,
488
  "step": 740
489
  },
490
  {
491
  "epoch": 4.3,
492
+ "learning_rate": 2.874564459930314e-05,
493
+ "loss": 0.2178,
494
  "step": 750
495
  },
496
  {
497
  "epoch": 4.36,
498
+ "learning_rate": 2.8455284552845528e-05,
499
+ "loss": 0.2256,
500
  "step": 760
501
  },
502
  {
503
  "epoch": 4.41,
504
+ "learning_rate": 2.8164924506387925e-05,
505
+ "loss": 0.2184,
506
  "step": 770
507
  },
508
  {
509
  "epoch": 4.47,
510
+ "learning_rate": 2.7874564459930312e-05,
511
+ "loss": 0.245,
512
  "step": 780
513
  },
514
  {
515
  "epoch": 4.53,
516
+ "learning_rate": 2.758420441347271e-05,
517
+ "loss": 0.229,
518
  "step": 790
519
  },
520
  {
521
  "epoch": 4.58,
522
+ "learning_rate": 2.7293844367015097e-05,
523
+ "loss": 0.2236,
524
  "step": 800
525
  },
526
  {
527
  "epoch": 4.64,
528
+ "learning_rate": 2.7003484320557494e-05,
529
+ "loss": 0.2517,
530
  "step": 810
531
  },
532
  {
533
  "epoch": 4.7,
534
+ "learning_rate": 2.6713124274099888e-05,
535
+ "loss": 0.1755,
536
  "step": 820
537
  },
538
  {
539
  "epoch": 4.76,
540
+ "learning_rate": 2.642276422764228e-05,
541
+ "loss": 0.1663,
542
  "step": 830
543
  },
544
  {
545
  "epoch": 4.81,
546
+ "learning_rate": 2.6132404181184672e-05,
547
+ "loss": 0.1958,
548
  "step": 840
549
  },
550
  {
551
  "epoch": 4.87,
552
+ "learning_rate": 2.5842044134727063e-05,
553
+ "loss": 0.2334,
554
  "step": 850
555
  },
556
  {
557
  "epoch": 4.93,
558
+ "learning_rate": 2.5551684088269457e-05,
559
+ "loss": 0.2576,
560
  "step": 860
561
  },
562
  {
563
  "epoch": 4.99,
564
+ "learning_rate": 2.5261324041811847e-05,
565
+ "loss": 0.2627,
566
  "step": 870
567
  },
568
  {
569
+ "epoch": 5.0,
570
+ "eval_accuracy": 0.9629629629629629,
571
+ "eval_loss": 0.1045805960893631,
572
+ "eval_runtime": 9.624,
573
+ "eval_samples_per_second": 64.526,
574
+ "eval_steps_per_second": 8.105,
575
+ "step": 872
576
  },
577
  {
578
+ "epoch": 5.04,
579
+ "learning_rate": 2.497096399535424e-05,
580
+ "loss": 0.2139,
581
+ "step": 880
582
+ },
583
+ {
584
+ "epoch": 5.1,
585
+ "learning_rate": 2.4680603948896632e-05,
586
+ "loss": 0.2114,
587
+ "step": 890
588
+ },
589
+ {
590
+ "epoch": 5.16,
591
+ "learning_rate": 2.4390243902439026e-05,
592
+ "loss": 0.2097,
593
+ "step": 900
594
+ },
595
+ {
596
+ "epoch": 5.21,
597
+ "learning_rate": 2.4099883855981416e-05,
598
+ "loss": 0.2136,
599
+ "step": 910
600
+ },
601
+ {
602
+ "epoch": 5.27,
603
+ "learning_rate": 2.380952380952381e-05,
604
+ "loss": 0.2523,
605
+ "step": 920
606
+ },
607
+ {
608
+ "epoch": 5.33,
609
+ "learning_rate": 2.3519163763066204e-05,
610
+ "loss": 0.1787,
611
+ "step": 930
612
+ },
613
+ {
614
+ "epoch": 5.39,
615
+ "learning_rate": 2.3228803716608598e-05,
616
+ "loss": 0.1828,
617
+ "step": 940
618
+ },
619
+ {
620
+ "epoch": 5.44,
621
+ "learning_rate": 2.293844367015099e-05,
622
+ "loss": 0.1549,
623
+ "step": 950
624
+ },
625
+ {
626
+ "epoch": 5.5,
627
+ "learning_rate": 2.264808362369338e-05,
628
+ "loss": 0.1531,
629
+ "step": 960
630
+ },
631
+ {
632
+ "epoch": 5.56,
633
+ "learning_rate": 2.2357723577235773e-05,
634
+ "loss": 0.1957,
635
+ "step": 970
636
+ },
637
+ {
638
+ "epoch": 5.62,
639
+ "learning_rate": 2.2067363530778164e-05,
640
+ "loss": 0.222,
641
+ "step": 980
642
+ },
643
+ {
644
+ "epoch": 5.67,
645
+ "learning_rate": 2.1777003484320557e-05,
646
+ "loss": 0.2211,
647
+ "step": 990
648
+ },
649
+ {
650
+ "epoch": 5.73,
651
+ "learning_rate": 2.148664343786295e-05,
652
+ "loss": 0.1711,
653
+ "step": 1000
654
+ },
655
+ {
656
+ "epoch": 5.79,
657
+ "learning_rate": 2.1196283391405345e-05,
658
+ "loss": 0.1759,
659
+ "step": 1010
660
+ },
661
+ {
662
+ "epoch": 5.85,
663
+ "learning_rate": 2.0905923344947736e-05,
664
+ "loss": 0.2333,
665
+ "step": 1020
666
+ },
667
+ {
668
+ "epoch": 5.9,
669
+ "learning_rate": 2.061556329849013e-05,
670
+ "loss": 0.2269,
671
+ "step": 1030
672
+ },
673
+ {
674
+ "epoch": 5.96,
675
+ "learning_rate": 2.032520325203252e-05,
676
+ "loss": 0.142,
677
+ "step": 1040
678
+ },
679
+ {
680
+ "epoch": 6.0,
681
+ "eval_accuracy": 0.9726247987117552,
682
+ "eval_loss": 0.08394750207662582,
683
+ "eval_runtime": 8.0552,
684
+ "eval_samples_per_second": 77.093,
685
+ "eval_steps_per_second": 9.683,
686
+ "step": 1047
687
+ },
688
+ {
689
+ "epoch": 6.02,
690
+ "learning_rate": 2.0034843205574914e-05,
691
+ "loss": 0.1847,
692
+ "step": 1050
693
+ },
694
+ {
695
+ "epoch": 6.07,
696
+ "learning_rate": 1.9744483159117305e-05,
697
+ "loss": 0.1569,
698
+ "step": 1060
699
+ },
700
+ {
701
+ "epoch": 6.13,
702
+ "learning_rate": 1.94541231126597e-05,
703
+ "loss": 0.2001,
704
+ "step": 1070
705
+ },
706
+ {
707
+ "epoch": 6.19,
708
+ "learning_rate": 1.9163763066202093e-05,
709
+ "loss": 0.1721,
710
+ "step": 1080
711
+ },
712
+ {
713
+ "epoch": 6.25,
714
+ "learning_rate": 1.8873403019744483e-05,
715
+ "loss": 0.1406,
716
+ "step": 1090
717
+ },
718
+ {
719
+ "epoch": 6.3,
720
+ "learning_rate": 1.8583042973286877e-05,
721
+ "loss": 0.1943,
722
+ "step": 1100
723
+ },
724
+ {
725
+ "epoch": 6.36,
726
+ "learning_rate": 1.8292682926829268e-05,
727
+ "loss": 0.1812,
728
+ "step": 1110
729
+ },
730
+ {
731
+ "epoch": 6.42,
732
+ "learning_rate": 1.800232288037166e-05,
733
+ "loss": 0.105,
734
+ "step": 1120
735
+ },
736
+ {
737
+ "epoch": 6.48,
738
+ "learning_rate": 1.7711962833914052e-05,
739
+ "loss": 0.1356,
740
+ "step": 1130
741
+ },
742
+ {
743
+ "epoch": 6.53,
744
+ "learning_rate": 1.742160278745645e-05,
745
+ "loss": 0.1678,
746
+ "step": 1140
747
+ },
748
+ {
749
+ "epoch": 6.59,
750
+ "learning_rate": 1.713124274099884e-05,
751
+ "loss": 0.1989,
752
+ "step": 1150
753
+ },
754
+ {
755
+ "epoch": 6.65,
756
+ "learning_rate": 1.6840882694541234e-05,
757
+ "loss": 0.0919,
758
+ "step": 1160
759
+ },
760
+ {
761
+ "epoch": 6.7,
762
+ "learning_rate": 1.6550522648083624e-05,
763
+ "loss": 0.1554,
764
+ "step": 1170
765
+ },
766
+ {
767
+ "epoch": 6.76,
768
+ "learning_rate": 1.6260162601626018e-05,
769
+ "loss": 0.1257,
770
+ "step": 1180
771
+ },
772
+ {
773
+ "epoch": 6.82,
774
+ "learning_rate": 1.596980255516841e-05,
775
+ "loss": 0.2064,
776
+ "step": 1190
777
+ },
778
+ {
779
+ "epoch": 6.88,
780
+ "learning_rate": 1.56794425087108e-05,
781
+ "loss": 0.1877,
782
+ "step": 1200
783
+ },
784
+ {
785
+ "epoch": 6.93,
786
+ "learning_rate": 1.5389082462253197e-05,
787
+ "loss": 0.1878,
788
+ "step": 1210
789
+ },
790
+ {
791
+ "epoch": 6.99,
792
+ "learning_rate": 1.5098722415795589e-05,
793
+ "loss": 0.1516,
794
+ "step": 1220
795
+ },
796
+ {
797
+ "epoch": 7.0,
798
+ "eval_accuracy": 0.9629629629629629,
799
+ "eval_loss": 0.09183160960674286,
800
+ "eval_runtime": 9.7233,
801
+ "eval_samples_per_second": 63.867,
802
+ "eval_steps_per_second": 8.022,
803
+ "step": 1221
804
+ },
805
+ {
806
+ "epoch": 7.05,
807
+ "learning_rate": 1.4808362369337981e-05,
808
+ "loss": 0.1592,
809
+ "step": 1230
810
+ },
811
+ {
812
+ "epoch": 7.11,
813
+ "learning_rate": 1.4518002322880372e-05,
814
+ "loss": 0.198,
815
+ "step": 1240
816
+ },
817
+ {
818
+ "epoch": 7.16,
819
+ "learning_rate": 1.4227642276422764e-05,
820
+ "loss": 0.1892,
821
+ "step": 1250
822
+ },
823
+ {
824
+ "epoch": 7.22,
825
+ "learning_rate": 1.3937282229965156e-05,
826
+ "loss": 0.1777,
827
+ "step": 1260
828
+ },
829
+ {
830
+ "epoch": 7.28,
831
+ "learning_rate": 1.3646922183507548e-05,
832
+ "loss": 0.0918,
833
+ "step": 1270
834
+ },
835
+ {
836
+ "epoch": 7.34,
837
+ "learning_rate": 1.3356562137049944e-05,
838
+ "loss": 0.0896,
839
+ "step": 1280
840
+ },
841
+ {
842
+ "epoch": 7.39,
843
+ "learning_rate": 1.3066202090592336e-05,
844
+ "loss": 0.1273,
845
+ "step": 1290
846
+ },
847
+ {
848
+ "epoch": 7.45,
849
+ "learning_rate": 1.2775842044134728e-05,
850
+ "loss": 0.1521,
851
+ "step": 1300
852
+ },
853
+ {
854
+ "epoch": 7.51,
855
+ "learning_rate": 1.248548199767712e-05,
856
+ "loss": 0.1628,
857
+ "step": 1310
858
+ },
859
+ {
860
+ "epoch": 7.56,
861
+ "learning_rate": 1.2195121951219513e-05,
862
+ "loss": 0.1587,
863
+ "step": 1320
864
+ },
865
+ {
866
+ "epoch": 7.62,
867
+ "learning_rate": 1.1904761904761905e-05,
868
+ "loss": 0.205,
869
+ "step": 1330
870
+ },
871
+ {
872
+ "epoch": 7.68,
873
+ "learning_rate": 1.1614401858304299e-05,
874
+ "loss": 0.1415,
875
+ "step": 1340
876
+ },
877
+ {
878
+ "epoch": 7.74,
879
+ "learning_rate": 1.132404181184669e-05,
880
+ "loss": 0.1982,
881
+ "step": 1350
882
+ },
883
+ {
884
+ "epoch": 7.79,
885
+ "learning_rate": 1.1033681765389082e-05,
886
+ "loss": 0.1079,
887
+ "step": 1360
888
+ },
889
+ {
890
+ "epoch": 7.85,
891
+ "learning_rate": 1.0743321718931476e-05,
892
+ "loss": 0.1115,
893
+ "step": 1370
894
+ },
895
+ {
896
+ "epoch": 7.91,
897
+ "learning_rate": 1.0452961672473868e-05,
898
+ "loss": 0.2234,
899
+ "step": 1380
900
+ },
901
+ {
902
+ "epoch": 7.97,
903
+ "learning_rate": 1.016260162601626e-05,
904
+ "loss": 0.1498,
905
+ "step": 1390
906
+ },
907
+ {
908
+ "epoch": 8.0,
909
+ "eval_accuracy": 0.9726247987117552,
910
+ "eval_loss": 0.07796485722064972,
911
+ "eval_runtime": 9.3543,
912
+ "eval_samples_per_second": 66.386,
913
+ "eval_steps_per_second": 8.338,
914
+ "step": 1396
915
+ },
916
+ {
917
+ "epoch": 8.02,
918
+ "learning_rate": 9.872241579558652e-06,
919
+ "loss": 0.1299,
920
+ "step": 1400
921
+ },
922
+ {
923
+ "epoch": 8.08,
924
+ "learning_rate": 9.581881533101046e-06,
925
+ "loss": 0.1389,
926
+ "step": 1410
927
+ },
928
+ {
929
+ "epoch": 8.14,
930
+ "learning_rate": 9.291521486643439e-06,
931
+ "loss": 0.1499,
932
+ "step": 1420
933
+ },
934
+ {
935
+ "epoch": 8.19,
936
+ "learning_rate": 9.00116144018583e-06,
937
+ "loss": 0.2523,
938
+ "step": 1430
939
+ },
940
+ {
941
+ "epoch": 8.25,
942
+ "learning_rate": 8.710801393728225e-06,
943
+ "loss": 0.1067,
944
+ "step": 1440
945
+ },
946
+ {
947
+ "epoch": 8.31,
948
+ "learning_rate": 8.420441347270617e-06,
949
+ "loss": 0.1696,
950
+ "step": 1450
951
+ },
952
+ {
953
+ "epoch": 8.37,
954
+ "learning_rate": 8.130081300813009e-06,
955
+ "loss": 0.1279,
956
+ "step": 1460
957
+ },
958
+ {
959
+ "epoch": 8.42,
960
+ "learning_rate": 7.8397212543554e-06,
961
+ "loss": 0.1523,
962
+ "step": 1470
963
+ },
964
+ {
965
+ "epoch": 8.48,
966
+ "learning_rate": 7.5493612078977944e-06,
967
+ "loss": 0.1335,
968
+ "step": 1480
969
+ },
970
+ {
971
+ "epoch": 8.54,
972
+ "learning_rate": 7.259001161440186e-06,
973
+ "loss": 0.1122,
974
+ "step": 1490
975
+ },
976
+ {
977
+ "epoch": 8.6,
978
+ "learning_rate": 6.968641114982578e-06,
979
+ "loss": 0.1596,
980
+ "step": 1500
981
+ },
982
+ {
983
+ "epoch": 8.65,
984
+ "learning_rate": 6.678281068524972e-06,
985
+ "loss": 0.0975,
986
+ "step": 1510
987
+ },
988
+ {
989
+ "epoch": 8.71,
990
+ "learning_rate": 6.387921022067364e-06,
991
+ "loss": 0.1002,
992
+ "step": 1520
993
+ },
994
+ {
995
+ "epoch": 8.77,
996
+ "learning_rate": 6.0975609756097564e-06,
997
+ "loss": 0.0895,
998
+ "step": 1530
999
+ },
1000
+ {
1001
+ "epoch": 8.83,
1002
+ "learning_rate": 5.8072009291521495e-06,
1003
+ "loss": 0.0911,
1004
+ "step": 1540
1005
+ },
1006
+ {
1007
+ "epoch": 8.88,
1008
+ "learning_rate": 5.516840882694541e-06,
1009
+ "loss": 0.157,
1010
+ "step": 1550
1011
+ },
1012
+ {
1013
+ "epoch": 8.94,
1014
+ "learning_rate": 5.226480836236934e-06,
1015
+ "loss": 0.1513,
1016
+ "step": 1560
1017
+ },
1018
+ {
1019
+ "epoch": 9.0,
1020
+ "learning_rate": 4.936120789779326e-06,
1021
+ "loss": 0.1189,
1022
+ "step": 1570
1023
+ },
1024
+ {
1025
+ "epoch": 9.0,
1026
+ "eval_accuracy": 0.966183574879227,
1027
+ "eval_loss": 0.07211676239967346,
1028
+ "eval_runtime": 8.3308,
1029
+ "eval_samples_per_second": 74.542,
1030
+ "eval_steps_per_second": 9.363,
1031
+ "step": 1570
1032
+ },
1033
+ {
1034
+ "epoch": 9.05,
1035
+ "learning_rate": 4.645760743321719e-06,
1036
+ "loss": 0.1239,
1037
+ "step": 1580
1038
+ },
1039
+ {
1040
+ "epoch": 9.11,
1041
+ "learning_rate": 4.355400696864112e-06,
1042
+ "loss": 0.1039,
1043
+ "step": 1590
1044
+ },
1045
+ {
1046
+ "epoch": 9.17,
1047
+ "learning_rate": 4.0650406504065046e-06,
1048
+ "loss": 0.1144,
1049
+ "step": 1600
1050
+ },
1051
+ {
1052
+ "epoch": 9.23,
1053
+ "learning_rate": 3.7746806039488972e-06,
1054
+ "loss": 0.1033,
1055
+ "step": 1610
1056
+ },
1057
+ {
1058
+ "epoch": 9.28,
1059
+ "learning_rate": 3.484320557491289e-06,
1060
+ "loss": 0.0829,
1061
+ "step": 1620
1062
+ },
1063
+ {
1064
+ "epoch": 9.34,
1065
+ "learning_rate": 3.193960511033682e-06,
1066
+ "loss": 0.1072,
1067
+ "step": 1630
1068
+ },
1069
+ {
1070
+ "epoch": 9.4,
1071
+ "learning_rate": 2.9036004645760748e-06,
1072
+ "loss": 0.1257,
1073
+ "step": 1640
1074
+ },
1075
+ {
1076
+ "epoch": 9.46,
1077
+ "learning_rate": 2.613240418118467e-06,
1078
+ "loss": 0.0804,
1079
+ "step": 1650
1080
+ },
1081
+ {
1082
+ "epoch": 9.51,
1083
+ "learning_rate": 2.3228803716608596e-06,
1084
+ "loss": 0.119,
1085
+ "step": 1660
1086
+ },
1087
+ {
1088
+ "epoch": 9.57,
1089
+ "learning_rate": 2.0325203252032523e-06,
1090
+ "loss": 0.0765,
1091
+ "step": 1670
1092
+ },
1093
+ {
1094
+ "epoch": 9.63,
1095
+ "learning_rate": 1.7421602787456445e-06,
1096
+ "loss": 0.1681,
1097
+ "step": 1680
1098
+ },
1099
+ {
1100
+ "epoch": 9.68,
1101
+ "learning_rate": 1.4518002322880374e-06,
1102
+ "loss": 0.1005,
1103
+ "step": 1690
1104
+ },
1105
+ {
1106
+ "epoch": 9.74,
1107
+ "learning_rate": 1.1614401858304298e-06,
1108
+ "loss": 0.1498,
1109
+ "step": 1700
1110
+ },
1111
+ {
1112
+ "epoch": 9.8,
1113
+ "learning_rate": 8.710801393728223e-07,
1114
+ "loss": 0.1365,
1115
+ "step": 1710
1116
+ },
1117
+ {
1118
+ "epoch": 9.86,
1119
+ "learning_rate": 5.807200929152149e-07,
1120
+ "loss": 0.122,
1121
+ "step": 1720
1122
+ },
1123
+ {
1124
+ "epoch": 9.91,
1125
+ "learning_rate": 2.9036004645760745e-07,
1126
+ "loss": 0.0906,
1127
+ "step": 1730
1128
+ },
1129
+ {
1130
+ "epoch": 9.97,
1131
+ "learning_rate": 0.0,
1132
+ "loss": 0.1594,
1133
+ "step": 1740
1134
+ },
1135
+ {
1136
+ "epoch": 9.97,
1137
+ "eval_accuracy": 0.9726247987117552,
1138
+ "eval_loss": 0.0668075904250145,
1139
+ "eval_runtime": 9.2098,
1140
+ "eval_samples_per_second": 67.428,
1141
+ "eval_steps_per_second": 8.469,
1142
+ "step": 1740
1143
+ },
1144
+ {
1145
+ "epoch": 9.97,
1146
+ "step": 1740,
1147
+ "total_flos": 1.3833876610752307e+18,
1148
+ "train_loss": 0.24886192696532983,
1149
+ "train_runtime": 1438.0658,
1150
+ "train_samples_per_second": 38.809,
1151
+ "train_steps_per_second": 1.21
1152
  }
1153
  ],
1154
+ "max_steps": 1740,
1155
+ "num_train_epochs": 10,
1156
+ "total_flos": 1.3833876610752307e+18,
1157
  "trial_name": null,
1158
  "trial_params": null
1159
  }