barten commited on
Commit
dcb71fd
·
1 Parent(s): 6264dfa

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. train_results.json +6 -6
  3. trainer_state.json +538 -700
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 14.89,
3
- "total_flos": 7.877470546062803e+18,
4
- "train_loss": 0.5140632056590146,
5
- "train_runtime": 3982.2883,
6
- "train_samples_per_second": 25.704,
7
- "train_steps_per_second": 0.399
8
  }
 
1
  {
2
+ "epoch": 14.96,
3
+ "total_flos": 6.546875329145733e+18,
4
+ "train_loss": 0.44751356618874,
5
+ "train_runtime": 3414.1758,
6
+ "train_samples_per_second": 24.814,
7
+ "train_steps_per_second": 0.387
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 14.89,
3
- "total_flos": 7.877470546062803e+18,
4
- "train_loss": 0.5140632056590146,
5
- "train_runtime": 3982.2883,
6
- "train_samples_per_second": 25.704,
7
- "train_steps_per_second": 0.399
8
  }
 
1
  {
2
+ "epoch": 14.96,
3
+ "total_flos": 6.546875329145733e+18,
4
+ "train_loss": 0.44751356618874,
5
+ "train_runtime": 3414.1758,
6
+ "train_samples_per_second": 24.814,
7
+ "train_steps_per_second": 0.387
8
  }
trainer_state.json CHANGED
@@ -1,1117 +1,955 @@
1
  {
2
- "best_metric": 0.8398058252427184,
3
- "best_model_checkpoint": "vit-base-patch16-224-brand/checkpoint-1590",
4
- "epoch": 14.894613583138174,
5
  "eval_steps": 500,
6
- "global_step": 1590,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.09,
13
- "learning_rate": 3.1446540880503146e-06,
14
- "loss": 2.3942,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.19,
19
- "learning_rate": 6.289308176100629e-06,
20
- "loss": 2.3923,
21
  "step": 20
22
  },
23
  {
24
- "epoch": 0.28,
25
- "learning_rate": 9.433962264150944e-06,
26
- "loss": 2.3106,
27
  "step": 30
28
  },
29
  {
30
- "epoch": 0.37,
31
- "learning_rate": 1.2578616352201259e-05,
32
- "loss": 2.2216,
33
  "step": 40
34
  },
35
  {
36
- "epoch": 0.47,
37
- "learning_rate": 1.572327044025157e-05,
38
- "loss": 2.0774,
39
  "step": 50
40
  },
41
  {
42
- "epoch": 0.56,
43
- "learning_rate": 1.8867924528301888e-05,
44
- "loss": 2.0392,
45
  "step": 60
46
  },
47
  {
48
- "epoch": 0.66,
49
- "learning_rate": 2.2012578616352204e-05,
50
- "loss": 1.8905,
51
  "step": 70
52
  },
53
  {
54
- "epoch": 0.75,
55
- "learning_rate": 2.5157232704402517e-05,
56
- "loss": 1.8387,
57
  "step": 80
58
  },
59
  {
60
- "epoch": 0.84,
61
- "learning_rate": 2.830188679245283e-05,
62
- "loss": 1.7255,
63
- "step": 90
 
 
 
64
  },
65
  {
66
- "epoch": 0.94,
67
- "learning_rate": 3.144654088050314e-05,
68
- "loss": 1.6631,
69
- "step": 100
70
  },
71
  {
72
- "epoch": 0.99,
73
- "eval_accuracy": 0.5494263018534863,
74
- "eval_loss": 1.4344048500061035,
75
- "eval_runtime": 29.697,
76
- "eval_samples_per_second": 76.304,
77
- "eval_steps_per_second": 4.782,
78
- "step": 106
79
  },
80
  {
81
- "epoch": 1.03,
82
- "learning_rate": 3.4591194968553456e-05,
83
- "loss": 1.5273,
84
  "step": 110
85
  },
86
  {
87
- "epoch": 1.12,
88
- "learning_rate": 3.7735849056603776e-05,
89
- "loss": 1.4406,
90
  "step": 120
91
  },
92
  {
93
- "epoch": 1.22,
94
- "learning_rate": 4.088050314465409e-05,
95
- "loss": 1.4031,
96
  "step": 130
97
  },
98
  {
99
- "epoch": 1.31,
100
- "learning_rate": 4.402515723270441e-05,
101
- "loss": 1.2573,
102
  "step": 140
103
  },
104
  {
105
- "epoch": 1.41,
106
- "learning_rate": 4.716981132075472e-05,
107
- "loss": 1.2428,
108
  "step": 150
109
  },
110
  {
111
- "epoch": 1.5,
112
- "learning_rate": 4.9965059399021665e-05,
113
- "loss": 1.1119,
114
  "step": 160
115
  },
116
  {
117
- "epoch": 1.59,
118
- "learning_rate": 4.9615653389238295e-05,
119
- "loss": 1.2198,
120
  "step": 170
121
  },
122
  {
123
- "epoch": 1.69,
124
- "learning_rate": 4.9266247379454926e-05,
125
- "loss": 1.0555,
 
 
 
 
 
 
 
 
 
126
  "step": 180
127
  },
128
  {
129
- "epoch": 1.78,
130
- "learning_rate": 4.891684136967156e-05,
131
- "loss": 1.0781,
132
  "step": 190
133
  },
134
  {
135
- "epoch": 1.87,
136
- "learning_rate": 4.856743535988819e-05,
137
- "loss": 1.0989,
138
  "step": 200
139
  },
140
  {
141
- "epoch": 1.97,
142
- "learning_rate": 4.8218029350104823e-05,
143
- "loss": 1.069,
144
  "step": 210
145
  },
146
  {
147
- "epoch": 2.0,
148
- "eval_accuracy": 0.7281553398058253,
149
- "eval_loss": 0.8916863799095154,
150
- "eval_runtime": 29.9218,
151
- "eval_samples_per_second": 75.731,
152
- "eval_steps_per_second": 4.746,
153
- "step": 213
154
- },
155
- {
156
- "epoch": 2.06,
157
- "learning_rate": 4.7868623340321454e-05,
158
- "loss": 0.9655,
159
  "step": 220
160
  },
161
  {
162
- "epoch": 2.15,
163
- "learning_rate": 4.7519217330538084e-05,
164
- "loss": 0.8571,
165
  "step": 230
166
  },
167
  {
168
- "epoch": 2.25,
169
- "learning_rate": 4.716981132075472e-05,
170
- "loss": 0.8375,
171
  "step": 240
172
  },
173
  {
174
- "epoch": 2.34,
175
- "learning_rate": 4.682040531097135e-05,
176
- "loss": 0.8631,
177
  "step": 250
178
  },
179
  {
180
- "epoch": 2.44,
181
- "learning_rate": 4.647099930118798e-05,
182
- "loss": 0.8991,
183
  "step": 260
184
  },
185
  {
186
- "epoch": 2.53,
187
- "learning_rate": 4.612159329140461e-05,
188
- "loss": 0.9349,
 
 
 
 
 
 
 
 
 
189
  "step": 270
190
  },
191
  {
192
- "epoch": 2.62,
193
- "learning_rate": 4.577218728162125e-05,
194
- "loss": 0.8766,
195
  "step": 280
196
  },
197
  {
198
- "epoch": 2.72,
199
- "learning_rate": 4.542278127183788e-05,
200
- "loss": 0.8294,
201
  "step": 290
202
  },
203
  {
204
- "epoch": 2.81,
205
- "learning_rate": 4.5073375262054504e-05,
206
- "loss": 0.8507,
207
  "step": 300
208
  },
209
  {
210
- "epoch": 2.9,
211
- "learning_rate": 4.472396925227114e-05,
212
- "loss": 0.7529,
213
  "step": 310
214
  },
215
  {
216
- "epoch": 3.0,
217
- "learning_rate": 4.437456324248777e-05,
218
- "loss": 0.801,
219
  "step": 320
220
  },
221
  {
222
- "epoch": 3.0,
223
- "eval_accuracy": 0.7533097969991174,
224
- "eval_loss": 0.7611907720565796,
225
- "eval_runtime": 29.9179,
226
- "eval_samples_per_second": 75.741,
227
- "eval_steps_per_second": 4.746,
228
- "step": 320
229
- },
230
- {
231
- "epoch": 3.09,
232
- "learning_rate": 4.402515723270441e-05,
233
- "loss": 0.7598,
234
  "step": 330
235
  },
236
  {
237
- "epoch": 3.19,
238
- "learning_rate": 4.367575122292103e-05,
239
- "loss": 0.6375,
240
  "step": 340
241
  },
242
  {
243
- "epoch": 3.28,
244
- "learning_rate": 4.332634521313767e-05,
245
- "loss": 0.6634,
246
  "step": 350
247
  },
248
  {
249
- "epoch": 3.37,
250
- "learning_rate": 4.29769392033543e-05,
251
- "loss": 0.6474,
 
 
 
 
 
 
 
 
 
252
  "step": 360
253
  },
254
  {
255
- "epoch": 3.47,
256
- "learning_rate": 4.262753319357093e-05,
257
- "loss": 0.6328,
258
  "step": 370
259
  },
260
  {
261
- "epoch": 3.56,
262
- "learning_rate": 4.227812718378756e-05,
263
- "loss": 0.6945,
264
  "step": 380
265
  },
266
  {
267
- "epoch": 3.65,
268
- "learning_rate": 4.192872117400419e-05,
269
- "loss": 0.6194,
270
  "step": 390
271
  },
272
  {
273
- "epoch": 3.75,
274
- "learning_rate": 4.157931516422083e-05,
275
- "loss": 0.6398,
276
  "step": 400
277
  },
278
  {
279
- "epoch": 3.84,
280
- "learning_rate": 4.122990915443746e-05,
281
- "loss": 0.6756,
282
  "step": 410
283
  },
284
  {
285
- "epoch": 3.93,
286
- "learning_rate": 4.088050314465409e-05,
287
- "loss": 0.6075,
288
  "step": 420
289
  },
290
  {
291
- "epoch": 4.0,
292
- "eval_accuracy": 0.792144748455428,
293
- "eval_loss": 0.6522440314292908,
294
- "eval_runtime": 29.5461,
295
- "eval_samples_per_second": 76.694,
296
- "eval_steps_per_second": 4.806,
297
- "step": 427
298
- },
299
- {
300
- "epoch": 4.03,
301
- "learning_rate": 4.053109713487072e-05,
302
- "loss": 0.5489,
303
  "step": 430
304
  },
305
  {
306
- "epoch": 4.12,
307
- "learning_rate": 4.018169112508735e-05,
308
- "loss": 0.5316,
309
  "step": 440
310
  },
311
  {
312
- "epoch": 4.22,
313
- "learning_rate": 3.983228511530399e-05,
314
- "loss": 0.523,
 
 
 
 
 
 
 
 
 
315
  "step": 450
316
  },
317
  {
318
- "epoch": 4.31,
319
- "learning_rate": 3.948287910552062e-05,
320
- "loss": 0.5247,
321
  "step": 460
322
  },
323
  {
324
- "epoch": 4.4,
325
- "learning_rate": 3.913347309573725e-05,
326
- "loss": 0.5356,
327
  "step": 470
328
  },
329
  {
330
- "epoch": 4.5,
331
- "learning_rate": 3.878406708595388e-05,
332
- "loss": 0.5128,
333
  "step": 480
334
  },
335
  {
336
- "epoch": 4.59,
337
- "learning_rate": 3.8434661076170515e-05,
338
- "loss": 0.5752,
339
  "step": 490
340
  },
341
  {
342
- "epoch": 4.68,
343
- "learning_rate": 3.8085255066387145e-05,
344
- "loss": 0.5445,
345
  "step": 500
346
  },
347
  {
348
- "epoch": 4.78,
349
- "learning_rate": 3.7735849056603776e-05,
350
- "loss": 0.5064,
351
  "step": 510
352
  },
353
  {
354
- "epoch": 4.87,
355
- "learning_rate": 3.7386443046820406e-05,
356
- "loss": 0.508,
357
  "step": 520
358
  },
359
  {
360
- "epoch": 4.96,
361
- "learning_rate": 3.7037037037037037e-05,
362
- "loss": 0.5046,
363
- "step": 530
 
 
 
364
  },
365
  {
366
- "epoch": 4.99,
367
- "eval_accuracy": 0.8005295675198588,
368
- "eval_loss": 0.6084854006767273,
369
- "eval_runtime": 29.8921,
370
- "eval_samples_per_second": 75.806,
371
- "eval_steps_per_second": 4.75,
372
- "step": 533
373
  },
374
  {
375
- "epoch": 5.06,
376
- "learning_rate": 3.6687631027253674e-05,
377
- "loss": 0.4403,
378
  "step": 540
379
  },
380
  {
381
- "epoch": 5.15,
382
- "learning_rate": 3.6338225017470304e-05,
383
- "loss": 0.3919,
384
  "step": 550
385
  },
386
  {
387
- "epoch": 5.25,
388
- "learning_rate": 3.5988819007686934e-05,
389
- "loss": 0.4224,
390
  "step": 560
391
  },
392
  {
393
- "epoch": 5.34,
394
- "learning_rate": 3.5639412997903565e-05,
395
- "loss": 0.4708,
396
  "step": 570
397
  },
398
  {
399
- "epoch": 5.43,
400
- "learning_rate": 3.52900069881202e-05,
401
- "loss": 0.4206,
402
  "step": 580
403
  },
404
  {
405
- "epoch": 5.53,
406
- "learning_rate": 3.494060097833683e-05,
407
- "loss": 0.4416,
408
  "step": 590
409
  },
410
  {
411
- "epoch": 5.62,
412
- "learning_rate": 3.4591194968553456e-05,
413
- "loss": 0.4478,
414
  "step": 600
415
  },
416
  {
417
- "epoch": 5.71,
418
- "learning_rate": 3.424178895877009e-05,
419
- "loss": 0.4403,
420
  "step": 610
421
  },
422
  {
423
- "epoch": 5.81,
424
- "learning_rate": 3.3892382948986724e-05,
425
- "loss": 0.4428,
426
- "step": 620
 
 
 
427
  },
428
  {
429
- "epoch": 5.9,
430
- "learning_rate": 3.354297693920336e-05,
431
- "loss": 0.3941,
432
- "step": 630
433
  },
434
  {
435
- "epoch": 6.0,
436
- "learning_rate": 3.3193570929419984e-05,
437
- "loss": 0.4018,
438
- "step": 640
439
  },
440
  {
441
- "epoch": 6.0,
442
- "eval_accuracy": 0.8022947925860547,
443
- "eval_loss": 0.6132453680038452,
444
- "eval_runtime": 30.1158,
445
- "eval_samples_per_second": 75.243,
446
- "eval_steps_per_second": 4.715,
447
  "step": 640
448
  },
449
  {
450
- "epoch": 6.09,
451
- "learning_rate": 3.284416491963662e-05,
452
- "loss": 0.3719,
453
  "step": 650
454
  },
455
  {
456
- "epoch": 6.18,
457
- "learning_rate": 3.249475890985325e-05,
458
- "loss": 0.3549,
459
  "step": 660
460
  },
461
  {
462
- "epoch": 6.28,
463
- "learning_rate": 3.214535290006988e-05,
464
- "loss": 0.3866,
465
  "step": 670
466
  },
467
  {
468
- "epoch": 6.37,
469
- "learning_rate": 3.179594689028651e-05,
470
- "loss": 0.4172,
471
  "step": 680
472
  },
473
  {
474
- "epoch": 6.46,
475
- "learning_rate": 3.144654088050314e-05,
476
- "loss": 0.3798,
477
  "step": 690
478
  },
479
  {
480
- "epoch": 6.56,
481
- "learning_rate": 3.109713487071978e-05,
482
- "loss": 0.3288,
483
  "step": 700
484
  },
485
  {
486
- "epoch": 6.65,
487
- "learning_rate": 3.074772886093641e-05,
488
- "loss": 0.3829,
 
 
 
 
 
 
 
 
 
489
  "step": 710
490
  },
491
  {
492
- "epoch": 6.74,
493
- "learning_rate": 3.0398322851153044e-05,
494
- "loss": 0.3771,
495
  "step": 720
496
  },
497
  {
498
- "epoch": 6.84,
499
- "learning_rate": 3.004891684136967e-05,
500
- "loss": 0.3637,
501
  "step": 730
502
  },
503
  {
504
- "epoch": 6.93,
505
- "learning_rate": 2.9699510831586302e-05,
506
- "loss": 0.3641,
507
  "step": 740
508
  },
509
  {
510
- "epoch": 7.0,
511
- "eval_accuracy": 0.8292144748455428,
512
- "eval_loss": 0.5509597659111023,
513
- "eval_runtime": 29.6593,
514
- "eval_samples_per_second": 76.401,
515
- "eval_steps_per_second": 4.788,
516
- "step": 747
517
- },
518
- {
519
- "epoch": 7.03,
520
- "learning_rate": 2.935010482180294e-05,
521
- "loss": 0.373,
522
  "step": 750
523
  },
524
  {
525
- "epoch": 7.12,
526
- "learning_rate": 2.9000698812019566e-05,
527
- "loss": 0.3128,
528
  "step": 760
529
  },
530
  {
531
- "epoch": 7.21,
532
- "learning_rate": 2.8651292802236203e-05,
533
- "loss": 0.3177,
534
  "step": 770
535
  },
536
  {
537
- "epoch": 7.31,
538
- "learning_rate": 2.830188679245283e-05,
539
- "loss": 0.2853,
540
  "step": 780
541
  },
542
  {
543
- "epoch": 7.4,
544
- "learning_rate": 2.7952480782669467e-05,
545
- "loss": 0.2969,
546
  "step": 790
547
  },
548
  {
549
- "epoch": 7.49,
550
- "learning_rate": 2.7603074772886094e-05,
551
- "loss": 0.2786,
 
 
 
 
 
 
 
 
 
552
  "step": 800
553
  },
554
  {
555
- "epoch": 7.59,
556
- "learning_rate": 2.7253668763102725e-05,
557
- "loss": 0.2653,
558
  "step": 810
559
  },
560
  {
561
- "epoch": 7.68,
562
- "learning_rate": 2.690426275331936e-05,
563
- "loss": 0.2388,
564
  "step": 820
565
  },
566
  {
567
- "epoch": 7.78,
568
- "learning_rate": 2.655485674353599e-05,
569
- "loss": 0.2941,
570
  "step": 830
571
  },
572
  {
573
- "epoch": 7.87,
574
- "learning_rate": 2.6205450733752623e-05,
575
- "loss": 0.3248,
576
  "step": 840
577
  },
578
  {
579
- "epoch": 7.96,
580
- "learning_rate": 2.5856044723969253e-05,
581
- "loss": 0.4003,
582
  "step": 850
583
  },
584
  {
585
- "epoch": 8.0,
586
- "eval_accuracy": 0.824801412180053,
587
- "eval_loss": 0.530396580696106,
588
- "eval_runtime": 29.7498,
589
- "eval_samples_per_second": 76.169,
590
- "eval_steps_per_second": 4.773,
591
- "step": 854
592
- },
593
- {
594
- "epoch": 8.06,
595
- "learning_rate": 2.5506638714185887e-05,
596
- "loss": 0.2842,
597
  "step": 860
598
  },
599
  {
600
- "epoch": 8.15,
601
- "learning_rate": 2.5157232704402517e-05,
602
- "loss": 0.2404,
603
  "step": 870
604
  },
605
  {
606
- "epoch": 8.24,
607
- "learning_rate": 2.4807826694619148e-05,
608
- "loss": 0.2525,
609
  "step": 880
610
  },
611
  {
612
- "epoch": 8.34,
613
- "learning_rate": 2.445842068483578e-05,
614
- "loss": 0.2415,
 
 
 
 
 
 
 
 
 
615
  "step": 890
616
  },
617
  {
618
- "epoch": 8.43,
619
- "learning_rate": 2.4109014675052412e-05,
620
- "loss": 0.222,
621
  "step": 900
622
  },
623
  {
624
- "epoch": 8.52,
625
- "learning_rate": 2.3759608665269042e-05,
626
- "loss": 0.2461,
627
  "step": 910
628
  },
629
  {
630
- "epoch": 8.62,
631
- "learning_rate": 2.3410202655485676e-05,
632
- "loss": 0.2533,
633
  "step": 920
634
  },
635
  {
636
- "epoch": 8.71,
637
- "learning_rate": 2.3060796645702306e-05,
638
- "loss": 0.267,
639
  "step": 930
640
  },
641
  {
642
- "epoch": 8.81,
643
- "learning_rate": 2.271139063591894e-05,
644
- "loss": 0.2572,
645
  "step": 940
646
  },
647
  {
648
- "epoch": 8.9,
649
- "learning_rate": 2.236198462613557e-05,
650
- "loss": 0.2263,
651
  "step": 950
652
  },
653
  {
654
- "epoch": 8.99,
655
- "learning_rate": 2.2012578616352204e-05,
656
- "loss": 0.3142,
657
  "step": 960
658
  },
659
  {
660
- "epoch": 8.99,
661
- "eval_accuracy": 0.8349514563106796,
662
- "eval_loss": 0.5270902514457703,
663
- "eval_runtime": 30.327,
664
- "eval_samples_per_second": 74.719,
665
- "eval_steps_per_second": 4.682,
666
- "step": 960
667
  },
668
  {
669
- "epoch": 9.09,
670
- "learning_rate": 2.1663172606568835e-05,
671
- "loss": 0.2163,
 
 
 
672
  "step": 970
673
  },
674
  {
675
- "epoch": 9.18,
676
- "learning_rate": 2.1313766596785465e-05,
677
- "loss": 0.21,
678
  "step": 980
679
  },
680
  {
681
- "epoch": 9.27,
682
- "learning_rate": 2.0964360587002095e-05,
683
- "loss": 0.2354,
684
  "step": 990
685
  },
686
  {
687
- "epoch": 9.37,
688
- "learning_rate": 2.061495457721873e-05,
689
- "loss": 0.2394,
690
  "step": 1000
691
  },
692
  {
693
- "epoch": 9.46,
694
- "learning_rate": 2.026554856743536e-05,
695
- "loss": 0.2225,
696
  "step": 1010
697
  },
698
  {
699
- "epoch": 9.56,
700
- "learning_rate": 1.9916142557651993e-05,
701
- "loss": 0.2392,
702
  "step": 1020
703
  },
704
  {
705
- "epoch": 9.65,
706
- "learning_rate": 1.9566736547868624e-05,
707
- "loss": 0.2237,
708
  "step": 1030
709
  },
710
  {
711
- "epoch": 9.74,
712
- "learning_rate": 1.9217330538085258e-05,
713
- "loss": 0.251,
714
  "step": 1040
715
  },
716
  {
717
- "epoch": 9.84,
718
- "learning_rate": 1.8867924528301888e-05,
719
- "loss": 0.2033,
720
  "step": 1050
721
  },
722
  {
723
- "epoch": 9.93,
724
- "learning_rate": 1.8518518518518518e-05,
725
- "loss": 0.2266,
726
- "step": 1060
 
 
 
727
  },
728
  {
729
- "epoch": 10.0,
730
- "eval_accuracy": 0.8362753751103266,
731
- "eval_loss": 0.5450473427772522,
732
- "eval_runtime": 29.6596,
733
- "eval_samples_per_second": 76.4,
734
- "eval_steps_per_second": 4.788,
735
- "step": 1067
736
  },
737
  {
738
- "epoch": 10.02,
739
- "learning_rate": 1.8169112508735152e-05,
740
- "loss": 0.211,
741
  "step": 1070
742
  },
743
  {
744
- "epoch": 10.12,
745
- "learning_rate": 1.7819706498951782e-05,
746
- "loss": 0.2449,
747
  "step": 1080
748
  },
749
  {
750
- "epoch": 10.21,
751
- "learning_rate": 1.7470300489168416e-05,
752
- "loss": 0.1846,
753
  "step": 1090
754
  },
755
  {
756
- "epoch": 10.3,
757
- "learning_rate": 1.7120894479385047e-05,
758
- "loss": 0.1763,
759
  "step": 1100
760
  },
761
  {
762
- "epoch": 10.4,
763
- "learning_rate": 1.677148846960168e-05,
764
- "loss": 0.2067,
765
  "step": 1110
766
  },
767
  {
768
- "epoch": 10.49,
769
- "learning_rate": 1.642208245981831e-05,
770
- "loss": 0.188,
771
  "step": 1120
772
  },
773
  {
774
- "epoch": 10.59,
775
- "learning_rate": 1.607267645003494e-05,
776
- "loss": 0.2359,
777
  "step": 1130
778
  },
779
  {
780
- "epoch": 10.68,
781
- "learning_rate": 1.572327044025157e-05,
782
- "loss": 0.187,
783
  "step": 1140
784
  },
785
  {
786
- "epoch": 10.77,
787
- "learning_rate": 1.5373864430468205e-05,
788
- "loss": 0.2369,
 
 
 
 
 
 
 
 
 
789
  "step": 1150
790
  },
791
  {
792
- "epoch": 10.87,
793
- "learning_rate": 1.5024458420684836e-05,
794
- "loss": 0.1856,
795
  "step": 1160
796
  },
797
  {
798
- "epoch": 10.96,
799
- "learning_rate": 1.467505241090147e-05,
800
- "loss": 0.1572,
801
  "step": 1170
802
  },
803
  {
804
- "epoch": 11.0,
805
- "eval_accuracy": 0.8323036187113857,
806
- "eval_loss": 0.5316830277442932,
807
- "eval_runtime": 29.5637,
808
- "eval_samples_per_second": 76.648,
809
- "eval_steps_per_second": 4.803,
810
- "step": 1174
811
- },
812
- {
813
- "epoch": 11.05,
814
- "learning_rate": 1.4325646401118102e-05,
815
- "loss": 0.1944,
816
  "step": 1180
817
  },
818
  {
819
- "epoch": 11.15,
820
- "learning_rate": 1.3976240391334734e-05,
821
- "loss": 0.1841,
822
  "step": 1190
823
  },
824
  {
825
- "epoch": 11.24,
826
- "learning_rate": 1.3626834381551362e-05,
827
- "loss": 0.1675,
828
  "step": 1200
829
  },
830
  {
831
- "epoch": 11.33,
832
- "learning_rate": 1.3277428371767994e-05,
833
- "loss": 0.1881,
834
  "step": 1210
835
  },
836
  {
837
- "epoch": 11.43,
838
- "learning_rate": 1.2928022361984627e-05,
839
- "loss": 0.2145,
840
  "step": 1220
841
  },
842
  {
843
- "epoch": 11.52,
844
- "learning_rate": 1.2578616352201259e-05,
845
- "loss": 0.1618,
846
  "step": 1230
847
  },
848
  {
849
- "epoch": 11.62,
850
- "learning_rate": 1.222921034241789e-05,
851
- "loss": 0.189,
 
 
 
 
 
 
 
 
 
852
  "step": 1240
853
  },
854
  {
855
- "epoch": 11.71,
856
- "learning_rate": 1.1879804332634521e-05,
857
- "loss": 0.1915,
858
  "step": 1250
859
  },
860
  {
861
- "epoch": 11.8,
862
- "learning_rate": 1.1530398322851153e-05,
863
- "loss": 0.1697,
864
  "step": 1260
865
  },
866
  {
867
- "epoch": 11.9,
868
- "learning_rate": 1.1180992313067785e-05,
869
- "loss": 0.1793,
870
  "step": 1270
871
  },
872
  {
873
- "epoch": 11.99,
874
- "learning_rate": 1.0831586303284417e-05,
875
- "loss": 0.1891,
876
  "step": 1280
877
  },
878
  {
879
- "epoch": 12.0,
880
- "eval_accuracy": 0.8362753751103266,
881
- "eval_loss": 0.5269371867179871,
882
- "eval_runtime": 29.7573,
883
- "eval_samples_per_second": 76.149,
884
- "eval_steps_per_second": 4.772,
885
- "step": 1281
886
- },
887
- {
888
- "epoch": 12.08,
889
- "learning_rate": 1.0482180293501048e-05,
890
- "loss": 0.1265,
891
  "step": 1290
892
  },
893
  {
894
- "epoch": 12.18,
895
- "learning_rate": 1.013277428371768e-05,
896
- "loss": 0.1668,
897
  "step": 1300
898
  },
899
  {
900
- "epoch": 12.27,
901
- "learning_rate": 9.783368273934312e-06,
902
- "loss": 0.179,
903
  "step": 1310
904
  },
905
  {
906
- "epoch": 12.37,
907
- "learning_rate": 9.433962264150944e-06,
908
- "loss": 0.176,
909
  "step": 1320
910
  },
911
  {
912
- "epoch": 12.46,
913
- "learning_rate": 9.084556254367576e-06,
914
- "loss": 0.1698,
915
- "step": 1330
916
- },
917
- {
918
- "epoch": 12.55,
919
- "learning_rate": 8.735150244584208e-06,
920
- "loss": 0.1563,
921
- "step": 1340
922
- },
923
- {
924
- "epoch": 12.65,
925
- "learning_rate": 8.38574423480084e-06,
926
- "loss": 0.1583,
927
- "step": 1350
928
- },
929
- {
930
- "epoch": 12.74,
931
- "learning_rate": 8.03633822501747e-06,
932
- "loss": 0.1185,
933
- "step": 1360
934
- },
935
- {
936
- "epoch": 12.83,
937
- "learning_rate": 7.686932215234103e-06,
938
- "loss": 0.1229,
939
- "step": 1370
940
- },
941
- {
942
- "epoch": 12.93,
943
- "learning_rate": 7.337526205450735e-06,
944
- "loss": 0.1551,
945
- "step": 1380
946
- },
947
- {
948
- "epoch": 12.99,
949
- "eval_accuracy": 0.8375992939099736,
950
- "eval_loss": 0.5339873433113098,
951
- "eval_runtime": 29.8516,
952
- "eval_samples_per_second": 75.909,
953
- "eval_steps_per_second": 4.757,
954
- "step": 1387
955
- },
956
- {
957
- "epoch": 13.02,
958
- "learning_rate": 6.988120195667367e-06,
959
- "loss": 0.1566,
960
- "step": 1390
961
- },
962
- {
963
- "epoch": 13.11,
964
- "learning_rate": 6.638714185883997e-06,
965
- "loss": 0.1394,
966
- "step": 1400
967
- },
968
- {
969
- "epoch": 13.21,
970
- "learning_rate": 6.289308176100629e-06,
971
- "loss": 0.1642,
972
- "step": 1410
973
- },
974
- {
975
- "epoch": 13.3,
976
- "learning_rate": 5.9399021663172605e-06,
977
- "loss": 0.1334,
978
- "step": 1420
979
- },
980
- {
981
- "epoch": 13.4,
982
- "learning_rate": 5.590496156533893e-06,
983
- "loss": 0.157,
984
- "step": 1430
985
- },
986
- {
987
- "epoch": 13.49,
988
- "learning_rate": 5.241090146750524e-06,
989
- "loss": 0.1658,
990
- "step": 1440
991
- },
992
- {
993
- "epoch": 13.58,
994
- "learning_rate": 4.891684136967156e-06,
995
- "loss": 0.1692,
996
- "step": 1450
997
- },
998
- {
999
- "epoch": 13.68,
1000
- "learning_rate": 4.542278127183788e-06,
1001
- "loss": 0.1393,
1002
- "step": 1460
1003
- },
1004
- {
1005
- "epoch": 13.77,
1006
- "learning_rate": 4.19287211740042e-06,
1007
- "loss": 0.1492,
1008
- "step": 1470
1009
- },
1010
- {
1011
- "epoch": 13.86,
1012
- "learning_rate": 3.843466107617051e-06,
1013
- "loss": 0.1503,
1014
- "step": 1480
1015
- },
1016
- {
1017
- "epoch": 13.96,
1018
- "learning_rate": 3.4940600978336834e-06,
1019
- "loss": 0.1548,
1020
- "step": 1490
1021
- },
1022
- {
1023
- "epoch": 14.0,
1024
- "eval_accuracy": 0.8367166813768756,
1025
- "eval_loss": 0.5312865376472473,
1026
- "eval_runtime": 29.7732,
1027
- "eval_samples_per_second": 76.109,
1028
- "eval_steps_per_second": 4.769,
1029
- "step": 1494
1030
- },
1031
- {
1032
- "epoch": 14.05,
1033
- "learning_rate": 3.1446540880503146e-06,
1034
- "loss": 0.1186,
1035
- "step": 1500
1036
- },
1037
- {
1038
- "epoch": 14.15,
1039
- "learning_rate": 2.7952480782669463e-06,
1040
- "loss": 0.1213,
1041
- "step": 1510
1042
- },
1043
- {
1044
- "epoch": 14.24,
1045
- "learning_rate": 2.445842068483578e-06,
1046
- "loss": 0.1601,
1047
- "step": 1520
1048
- },
1049
- {
1050
- "epoch": 14.33,
1051
- "learning_rate": 2.09643605870021e-06,
1052
- "loss": 0.1916,
1053
- "step": 1530
1054
- },
1055
- {
1056
- "epoch": 14.43,
1057
- "learning_rate": 1.7470300489168417e-06,
1058
- "loss": 0.1243,
1059
- "step": 1540
1060
- },
1061
- {
1062
- "epoch": 14.52,
1063
- "learning_rate": 1.3976240391334732e-06,
1064
- "loss": 0.1323,
1065
- "step": 1550
1066
- },
1067
- {
1068
- "epoch": 14.61,
1069
- "learning_rate": 1.048218029350105e-06,
1070
- "loss": 0.1535,
1071
- "step": 1560
1072
- },
1073
- {
1074
- "epoch": 14.71,
1075
- "learning_rate": 6.988120195667366e-07,
1076
- "loss": 0.1385,
1077
- "step": 1570
1078
- },
1079
- {
1080
- "epoch": 14.8,
1081
- "learning_rate": 3.494060097833683e-07,
1082
- "loss": 0.1466,
1083
- "step": 1580
1084
  },
1085
  {
1086
- "epoch": 14.89,
1087
- "learning_rate": 0.0,
1088
- "loss": 0.1781,
1089
- "step": 1590
1090
- },
1091
- {
1092
- "epoch": 14.89,
1093
- "eval_accuracy": 0.8398058252427184,
1094
- "eval_loss": 0.5298482775688171,
1095
- "eval_runtime": 29.7814,
1096
- "eval_samples_per_second": 76.088,
1097
- "eval_steps_per_second": 4.768,
1098
- "step": 1590
1099
- },
1100
- {
1101
- "epoch": 14.89,
1102
- "step": 1590,
1103
- "total_flos": 7.877470546062803e+18,
1104
- "train_loss": 0.5140632056590146,
1105
- "train_runtime": 3982.2883,
1106
- "train_samples_per_second": 25.704,
1107
- "train_steps_per_second": 0.399
1108
  }
1109
  ],
1110
  "logging_steps": 10,
1111
- "max_steps": 1590,
1112
  "num_train_epochs": 15,
1113
  "save_steps": 500,
1114
- "total_flos": 7.877470546062803e+18,
1115
  "trial_name": null,
1116
  "trial_params": null
1117
  }
 
1
  {
2
+ "best_metric": 0.8528925619834711,
3
+ "best_model_checkpoint": "vit-base-patch16-224-brand/checkpoint-970",
4
+ "epoch": 14.957507082152974,
5
  "eval_steps": 500,
6
+ "global_step": 1320,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.11,
13
+ "learning_rate": 3.7878787878787882e-06,
14
+ "loss": 2.2055,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.23,
19
+ "learning_rate": 7.5757575757575764e-06,
20
+ "loss": 2.162,
21
  "step": 20
22
  },
23
  {
24
+ "epoch": 0.34,
25
+ "learning_rate": 1.1363636363636365e-05,
26
+ "loss": 2.0983,
27
  "step": 30
28
  },
29
  {
30
+ "epoch": 0.45,
31
+ "learning_rate": 1.5151515151515153e-05,
32
+ "loss": 2.0166,
33
  "step": 40
34
  },
35
  {
36
+ "epoch": 0.57,
37
+ "learning_rate": 1.893939393939394e-05,
38
+ "loss": 1.9044,
39
  "step": 50
40
  },
41
  {
42
+ "epoch": 0.68,
43
+ "learning_rate": 2.272727272727273e-05,
44
+ "loss": 1.7381,
45
  "step": 60
46
  },
47
  {
48
+ "epoch": 0.79,
49
+ "learning_rate": 2.6515151515151516e-05,
50
+ "loss": 1.6089,
51
  "step": 70
52
  },
53
  {
54
+ "epoch": 0.91,
55
+ "learning_rate": 3.0303030303030306e-05,
56
+ "loss": 1.4669,
57
  "step": 80
58
  },
59
  {
60
+ "epoch": 1.0,
61
+ "eval_accuracy": 0.5611570247933885,
62
+ "eval_loss": 1.3067070245742798,
63
+ "eval_runtime": 16.7756,
64
+ "eval_samples_per_second": 72.129,
65
+ "eval_steps_per_second": 4.53,
66
+ "step": 88
67
  },
68
  {
69
+ "epoch": 1.02,
70
+ "learning_rate": 3.409090909090909e-05,
71
+ "loss": 1.4484,
72
+ "step": 90
73
  },
74
  {
75
+ "epoch": 1.13,
76
+ "learning_rate": 3.787878787878788e-05,
77
+ "loss": 1.2513,
78
+ "step": 100
 
 
 
79
  },
80
  {
81
+ "epoch": 1.25,
82
+ "learning_rate": 4.166666666666667e-05,
83
+ "loss": 1.1741,
84
  "step": 110
85
  },
86
  {
87
+ "epoch": 1.36,
88
+ "learning_rate": 4.545454545454546e-05,
89
+ "loss": 1.0883,
90
  "step": 120
91
  },
92
  {
93
+ "epoch": 1.47,
94
+ "learning_rate": 4.9242424242424245e-05,
95
+ "loss": 1.0522,
96
  "step": 130
97
  },
98
  {
99
+ "epoch": 1.59,
100
+ "learning_rate": 4.966329966329967e-05,
101
+ "loss": 1.0214,
102
  "step": 140
103
  },
104
  {
105
+ "epoch": 1.7,
106
+ "learning_rate": 4.9242424242424245e-05,
107
+ "loss": 0.9231,
108
  "step": 150
109
  },
110
  {
111
+ "epoch": 1.81,
112
+ "learning_rate": 4.882154882154882e-05,
113
+ "loss": 0.8961,
114
  "step": 160
115
  },
116
  {
117
+ "epoch": 1.93,
118
+ "learning_rate": 4.84006734006734e-05,
119
+ "loss": 0.8898,
120
  "step": 170
121
  },
122
  {
123
+ "epoch": 1.99,
124
+ "eval_accuracy": 0.7140495867768595,
125
+ "eval_loss": 0.8380156755447388,
126
+ "eval_runtime": 16.8759,
127
+ "eval_samples_per_second": 71.7,
128
+ "eval_steps_per_second": 4.503,
129
+ "step": 176
130
+ },
131
+ {
132
+ "epoch": 2.04,
133
+ "learning_rate": 4.797979797979798e-05,
134
+ "loss": 0.8856,
135
  "step": 180
136
  },
137
  {
138
+ "epoch": 2.15,
139
+ "learning_rate": 4.755892255892256e-05,
140
+ "loss": 0.7916,
141
  "step": 190
142
  },
143
  {
144
+ "epoch": 2.27,
145
+ "learning_rate": 4.713804713804714e-05,
146
+ "loss": 0.7614,
147
  "step": 200
148
  },
149
  {
150
+ "epoch": 2.38,
151
+ "learning_rate": 4.671717171717172e-05,
152
+ "loss": 0.7696,
153
  "step": 210
154
  },
155
  {
156
+ "epoch": 2.49,
157
+ "learning_rate": 4.62962962962963e-05,
158
+ "loss": 0.7874,
 
 
 
 
 
 
 
 
 
159
  "step": 220
160
  },
161
  {
162
+ "epoch": 2.61,
163
+ "learning_rate": 4.5875420875420876e-05,
164
+ "loss": 0.7126,
165
  "step": 230
166
  },
167
  {
168
+ "epoch": 2.72,
169
+ "learning_rate": 4.545454545454546e-05,
170
+ "loss": 0.7119,
171
  "step": 240
172
  },
173
  {
174
+ "epoch": 2.83,
175
+ "learning_rate": 4.5033670033670034e-05,
176
+ "loss": 0.7095,
177
  "step": 250
178
  },
179
  {
180
+ "epoch": 2.95,
181
+ "learning_rate": 4.4612794612794616e-05,
182
+ "loss": 0.7243,
183
  "step": 260
184
  },
185
  {
186
+ "epoch": 2.99,
187
+ "eval_accuracy": 0.7694214876033058,
188
+ "eval_loss": 0.6559494137763977,
189
+ "eval_runtime": 16.9139,
190
+ "eval_samples_per_second": 71.539,
191
+ "eval_steps_per_second": 4.493,
192
+ "step": 264
193
+ },
194
+ {
195
+ "epoch": 3.06,
196
+ "learning_rate": 4.41919191919192e-05,
197
+ "loss": 0.7109,
198
  "step": 270
199
  },
200
  {
201
+ "epoch": 3.17,
202
+ "learning_rate": 4.3771043771043774e-05,
203
+ "loss": 0.5651,
204
  "step": 280
205
  },
206
  {
207
+ "epoch": 3.29,
208
+ "learning_rate": 4.335016835016835e-05,
209
+ "loss": 0.5838,
210
  "step": 290
211
  },
212
  {
213
+ "epoch": 3.4,
214
+ "learning_rate": 4.292929292929293e-05,
215
+ "loss": 0.5879,
216
  "step": 300
217
  },
218
  {
219
+ "epoch": 3.51,
220
+ "learning_rate": 4.250841750841751e-05,
221
+ "loss": 0.5541,
222
  "step": 310
223
  },
224
  {
225
+ "epoch": 3.63,
226
+ "learning_rate": 4.208754208754209e-05,
227
+ "loss": 0.5522,
228
  "step": 320
229
  },
230
  {
231
+ "epoch": 3.74,
232
+ "learning_rate": 4.166666666666667e-05,
233
+ "loss": 0.5105,
 
 
 
 
 
 
 
 
 
234
  "step": 330
235
  },
236
  {
237
+ "epoch": 3.85,
238
+ "learning_rate": 4.124579124579125e-05,
239
+ "loss": 0.5289,
240
  "step": 340
241
  },
242
  {
243
+ "epoch": 3.97,
244
+ "learning_rate": 4.082491582491583e-05,
245
+ "loss": 0.5158,
246
  "step": 350
247
  },
248
  {
249
+ "epoch": 4.0,
250
+ "eval_accuracy": 0.7950413223140496,
251
+ "eval_loss": 0.5982227921485901,
252
+ "eval_runtime": 16.8053,
253
+ "eval_samples_per_second": 72.001,
254
+ "eval_steps_per_second": 4.522,
255
+ "step": 353
256
+ },
257
+ {
258
+ "epoch": 4.08,
259
+ "learning_rate": 4.0404040404040405e-05,
260
+ "loss": 0.4296,
261
  "step": 360
262
  },
263
  {
264
+ "epoch": 4.19,
265
+ "learning_rate": 3.998316498316498e-05,
266
+ "loss": 0.4105,
267
  "step": 370
268
  },
269
  {
270
+ "epoch": 4.31,
271
+ "learning_rate": 3.956228956228956e-05,
272
+ "loss": 0.4658,
273
  "step": 380
274
  },
275
  {
276
+ "epoch": 4.42,
277
+ "learning_rate": 3.9141414141414145e-05,
278
+ "loss": 0.4464,
279
  "step": 390
280
  },
281
  {
282
+ "epoch": 4.53,
283
+ "learning_rate": 3.872053872053872e-05,
284
+ "loss": 0.431,
285
  "step": 400
286
  },
287
  {
288
+ "epoch": 4.65,
289
+ "learning_rate": 3.82996632996633e-05,
290
+ "loss": 0.4499,
291
  "step": 410
292
  },
293
  {
294
+ "epoch": 4.76,
295
+ "learning_rate": 3.787878787878788e-05,
296
+ "loss": 0.4568,
297
  "step": 420
298
  },
299
  {
300
+ "epoch": 4.87,
301
+ "learning_rate": 3.745791245791246e-05,
302
+ "loss": 0.4397,
 
 
 
 
 
 
 
 
 
303
  "step": 430
304
  },
305
  {
306
+ "epoch": 4.99,
307
+ "learning_rate": 3.7037037037037037e-05,
308
+ "loss": 0.4605,
309
  "step": 440
310
  },
311
  {
312
+ "epoch": 5.0,
313
+ "eval_accuracy": 0.8082644628099174,
314
+ "eval_loss": 0.5856056809425354,
315
+ "eval_runtime": 16.7221,
316
+ "eval_samples_per_second": 72.359,
317
+ "eval_steps_per_second": 4.545,
318
+ "step": 441
319
+ },
320
+ {
321
+ "epoch": 5.1,
322
+ "learning_rate": 3.661616161616162e-05,
323
+ "loss": 0.3757,
324
  "step": 450
325
  },
326
  {
327
+ "epoch": 5.21,
328
+ "learning_rate": 3.61952861952862e-05,
329
+ "loss": 0.369,
330
  "step": 460
331
  },
332
  {
333
+ "epoch": 5.33,
334
+ "learning_rate": 3.577441077441078e-05,
335
+ "loss": 0.3952,
336
  "step": 470
337
  },
338
  {
339
+ "epoch": 5.44,
340
+ "learning_rate": 3.535353535353535e-05,
341
+ "loss": 0.3841,
342
  "step": 480
343
  },
344
  {
345
+ "epoch": 5.55,
346
+ "learning_rate": 3.4932659932659934e-05,
347
+ "loss": 0.3707,
348
  "step": 490
349
  },
350
  {
351
+ "epoch": 5.67,
352
+ "learning_rate": 3.451178451178451e-05,
353
+ "loss": 0.3874,
354
  "step": 500
355
  },
356
  {
357
+ "epoch": 5.78,
358
+ "learning_rate": 3.409090909090909e-05,
359
+ "loss": 0.347,
360
  "step": 510
361
  },
362
  {
363
+ "epoch": 5.89,
364
+ "learning_rate": 3.3670033670033675e-05,
365
+ "loss": 0.332,
366
  "step": 520
367
  },
368
  {
369
+ "epoch": 5.99,
370
+ "eval_accuracy": 0.8355371900826446,
371
+ "eval_loss": 0.5138491988182068,
372
+ "eval_runtime": 16.6373,
373
+ "eval_samples_per_second": 72.728,
374
+ "eval_steps_per_second": 4.568,
375
+ "step": 529
376
  },
377
  {
378
+ "epoch": 6.01,
379
+ "learning_rate": 3.324915824915825e-05,
380
+ "loss": 0.3767,
381
+ "step": 530
 
 
 
382
  },
383
  {
384
+ "epoch": 6.12,
385
+ "learning_rate": 3.282828282828283e-05,
386
+ "loss": 0.2646,
387
  "step": 540
388
  },
389
  {
390
+ "epoch": 6.23,
391
+ "learning_rate": 3.240740740740741e-05,
392
+ "loss": 0.3142,
393
  "step": 550
394
  },
395
  {
396
+ "epoch": 6.35,
397
+ "learning_rate": 3.198653198653199e-05,
398
+ "loss": 0.3171,
399
  "step": 560
400
  },
401
  {
402
+ "epoch": 6.46,
403
+ "learning_rate": 3.1565656565656566e-05,
404
+ "loss": 0.3318,
405
  "step": 570
406
  },
407
  {
408
+ "epoch": 6.57,
409
+ "learning_rate": 3.114478114478115e-05,
410
+ "loss": 0.309,
411
  "step": 580
412
  },
413
  {
414
+ "epoch": 6.69,
415
+ "learning_rate": 3.072390572390573e-05,
416
+ "loss": 0.2928,
417
  "step": 590
418
  },
419
  {
420
+ "epoch": 6.8,
421
+ "learning_rate": 3.0303030303030306e-05,
422
+ "loss": 0.3622,
423
  "step": 600
424
  },
425
  {
426
+ "epoch": 6.91,
427
+ "learning_rate": 2.9882154882154885e-05,
428
+ "loss": 0.3375,
429
  "step": 610
430
  },
431
  {
432
+ "epoch": 6.99,
433
+ "eval_accuracy": 0.8264462809917356,
434
+ "eval_loss": 0.5094661116600037,
435
+ "eval_runtime": 16.7644,
436
+ "eval_samples_per_second": 72.177,
437
+ "eval_steps_per_second": 4.533,
438
+ "step": 617
439
  },
440
  {
441
+ "epoch": 7.03,
442
+ "learning_rate": 2.946127946127946e-05,
443
+ "loss": 0.2897,
444
+ "step": 620
445
  },
446
  {
447
+ "epoch": 7.14,
448
+ "learning_rate": 2.904040404040404e-05,
449
+ "loss": 0.2764,
450
+ "step": 630
451
  },
452
  {
453
+ "epoch": 7.25,
454
+ "learning_rate": 2.8619528619528618e-05,
455
+ "loss": 0.2627,
 
 
 
456
  "step": 640
457
  },
458
  {
459
+ "epoch": 7.37,
460
+ "learning_rate": 2.8198653198653204e-05,
461
+ "loss": 0.2899,
462
  "step": 650
463
  },
464
  {
465
+ "epoch": 7.48,
466
+ "learning_rate": 2.777777777777778e-05,
467
+ "loss": 0.2316,
468
  "step": 660
469
  },
470
  {
471
+ "epoch": 7.59,
472
+ "learning_rate": 2.7356902356902358e-05,
473
+ "loss": 0.2729,
474
  "step": 670
475
  },
476
  {
477
+ "epoch": 7.71,
478
+ "learning_rate": 2.6936026936026937e-05,
479
+ "loss": 0.2598,
480
  "step": 680
481
  },
482
  {
483
+ "epoch": 7.82,
484
+ "learning_rate": 2.6515151515151516e-05,
485
+ "loss": 0.2331,
486
  "step": 690
487
  },
488
  {
489
+ "epoch": 7.93,
490
+ "learning_rate": 2.6094276094276095e-05,
491
+ "loss": 0.2188,
492
  "step": 700
493
  },
494
  {
495
+ "epoch": 8.0,
496
+ "eval_accuracy": 0.8322314049586776,
497
+ "eval_loss": 0.5088740587234497,
498
+ "eval_runtime": 16.6796,
499
+ "eval_samples_per_second": 72.544,
500
+ "eval_steps_per_second": 4.556,
501
+ "step": 706
502
+ },
503
+ {
504
+ "epoch": 8.05,
505
+ "learning_rate": 2.5673400673400677e-05,
506
+ "loss": 0.2445,
507
  "step": 710
508
  },
509
  {
510
+ "epoch": 8.16,
511
+ "learning_rate": 2.5252525252525256e-05,
512
+ "loss": 0.2144,
513
  "step": 720
514
  },
515
  {
516
+ "epoch": 8.27,
517
+ "learning_rate": 2.4831649831649835e-05,
518
+ "loss": 0.1956,
519
  "step": 730
520
  },
521
  {
522
+ "epoch": 8.39,
523
+ "learning_rate": 2.441077441077441e-05,
524
+ "loss": 0.2103,
525
  "step": 740
526
  },
527
  {
528
+ "epoch": 8.5,
529
+ "learning_rate": 2.398989898989899e-05,
530
+ "loss": 0.2539,
 
 
 
 
 
 
 
 
 
531
  "step": 750
532
  },
533
  {
534
+ "epoch": 8.61,
535
+ "learning_rate": 2.356902356902357e-05,
536
+ "loss": 0.2078,
537
  "step": 760
538
  },
539
  {
540
+ "epoch": 8.73,
541
+ "learning_rate": 2.314814814814815e-05,
542
+ "loss": 0.2112,
543
  "step": 770
544
  },
545
  {
546
+ "epoch": 8.84,
547
+ "learning_rate": 2.272727272727273e-05,
548
+ "loss": 0.2418,
549
  "step": 780
550
  },
551
  {
552
+ "epoch": 8.95,
553
+ "learning_rate": 2.2306397306397308e-05,
554
+ "loss": 0.2112,
555
  "step": 790
556
  },
557
  {
558
+ "epoch": 9.0,
559
+ "eval_accuracy": 0.8380165289256198,
560
+ "eval_loss": 0.5125700235366821,
561
+ "eval_runtime": 16.6395,
562
+ "eval_samples_per_second": 72.718,
563
+ "eval_steps_per_second": 4.567,
564
+ "step": 794
565
+ },
566
+ {
567
+ "epoch": 9.07,
568
+ "learning_rate": 2.1885521885521887e-05,
569
+ "loss": 0.207,
570
  "step": 800
571
  },
572
  {
573
+ "epoch": 9.18,
574
+ "learning_rate": 2.1464646464646466e-05,
575
+ "loss": 0.1844,
576
  "step": 810
577
  },
578
  {
579
+ "epoch": 9.29,
580
+ "learning_rate": 2.1043771043771045e-05,
581
+ "loss": 0.1742,
582
  "step": 820
583
  },
584
  {
585
+ "epoch": 9.41,
586
+ "learning_rate": 2.0622895622895624e-05,
587
+ "loss": 0.178,
588
  "step": 830
589
  },
590
  {
591
+ "epoch": 9.52,
592
+ "learning_rate": 2.0202020202020203e-05,
593
+ "loss": 0.1957,
594
  "step": 840
595
  },
596
  {
597
+ "epoch": 9.63,
598
+ "learning_rate": 1.978114478114478e-05,
599
+ "loss": 0.1833,
600
  "step": 850
601
  },
602
  {
603
+ "epoch": 9.75,
604
+ "learning_rate": 1.936026936026936e-05,
605
+ "loss": 0.1955,
 
 
 
 
 
 
 
 
 
606
  "step": 860
607
  },
608
  {
609
+ "epoch": 9.86,
610
+ "learning_rate": 1.893939393939394e-05,
611
+ "loss": 0.2161,
612
  "step": 870
613
  },
614
  {
615
+ "epoch": 9.97,
616
+ "learning_rate": 1.8518518518518518e-05,
617
+ "loss": 0.1895,
618
  "step": 880
619
  },
620
  {
621
+ "epoch": 9.99,
622
+ "eval_accuracy": 0.8363636363636363,
623
+ "eval_loss": 0.5057435631752014,
624
+ "eval_runtime": 16.6718,
625
+ "eval_samples_per_second": 72.578,
626
+ "eval_steps_per_second": 4.559,
627
+ "step": 882
628
+ },
629
+ {
630
+ "epoch": 10.08,
631
+ "learning_rate": 1.80976430976431e-05,
632
+ "loss": 0.1848,
633
  "step": 890
634
  },
635
  {
636
+ "epoch": 10.2,
637
+ "learning_rate": 1.7676767676767676e-05,
638
+ "loss": 0.1328,
639
  "step": 900
640
  },
641
  {
642
+ "epoch": 10.31,
643
+ "learning_rate": 1.7255892255892255e-05,
644
+ "loss": 0.1383,
645
  "step": 910
646
  },
647
  {
648
+ "epoch": 10.42,
649
+ "learning_rate": 1.6835016835016837e-05,
650
+ "loss": 0.1773,
651
  "step": 920
652
  },
653
  {
654
+ "epoch": 10.54,
655
+ "learning_rate": 1.6414141414141416e-05,
656
+ "loss": 0.1363,
657
  "step": 930
658
  },
659
  {
660
+ "epoch": 10.65,
661
+ "learning_rate": 1.5993265993265995e-05,
662
+ "loss": 0.1516,
663
  "step": 940
664
  },
665
  {
666
+ "epoch": 10.76,
667
+ "learning_rate": 1.5572390572390574e-05,
668
+ "loss": 0.1917,
669
  "step": 950
670
  },
671
  {
672
+ "epoch": 10.88,
673
+ "learning_rate": 1.5151515151515153e-05,
674
+ "loss": 0.1928,
675
  "step": 960
676
  },
677
  {
678
+ "epoch": 10.99,
679
+ "learning_rate": 1.473063973063973e-05,
680
+ "loss": 0.1593,
681
+ "step": 970
 
 
 
682
  },
683
  {
684
+ "epoch": 10.99,
685
+ "eval_accuracy": 0.8528925619834711,
686
+ "eval_loss": 0.4851677417755127,
687
+ "eval_runtime": 16.8845,
688
+ "eval_samples_per_second": 71.664,
689
+ "eval_steps_per_second": 4.501,
690
  "step": 970
691
  },
692
  {
693
+ "epoch": 11.1,
694
+ "learning_rate": 1.4309764309764309e-05,
695
+ "loss": 0.1578,
696
  "step": 980
697
  },
698
  {
699
+ "epoch": 11.22,
700
+ "learning_rate": 1.388888888888889e-05,
701
+ "loss": 0.1757,
702
  "step": 990
703
  },
704
  {
705
+ "epoch": 11.33,
706
+ "learning_rate": 1.3468013468013468e-05,
707
+ "loss": 0.125,
708
  "step": 1000
709
  },
710
  {
711
+ "epoch": 11.44,
712
+ "learning_rate": 1.3047138047138047e-05,
713
+ "loss": 0.1566,
714
  "step": 1010
715
  },
716
  {
717
+ "epoch": 11.56,
718
+ "learning_rate": 1.2626262626262628e-05,
719
+ "loss": 0.1841,
720
  "step": 1020
721
  },
722
  {
723
+ "epoch": 11.67,
724
+ "learning_rate": 1.2205387205387205e-05,
725
+ "loss": 0.1832,
726
  "step": 1030
727
  },
728
  {
729
+ "epoch": 11.78,
730
+ "learning_rate": 1.1784511784511786e-05,
731
+ "loss": 0.1604,
732
  "step": 1040
733
  },
734
  {
735
+ "epoch": 11.9,
736
+ "learning_rate": 1.1363636363636365e-05,
737
+ "loss": 0.1463,
738
  "step": 1050
739
  },
740
  {
741
+ "epoch": 12.0,
742
+ "eval_accuracy": 0.8429752066115702,
743
+ "eval_loss": 0.49344199895858765,
744
+ "eval_runtime": 16.7817,
745
+ "eval_samples_per_second": 72.102,
746
+ "eval_steps_per_second": 4.529,
747
+ "step": 1059
748
  },
749
  {
750
+ "epoch": 12.01,
751
+ "learning_rate": 1.0942760942760944e-05,
752
+ "loss": 0.133,
753
+ "step": 1060
 
 
 
754
  },
755
  {
756
+ "epoch": 12.12,
757
+ "learning_rate": 1.0521885521885522e-05,
758
+ "loss": 0.1293,
759
  "step": 1070
760
  },
761
  {
762
+ "epoch": 12.24,
763
+ "learning_rate": 1.0101010101010101e-05,
764
+ "loss": 0.1232,
765
  "step": 1080
766
  },
767
  {
768
+ "epoch": 12.35,
769
+ "learning_rate": 9.68013468013468e-06,
770
+ "loss": 0.1415,
771
  "step": 1090
772
  },
773
  {
774
+ "epoch": 12.46,
775
+ "learning_rate": 9.259259259259259e-06,
776
+ "loss": 0.1425,
777
  "step": 1100
778
  },
779
  {
780
+ "epoch": 12.58,
781
+ "learning_rate": 8.838383838383838e-06,
782
+ "loss": 0.1062,
783
  "step": 1110
784
  },
785
  {
786
+ "epoch": 12.69,
787
+ "learning_rate": 8.417508417508419e-06,
788
+ "loss": 0.1193,
789
  "step": 1120
790
  },
791
  {
792
+ "epoch": 12.8,
793
+ "learning_rate": 7.996632996632998e-06,
794
+ "loss": 0.1539,
795
  "step": 1130
796
  },
797
  {
798
+ "epoch": 12.92,
799
+ "learning_rate": 7.5757575757575764e-06,
800
+ "loss": 0.1565,
801
  "step": 1140
802
  },
803
  {
804
+ "epoch": 13.0,
805
+ "eval_accuracy": 0.8495867768595041,
806
+ "eval_loss": 0.47939661145210266,
807
+ "eval_runtime": 16.5708,
808
+ "eval_samples_per_second": 73.02,
809
+ "eval_steps_per_second": 4.586,
810
+ "step": 1147
811
+ },
812
+ {
813
+ "epoch": 13.03,
814
+ "learning_rate": 7.1548821548821545e-06,
815
+ "loss": 0.1193,
816
  "step": 1150
817
  },
818
  {
819
+ "epoch": 13.14,
820
+ "learning_rate": 6.734006734006734e-06,
821
+ "loss": 0.1287,
822
  "step": 1160
823
  },
824
  {
825
+ "epoch": 13.26,
826
+ "learning_rate": 6.313131313131314e-06,
827
+ "loss": 0.1186,
828
  "step": 1170
829
  },
830
  {
831
+ "epoch": 13.37,
832
+ "learning_rate": 5.892255892255893e-06,
833
+ "loss": 0.1562,
 
 
 
 
 
 
 
 
 
834
  "step": 1180
835
  },
836
  {
837
+ "epoch": 13.48,
838
+ "learning_rate": 5.471380471380472e-06,
839
+ "loss": 0.1297,
840
  "step": 1190
841
  },
842
  {
843
+ "epoch": 13.6,
844
+ "learning_rate": 5.050505050505051e-06,
845
+ "loss": 0.158,
846
  "step": 1200
847
  },
848
  {
849
+ "epoch": 13.71,
850
+ "learning_rate": 4.6296296296296296e-06,
851
+ "loss": 0.1188,
852
  "step": 1210
853
  },
854
  {
855
+ "epoch": 13.82,
856
+ "learning_rate": 4.208754208754209e-06,
857
+ "loss": 0.1237,
858
  "step": 1220
859
  },
860
  {
861
+ "epoch": 13.94,
862
+ "learning_rate": 3.7878787878787882e-06,
863
+ "loss": 0.1236,
864
  "step": 1230
865
  },
866
  {
867
+ "epoch": 13.99,
868
+ "eval_accuracy": 0.8462809917355372,
869
+ "eval_loss": 0.48630189895629883,
870
+ "eval_runtime": 16.6832,
871
+ "eval_samples_per_second": 72.528,
872
+ "eval_steps_per_second": 4.555,
873
+ "step": 1235
874
+ },
875
+ {
876
+ "epoch": 14.05,
877
+ "learning_rate": 3.367003367003367e-06,
878
+ "loss": 0.103,
879
  "step": 1240
880
  },
881
  {
882
+ "epoch": 14.16,
883
+ "learning_rate": 2.9461279461279464e-06,
884
+ "loss": 0.1352,
885
  "step": 1250
886
  },
887
  {
888
+ "epoch": 14.28,
889
+ "learning_rate": 2.5252525252525253e-06,
890
+ "loss": 0.1372,
891
  "step": 1260
892
  },
893
  {
894
+ "epoch": 14.39,
895
+ "learning_rate": 2.1043771043771047e-06,
896
+ "loss": 0.1215,
897
  "step": 1270
898
  },
899
  {
900
+ "epoch": 14.5,
901
+ "learning_rate": 1.6835016835016836e-06,
902
+ "loss": 0.1334,
903
  "step": 1280
904
  },
905
  {
906
+ "epoch": 14.62,
907
+ "learning_rate": 1.2626262626262627e-06,
908
+ "loss": 0.1326,
 
 
 
 
 
 
 
 
 
909
  "step": 1290
910
  },
911
  {
912
+ "epoch": 14.73,
913
+ "learning_rate": 8.417508417508418e-07,
914
+ "loss": 0.0859,
915
  "step": 1300
916
  },
917
  {
918
+ "epoch": 14.84,
919
+ "learning_rate": 4.208754208754209e-07,
920
+ "loss": 0.1313,
921
  "step": 1310
922
  },
923
  {
924
+ "epoch": 14.96,
925
+ "learning_rate": 0.0,
926
+ "loss": 0.1407,
927
  "step": 1320
928
  },
929
  {
930
+ "epoch": 14.96,
931
+ "eval_accuracy": 0.8495867768595041,
932
+ "eval_loss": 0.48121175169944763,
933
+ "eval_runtime": 16.5675,
934
+ "eval_samples_per_second": 73.034,
935
+ "eval_steps_per_second": 4.587,
936
+ "step": 1320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
937
  },
938
  {
939
+ "epoch": 14.96,
940
+ "step": 1320,
941
+ "total_flos": 6.546875329145733e+18,
942
+ "train_loss": 0.44751356618874,
943
+ "train_runtime": 3414.1758,
944
+ "train_samples_per_second": 24.814,
945
+ "train_steps_per_second": 0.387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
946
  }
947
  ],
948
  "logging_steps": 10,
949
+ "max_steps": 1320,
950
  "num_train_epochs": 15,
951
  "save_steps": 500,
952
+ "total_flos": 6.546875329145733e+18,
953
  "trial_name": null,
954
  "trial_params": null
955
  }