aalonso-developer commited on
Commit
6367bfc
·
1 Parent(s): b2b0bd6

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 7.0,
3
- "eval_accuracy": 0.6993638060991633,
4
- "eval_loss": 1.0771288871765137,
5
- "eval_runtime": 155.7222,
6
- "eval_samples_per_second": 185.728,
7
- "eval_steps_per_second": 23.221,
8
- "train_loss": 1.079674410547284,
9
- "train_runtime": 23712.848,
10
- "train_samples_per_second": 68.302,
11
- "train_steps_per_second": 2.135
12
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "eval_accuracy": 0.6974621395477492,
4
+ "eval_loss": 1.0670689344406128,
5
+ "eval_runtime": 150.7024,
6
+ "eval_samples_per_second": 191.915,
7
+ "eval_steps_per_second": 23.994,
8
+ "train_loss": 1.126858214650741,
9
+ "train_runtime": 24456.4619,
10
+ "train_samples_per_second": 66.225,
11
+ "train_steps_per_second": 2.07
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 7.0,
3
- "eval_accuracy": 0.6993638060991633,
4
- "eval_loss": 1.0771288871765137,
5
- "eval_runtime": 155.7222,
6
- "eval_samples_per_second": 185.728,
7
- "eval_steps_per_second": 23.221
8
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "eval_accuracy": 0.6974621395477492,
4
+ "eval_loss": 1.0670689344406128,
5
+ "eval_runtime": 150.7024,
6
+ "eval_samples_per_second": 191.915,
7
+ "eval_steps_per_second": 23.994
8
  }
runs/Jun01_08-09-44_adrian-development/events.out.tfevents.1685624418.adrian-development.8928.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56322dc9a143e5f266501aba5a58b69b3da1373c8b732c8711b04686aed248cf
3
+ size 369
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 7.0,
3
- "train_loss": 1.079674410547284,
4
- "train_runtime": 23712.848,
5
- "train_samples_per_second": 68.302,
6
- "train_steps_per_second": 2.135
7
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "train_loss": 1.126858214650741,
4
+ "train_runtime": 24456.4619,
5
+ "train_samples_per_second": 66.225,
6
+ "train_steps_per_second": 2.07
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 1.0771288871765137,
3
- "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-36000",
4
  "epoch": 7.0,
5
  "global_step": 50617,
6
  "is_hyper_param_search": false,
@@ -10,61 +10,61 @@
10
  {
11
  "epoch": 0.14,
12
  "learning_rate": 4.9012189580575696e-06,
13
- "loss": 2.7949,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
- "eval_accuracy": 0.4637646082566904,
19
- "eval_loss": 2.3152551651000977,
20
- "eval_runtime": 150.7767,
21
- "eval_samples_per_second": 191.82,
22
- "eval_steps_per_second": 23.982,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
  "learning_rate": 4.80243791611514e-06,
28
- "loss": 2.1028,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
- "eval_accuracy": 0.5636193900836733,
34
- "eval_loss": 1.9398735761642456,
35
- "eval_runtime": 149.8216,
36
- "eval_samples_per_second": 193.043,
37
- "eval_steps_per_second": 24.135,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
  "learning_rate": 4.703755655214652e-06,
43
- "loss": 1.8063,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
- "eval_accuracy": 0.601479842334555,
49
- "eval_loss": 1.7228820323944092,
50
- "eval_runtime": 150.2545,
51
- "eval_samples_per_second": 192.487,
52
- "eval_steps_per_second": 24.066,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
  "learning_rate": 4.605073394314164e-06,
58
- "loss": 1.6252,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
- "eval_accuracy": 0.6286563861420372,
64
- "eval_loss": 1.5740585327148438,
65
- "eval_runtime": 149.4266,
66
- "eval_samples_per_second": 193.553,
67
- "eval_steps_per_second": 24.199,
68
  "step": 4000
69
  },
70
  {
@@ -75,696 +75,696 @@
75
  },
76
  {
77
  "epoch": 0.69,
78
- "eval_accuracy": 0.6416222944471337,
79
- "eval_loss": 1.475919246673584,
80
- "eval_runtime": 150.2657,
81
- "eval_samples_per_second": 192.472,
82
- "eval_steps_per_second": 24.064,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
- "learning_rate": 4.407610091471245e-06,
88
- "loss": 1.4225,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
- "eval_accuracy": 0.6508540211603624,
94
- "eval_loss": 1.3999030590057373,
95
- "eval_runtime": 149.8691,
96
- "eval_samples_per_second": 192.982,
97
- "eval_steps_per_second": 24.128,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
  "learning_rate": 4.3088290495288145e-06,
103
- "loss": 1.3573,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
- "eval_accuracy": 0.659013899453703,
109
- "eval_loss": 1.3376258611679077,
110
- "eval_runtime": 149.2556,
111
- "eval_samples_per_second": 193.775,
112
- "eval_steps_per_second": 24.227,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
  "learning_rate": 4.2100480075863845e-06,
118
- "loss": 1.2666,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
- "eval_accuracy": 0.665721596016873,
124
- "eval_loss": 1.2908859252929688,
125
- "eval_runtime": 149.4901,
126
- "eval_samples_per_second": 193.471,
127
- "eval_steps_per_second": 24.189,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
- "learning_rate": 4.111365746685896e-06,
133
- "loss": 1.2226,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
- "eval_accuracy": 0.6698706866745039,
139
- "eval_loss": 1.2563775777816772,
140
- "eval_runtime": 149.7035,
141
- "eval_samples_per_second": 193.195,
142
- "eval_steps_per_second": 24.154,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
  "learning_rate": 4.012683485785409e-06,
148
- "loss": 1.1999,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
- "eval_accuracy": 0.6753682317958647,
154
- "eval_loss": 1.2273180484771729,
155
- "eval_runtime": 149.9221,
156
- "eval_samples_per_second": 192.914,
157
- "eval_steps_per_second": 24.119,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
  "learning_rate": 3.913902443842978e-06,
163
- "loss": 1.1858,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
- "eval_accuracy": 0.6769932923034369,
169
- "eval_loss": 1.204084038734436,
170
- "eval_runtime": 149.2049,
171
- "eval_samples_per_second": 193.841,
172
- "eval_steps_per_second": 24.235,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
  "learning_rate": 3.815121401900548e-06,
178
- "loss": 1.1457,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
- "eval_accuracy": 0.6784108982781274,
184
- "eval_loss": 1.190036416053772,
185
- "eval_runtime": 153.2132,
186
- "eval_samples_per_second": 188.77,
187
- "eval_steps_per_second": 23.601,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
- "learning_rate": 3.716340359958117e-06,
193
- "loss": 1.1526,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
- "eval_accuracy": 0.6814881405158703,
199
- "eval_loss": 1.1732572317123413,
200
- "eval_runtime": 153.4358,
201
- "eval_samples_per_second": 188.496,
202
- "eval_steps_per_second": 23.567,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
  "learning_rate": 3.6176580990576294e-06,
208
- "loss": 1.1285,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
- "eval_accuracy": 0.6809349284281861,
214
- "eval_loss": 1.1645420789718628,
215
- "eval_runtime": 154.9231,
216
- "eval_samples_per_second": 186.686,
217
- "eval_steps_per_second": 23.341,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
- "learning_rate": 3.5188770571151986e-06,
223
- "loss": 1.0864,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
- "eval_accuracy": 0.6872622916810732,
229
- "eval_loss": 1.1494485139846802,
230
- "eval_runtime": 153.8139,
231
- "eval_samples_per_second": 188.032,
232
- "eval_steps_per_second": 23.509,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
- "learning_rate": 3.4200960151727686e-06,
238
- "loss": 1.0623,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
- "eval_accuracy": 0.687089412903672,
244
- "eval_loss": 1.142927646636963,
245
- "eval_runtime": 150.573,
246
- "eval_samples_per_second": 192.08,
247
- "eval_steps_per_second": 24.015,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
- "learning_rate": 3.3213149732303378e-06,
253
- "loss": 1.0428,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
- "eval_accuracy": 0.6891293824770072,
259
- "eval_loss": 1.1337573528289795,
260
- "eval_runtime": 150.0663,
261
- "eval_samples_per_second": 192.728,
262
- "eval_steps_per_second": 24.096,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
- "learning_rate": 3.2226327123298502e-06,
268
- "loss": 1.0495,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
- "eval_accuracy": 0.6909618975174607,
274
- "eval_loss": 1.1230798959732056,
275
- "eval_runtime": 149.6696,
276
- "eval_samples_per_second": 193.239,
277
- "eval_steps_per_second": 24.16,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
  "learning_rate": 3.123950451429362e-06,
283
- "loss": 1.0401,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
- "eval_accuracy": 0.6924486550031118,
289
- "eval_loss": 1.1154536008834839,
290
- "eval_runtime": 150.8125,
291
- "eval_samples_per_second": 191.775,
292
- "eval_steps_per_second": 23.977,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
  "learning_rate": 3.0251694094869315e-06,
298
- "loss": 1.0279,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
- "eval_accuracy": 0.690581564207178,
304
- "eval_loss": 1.1119346618652344,
305
- "eval_runtime": 149.9854,
306
- "eval_samples_per_second": 192.832,
307
- "eval_steps_per_second": 24.109,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
- "learning_rate": 2.926388367544501e-06,
313
- "loss": 1.0205,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
- "eval_accuracy": 0.694488624576447,
319
- "eval_loss": 1.1036620140075684,
320
- "eval_runtime": 150.0204,
321
- "eval_samples_per_second": 192.787,
322
- "eval_steps_per_second": 24.103,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
  "learning_rate": 2.827706106644013e-06,
328
- "loss": 1.0102,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
- "eval_accuracy": 0.6956296245072955,
334
- "eval_loss": 1.1001828908920288,
335
- "eval_runtime": 150.2658,
336
- "eval_samples_per_second": 192.472,
337
- "eval_steps_per_second": 24.064,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
- "learning_rate": 2.7289250647015827e-06,
343
- "loss": 0.9516,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
- "eval_accuracy": 0.6948343821312496,
349
- "eval_loss": 1.0975209474563599,
350
- "eval_runtime": 150.3336,
351
- "eval_samples_per_second": 192.386,
352
- "eval_steps_per_second": 24.053,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
  "learning_rate": 2.6302428038010947e-06,
358
- "loss": 0.9526,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
- "eval_accuracy": 0.6936242306894406,
364
- "eval_loss": 1.1008094549179077,
365
- "eval_runtime": 151.6663,
366
- "eval_samples_per_second": 190.695,
367
- "eval_steps_per_second": 23.842,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
- "learning_rate": 2.5314617618586647e-06,
373
- "loss": 0.9694,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
- "eval_accuracy": 0.6935205034229998,
379
- "eval_loss": 1.0990099906921387,
380
- "eval_runtime": 150.2935,
381
- "eval_samples_per_second": 192.437,
382
- "eval_steps_per_second": 24.06,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
- "learning_rate": 2.432680719916234e-06,
388
- "loss": 0.9649,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
- "eval_accuracy": 0.6970126547265058,
394
- "eval_loss": 1.0900559425354004,
395
- "eval_runtime": 150.5398,
396
- "eval_samples_per_second": 192.122,
397
- "eval_steps_per_second": 24.02,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
  "learning_rate": 2.333998459015746e-06,
403
- "loss": 0.9522,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
- "eval_accuracy": 0.6967014729271834,
409
- "eval_loss": 1.088005542755127,
410
- "eval_runtime": 151.5602,
411
- "eval_samples_per_second": 190.828,
412
- "eval_steps_per_second": 23.859,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
- "learning_rate": 2.2352174170733155e-06,
418
- "loss": 0.9707,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
- "eval_accuracy": 0.6988797455224397,
424
- "eval_loss": 1.08329439163208,
425
- "eval_runtime": 149.8845,
426
- "eval_samples_per_second": 192.962,
427
- "eval_steps_per_second": 24.125,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.01,
432
- "learning_rate": 2.136436375130885e-06,
433
- "loss": 0.9533,
434
  "step": 29000
435
  },
436
  {
437
  "epoch": 4.01,
438
- "eval_accuracy": 0.69891432127792,
439
- "eval_loss": 1.078162670135498,
440
- "eval_runtime": 150.2294,
441
- "eval_samples_per_second": 192.519,
442
- "eval_steps_per_second": 24.07,
443
  "step": 29000
444
  },
445
  {
446
  "epoch": 4.15,
447
- "learning_rate": 2.0376553331884547e-06,
448
- "loss": 0.9187,
449
  "step": 30000
450
  },
451
  {
452
  "epoch": 4.15,
453
- "eval_accuracy": 0.6963557153723808,
454
- "eval_loss": 1.0856797695159912,
455
- "eval_runtime": 153.2337,
456
- "eval_samples_per_second": 188.744,
457
- "eval_steps_per_second": 23.598,
458
  "step": 30000
459
  },
460
  {
461
  "epoch": 4.29,
462
  "learning_rate": 1.9391706343718513e-06,
463
- "loss": 0.9019,
464
  "step": 31000
465
  },
466
  {
467
  "epoch": 4.29,
468
- "eval_accuracy": 0.6997095636539659,
469
- "eval_loss": 1.085842251777649,
470
- "eval_runtime": 155.7046,
471
- "eval_samples_per_second": 185.749,
472
- "eval_steps_per_second": 23.223,
473
  "step": 31000
474
  },
475
  {
476
  "epoch": 4.43,
477
  "learning_rate": 1.840389592429421e-06,
478
- "loss": 0.9074,
479
  "step": 32000
480
  },
481
  {
482
  "epoch": 4.43,
483
- "eval_accuracy": 0.6987760182559989,
484
- "eval_loss": 1.083946943283081,
485
- "eval_runtime": 154.7842,
486
- "eval_samples_per_second": 186.854,
487
- "eval_steps_per_second": 23.362,
488
  "step": 32000
489
  },
490
  {
491
  "epoch": 4.56,
492
  "learning_rate": 1.7416085504869907e-06,
493
- "loss": 0.903,
494
  "step": 33000
495
  },
496
  {
497
  "epoch": 4.56,
498
- "eval_accuracy": 0.6998132909204066,
499
- "eval_loss": 1.0829977989196777,
500
- "eval_runtime": 155.014,
501
- "eval_samples_per_second": 186.577,
502
- "eval_steps_per_second": 23.327,
503
  "step": 33000
504
  },
505
  {
506
  "epoch": 4.7,
507
  "learning_rate": 1.6428275085445602e-06,
508
- "loss": 0.8951,
509
  "step": 34000
510
  },
511
  {
512
  "epoch": 4.7,
513
- "eval_accuracy": 0.6996749878984856,
514
- "eval_loss": 1.081098198890686,
515
- "eval_runtime": 155.221,
516
- "eval_samples_per_second": 186.328,
517
- "eval_steps_per_second": 23.296,
518
  "step": 34000
519
  },
520
  {
521
  "epoch": 4.84,
522
- "learning_rate": 1.5440464666021298e-06,
523
- "loss": 0.8925,
524
  "step": 35000
525
  },
526
  {
527
  "epoch": 4.84,
528
- "eval_accuracy": 0.7006431090519328,
529
- "eval_loss": 1.0793355703353882,
530
- "eval_runtime": 154.8555,
531
- "eval_samples_per_second": 186.768,
532
- "eval_steps_per_second": 23.351,
533
  "step": 35000
534
  },
535
  {
536
  "epoch": 4.98,
537
  "learning_rate": 1.4453642057016417e-06,
538
- "loss": 0.901,
539
  "step": 36000
540
  },
541
  {
542
  "epoch": 4.98,
543
- "eval_accuracy": 0.6993638060991633,
544
- "eval_loss": 1.0771288871765137,
545
- "eval_runtime": 154.7342,
546
- "eval_samples_per_second": 186.914,
547
- "eval_steps_per_second": 23.369,
548
  "step": 36000
549
  },
550
  {
551
  "epoch": 5.12,
552
  "learning_rate": 1.3465831637592115e-06,
553
- "loss": 0.8694,
554
  "step": 37000
555
  },
556
  {
557
  "epoch": 5.12,
558
- "eval_accuracy": 0.699190927321762,
559
- "eval_loss": 1.0816415548324585,
560
- "eval_runtime": 154.9133,
561
- "eval_samples_per_second": 186.698,
562
- "eval_steps_per_second": 23.342,
563
  "step": 37000
564
  },
565
  {
566
  "epoch": 5.26,
567
  "learning_rate": 1.2479009028587235e-06,
568
- "loss": 0.8709,
569
  "step": 38000
570
  },
571
  {
572
  "epoch": 5.26,
573
- "eval_accuracy": 0.6991563515662818,
574
- "eval_loss": 1.083854079246521,
575
- "eval_runtime": 154.8522,
576
- "eval_samples_per_second": 186.772,
577
- "eval_steps_per_second": 23.351,
578
  "step": 38000
579
  },
580
  {
581
  "epoch": 5.39,
582
  "learning_rate": 1.1491198609162931e-06,
583
- "loss": 0.8557,
584
  "step": 39000
585
  },
586
  {
587
  "epoch": 5.39,
588
- "eval_accuracy": 0.6984648364566766,
589
- "eval_loss": 1.0835996866226196,
590
- "eval_runtime": 156.6539,
591
- "eval_samples_per_second": 184.623,
592
- "eval_steps_per_second": 23.083,
593
  "step": 39000
594
  },
595
  {
596
  "epoch": 5.53,
597
- "learning_rate": 1.0503388189738627e-06,
598
- "loss": 0.8583,
599
  "step": 40000
600
  },
601
  {
602
  "epoch": 5.53,
603
- "eval_accuracy": 0.6977041698361109,
604
- "eval_loss": 1.0821908712387085,
605
- "eval_runtime": 154.2155,
606
- "eval_samples_per_second": 187.543,
607
- "eval_steps_per_second": 23.448,
608
  "step": 40000
609
  },
610
  {
611
  "epoch": 5.67,
612
- "learning_rate": 9.515577770314322e-07,
613
- "loss": 0.8533,
614
  "step": 41000
615
  },
616
  {
617
  "epoch": 5.67,
618
- "eval_accuracy": 0.6983611091902358,
619
- "eval_loss": 1.0834639072418213,
620
- "eval_runtime": 155.654,
621
- "eval_samples_per_second": 185.809,
622
- "eval_steps_per_second": 23.231,
623
  "step": 41000
624
  },
625
  {
626
  "epoch": 5.81,
627
  "learning_rate": 8.528755161309442e-07,
628
- "loss": 0.8545,
629
  "step": 42000
630
  },
631
  {
632
  "epoch": 5.81,
633
- "eval_accuracy": 0.6993983818546435,
634
- "eval_loss": 1.0837448835372925,
635
- "eval_runtime": 155.1115,
636
- "eval_samples_per_second": 186.459,
637
- "eval_steps_per_second": 23.312,
638
  "step": 42000
639
  },
640
  {
641
  "epoch": 5.95,
642
  "learning_rate": 7.540944741885138e-07,
643
- "loss": 0.8608,
644
  "step": 43000
645
  },
646
  {
647
  "epoch": 5.95,
648
- "eval_accuracy": 0.6997441394094461,
649
- "eval_loss": 1.0805258750915527,
650
- "eval_runtime": 153.7674,
651
- "eval_samples_per_second": 188.089,
652
- "eval_steps_per_second": 23.516,
653
  "step": 43000
654
  },
655
  {
656
  "epoch": 6.08,
657
  "learning_rate": 6.554122132880259e-07,
658
- "loss": 0.8292,
659
  "step": 44000
660
  },
661
  {
662
  "epoch": 6.08,
663
- "eval_accuracy": 0.6999515939423276,
664
- "eval_loss": 1.0849448442459106,
665
- "eval_runtime": 155.0279,
666
- "eval_samples_per_second": 186.56,
667
- "eval_steps_per_second": 23.325,
668
  "step": 44000
669
  },
670
  {
671
  "epoch": 6.22,
672
  "learning_rate": 5.566311713455954e-07,
673
- "loss": 0.8385,
674
  "step": 45000
675
  },
676
  {
677
  "epoch": 6.22,
678
- "eval_accuracy": 0.6998478666758868,
679
- "eval_loss": 1.084898591041565,
680
- "eval_runtime": 154.6263,
681
- "eval_samples_per_second": 187.044,
682
- "eval_steps_per_second": 23.385,
683
  "step": 45000
684
  },
685
  {
686
  "epoch": 6.36,
687
  "learning_rate": 4.5785012940316495e-07,
688
- "loss": 0.826,
689
  "step": 46000
690
  },
691
  {
692
  "epoch": 6.36,
693
- "eval_accuracy": 0.699329230343683,
694
- "eval_loss": 1.0853327512741089,
695
- "eval_runtime": 155.3096,
696
- "eval_samples_per_second": 186.222,
697
- "eval_steps_per_second": 23.283,
698
  "step": 46000
699
  },
700
  {
701
  "epoch": 6.5,
702
  "learning_rate": 3.59167868502677e-07,
703
- "loss": 0.829,
704
  "step": 47000
705
  },
706
  {
707
  "epoch": 6.5,
708
- "eval_accuracy": 0.6992946545882027,
709
- "eval_loss": 1.0856379270553589,
710
- "eval_runtime": 153.5481,
711
- "eval_samples_per_second": 188.358,
712
- "eval_steps_per_second": 23.55,
713
  "step": 47000
714
  },
715
  {
716
  "epoch": 6.64,
717
- "learning_rate": 2.603868265602466e-07,
718
- "loss": 0.8345,
719
  "step": 48000
720
  },
721
  {
722
  "epoch": 6.64,
723
- "eval_accuracy": 0.699190927321762,
724
- "eval_loss": 1.0844392776489258,
725
- "eval_runtime": 154.6451,
726
- "eval_samples_per_second": 187.022,
727
- "eval_steps_per_second": 23.383,
728
  "step": 48000
729
  },
730
  {
731
  "epoch": 6.78,
732
- "learning_rate": 1.6160578461781615e-07,
733
- "loss": 0.8347,
734
  "step": 49000
735
  },
736
  {
737
  "epoch": 6.78,
738
- "eval_accuracy": 0.6992600788327225,
739
- "eval_loss": 1.0855780839920044,
740
- "eval_runtime": 154.3431,
741
- "eval_samples_per_second": 187.388,
742
- "eval_steps_per_second": 23.428,
743
  "step": 49000
744
  },
745
  {
746
  "epoch": 6.91,
747
- "learning_rate": 6.282474267538574e-08,
748
- "loss": 0.8266,
749
  "step": 50000
750
  },
751
  {
752
  "epoch": 6.91,
753
- "eval_accuracy": 0.6998478666758868,
754
- "eval_loss": 1.085789680480957,
755
- "eval_runtime": 154.1691,
756
- "eval_samples_per_second": 187.599,
757
- "eval_steps_per_second": 23.455,
758
  "step": 50000
759
  },
760
  {
761
  "epoch": 7.0,
762
  "step": 50617,
763
  "total_flos": 1.2555854407514107e+20,
764
- "train_loss": 1.079674410547284,
765
- "train_runtime": 23712.848,
766
- "train_samples_per_second": 68.302,
767
- "train_steps_per_second": 2.135
768
  }
769
  ],
770
  "max_steps": 50617,
 
1
  {
2
+ "best_metric": 1.0670689344406128,
3
+ "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-47000",
4
  "epoch": 7.0,
5
  "global_step": 50617,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 0.14,
12
  "learning_rate": 4.9012189580575696e-06,
13
+ "loss": 2.8236,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
+ "eval_accuracy": 0.4710600926630247,
19
+ "eval_loss": 2.3487069606781006,
20
+ "eval_runtime": 155.6192,
21
+ "eval_samples_per_second": 185.851,
22
+ "eval_steps_per_second": 23.236,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
  "learning_rate": 4.80243791611514e-06,
28
+ "loss": 2.1379,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
+ "eval_accuracy": 0.5444644215476108,
34
+ "eval_loss": 1.9659085273742676,
35
+ "eval_runtime": 153.7302,
36
+ "eval_samples_per_second": 188.135,
37
+ "eval_steps_per_second": 23.522,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
  "learning_rate": 4.703755655214652e-06,
43
+ "loss": 1.8288,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
+ "eval_accuracy": 0.6094322660950142,
49
+ "eval_loss": 1.736711025238037,
50
+ "eval_runtime": 154.9586,
51
+ "eval_samples_per_second": 186.643,
52
+ "eval_steps_per_second": 23.335,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
  "learning_rate": 4.605073394314164e-06,
58
+ "loss": 1.6449,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
+ "eval_accuracy": 0.6326325980222668,
64
+ "eval_loss": 1.5849583148956299,
65
+ "eval_runtime": 154.5958,
66
+ "eval_samples_per_second": 187.081,
67
+ "eval_steps_per_second": 23.39,
68
  "step": 4000
69
  },
70
  {
 
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "eval_accuracy": 0.6461517184150474,
79
+ "eval_loss": 1.4778083562850952,
80
+ "eval_runtime": 156.4372,
81
+ "eval_samples_per_second": 184.879,
82
+ "eval_steps_per_second": 23.115,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
+ "learning_rate": 4.407511310429303e-06,
88
+ "loss": 1.4122,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
+ "eval_accuracy": 0.6564898693036443,
94
+ "eval_loss": 1.399396300315857,
95
+ "eval_runtime": 154.7049,
96
+ "eval_samples_per_second": 186.949,
97
+ "eval_steps_per_second": 23.374,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
  "learning_rate": 4.3088290495288145e-06,
103
+ "loss": 1.3623,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "eval_accuracy": 0.6619874144250052,
109
+ "eval_loss": 1.3486990928649902,
110
+ "eval_runtime": 153.9773,
111
+ "eval_samples_per_second": 187.833,
112
+ "eval_steps_per_second": 23.484,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
  "learning_rate": 4.2100480075863845e-06,
118
+ "loss": 1.293,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
+ "eval_accuracy": 0.6671392019915635,
124
+ "eval_loss": 1.299405813217163,
125
+ "eval_runtime": 154.5924,
126
+ "eval_samples_per_second": 187.086,
127
+ "eval_steps_per_second": 23.391,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
+ "learning_rate": 4.1112669656439545e-06,
133
+ "loss": 1.2382,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
+ "eval_accuracy": 0.6701818684738261,
139
+ "eval_loss": 1.27021062374115,
140
+ "eval_runtime": 153.5258,
141
+ "eval_samples_per_second": 188.385,
142
+ "eval_steps_per_second": 23.553,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
  "learning_rate": 4.012683485785409e-06,
148
+ "loss": 1.2186,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
+ "eval_accuracy": 0.6728787774012862,
154
+ "eval_loss": 1.2421326637268066,
155
+ "eval_runtime": 155.3885,
156
+ "eval_samples_per_second": 186.127,
157
+ "eval_steps_per_second": 23.271,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
  "learning_rate": 3.913902443842978e-06,
163
+ "loss": 1.1912,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
+ "eval_accuracy": 0.6746767166862596,
169
+ "eval_loss": 1.2220091819763184,
170
+ "eval_runtime": 155.3267,
171
+ "eval_samples_per_second": 186.201,
172
+ "eval_steps_per_second": 23.28,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
  "learning_rate": 3.815121401900548e-06,
178
+ "loss": 1.1798,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
+ "eval_accuracy": 0.6796556254754167,
184
+ "eval_loss": 1.1974430084228516,
185
+ "eval_runtime": 153.7759,
186
+ "eval_samples_per_second": 188.079,
187
+ "eval_steps_per_second": 23.515,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
+ "learning_rate": 3.7164391410000594e-06,
193
+ "loss": 1.1605,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
+ "eval_accuracy": 0.6826982919576793,
199
+ "eval_loss": 1.1833155155181885,
200
+ "eval_runtime": 153.6706,
201
+ "eval_samples_per_second": 188.208,
202
+ "eval_steps_per_second": 23.531,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
  "learning_rate": 3.6176580990576294e-06,
208
+ "loss": 1.1454,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
+ "eval_accuracy": 0.6838047161330475,
214
+ "eval_loss": 1.1689262390136719,
215
+ "eval_runtime": 155.3074,
216
+ "eval_samples_per_second": 186.224,
217
+ "eval_steps_per_second": 23.283,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
+ "learning_rate": 3.518975838157141e-06,
223
+ "loss": 1.1076,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
+ "eval_accuracy": 0.6820759283590346,
229
+ "eval_loss": 1.1666451692581177,
230
+ "eval_runtime": 156.0771,
231
+ "eval_samples_per_second": 185.306,
232
+ "eval_steps_per_second": 23.168,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
+ "learning_rate": 3.4201947962147106e-06,
238
+ "loss": 1.0882,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
+ "eval_accuracy": 0.6835626858446857,
244
+ "eval_loss": 1.1561516523361206,
245
+ "eval_runtime": 154.5444,
246
+ "eval_samples_per_second": 187.144,
247
+ "eval_steps_per_second": 23.398,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
+ "learning_rate": 3.3214137542722802e-06,
253
+ "loss": 1.0832,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
+ "eval_accuracy": 0.6874005947029943,
259
+ "eval_loss": 1.1425527334213257,
260
+ "eval_runtime": 155.3385,
261
+ "eval_samples_per_second": 186.187,
262
+ "eval_steps_per_second": 23.278,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
+ "learning_rate": 3.2227314933717923e-06,
268
+ "loss": 1.0698,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
+ "eval_accuracy": 0.6872622916810732,
274
+ "eval_loss": 1.1318212747573853,
275
+ "eval_runtime": 155.6543,
276
+ "eval_samples_per_second": 185.809,
277
+ "eval_steps_per_second": 23.231,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
  "learning_rate": 3.123950451429362e-06,
283
+ "loss": 1.0752,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
+ "eval_accuracy": 0.6842542009542909,
289
+ "eval_loss": 1.1395872831344604,
290
+ "eval_runtime": 155.6622,
291
+ "eval_samples_per_second": 185.8,
292
+ "eval_steps_per_second": 23.23,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
  "learning_rate": 3.0251694094869315e-06,
298
+ "loss": 1.0659,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
+ "eval_accuracy": 0.6903049581633358,
304
+ "eval_loss": 1.1166965961456299,
305
+ "eval_runtime": 154.6931,
306
+ "eval_samples_per_second": 186.964,
307
+ "eval_steps_per_second": 23.375,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
+ "learning_rate": 2.926487148586444e-06,
313
+ "loss": 1.0561,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
+ "eval_accuracy": 0.6879538067906784,
319
+ "eval_loss": 1.1178348064422607,
320
+ "eval_runtime": 152.6144,
321
+ "eval_samples_per_second": 189.51,
322
+ "eval_steps_per_second": 23.694,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
  "learning_rate": 2.827706106644013e-06,
328
+ "loss": 1.0328,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
+ "eval_accuracy": 0.690581564207178,
334
+ "eval_loss": 1.1114201545715332,
335
+ "eval_runtime": 153.9011,
336
+ "eval_samples_per_second": 187.926,
337
+ "eval_steps_per_second": 23.496,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
+ "learning_rate": 2.729023845743525e-06,
343
+ "loss": 1.0299,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
+ "eval_accuracy": 0.6917225641380265,
349
+ "eval_loss": 1.1057274341583252,
350
+ "eval_runtime": 155.3852,
351
+ "eval_samples_per_second": 186.131,
352
+ "eval_steps_per_second": 23.271,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
  "learning_rate": 2.6302428038010947e-06,
358
+ "loss": 0.9961,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
+ "eval_accuracy": 0.6912730793167831,
364
+ "eval_loss": 1.1056290864944458,
365
+ "eval_runtime": 154.2366,
366
+ "eval_samples_per_second": 187.517,
367
+ "eval_steps_per_second": 23.445,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
+ "learning_rate": 2.5315605429006068e-06,
373
+ "loss": 1.0128,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
+ "eval_accuracy": 0.6937971094668418,
379
+ "eval_loss": 1.0973228216171265,
380
+ "eval_runtime": 156.2066,
381
+ "eval_samples_per_second": 185.152,
382
+ "eval_steps_per_second": 23.149,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
+ "learning_rate": 2.4327795009581764e-06,
388
+ "loss": 1.0118,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
+ "eval_accuracy": 0.6942465942880852,
394
+ "eval_loss": 1.0930811166763306,
395
+ "eval_runtime": 155.695,
396
+ "eval_samples_per_second": 185.761,
397
+ "eval_steps_per_second": 23.225,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
  "learning_rate": 2.333998459015746e-06,
403
+ "loss": 1.0045,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
+ "eval_accuracy": 0.6936933822004011,
409
+ "eval_loss": 1.089782953262329,
410
+ "eval_runtime": 155.3577,
411
+ "eval_samples_per_second": 186.164,
412
+ "eval_steps_per_second": 23.275,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
+ "learning_rate": 2.2354149791572e-06,
418
+ "loss": 0.9923,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
+ "eval_accuracy": 0.6958716547956573,
424
+ "eval_loss": 1.0858705043792725,
425
+ "eval_runtime": 155.5851,
426
+ "eval_samples_per_second": 185.892,
427
+ "eval_steps_per_second": 23.241,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.01,
432
+ "learning_rate": 2.13663393721477e-06,
433
+ "loss": 0.9988,
434
  "step": 29000
435
  },
436
  {
437
  "epoch": 4.01,
438
+ "eval_accuracy": 0.6943848973100062,
439
+ "eval_loss": 1.0852184295654297,
440
+ "eval_runtime": 154.9704,
441
+ "eval_samples_per_second": 186.629,
442
+ "eval_steps_per_second": 23.333,
443
  "step": 29000
444
  },
445
  {
446
  "epoch": 4.15,
447
+ "learning_rate": 2.0378528952723396e-06,
448
+ "loss": 0.9773,
449
  "step": 30000
450
  },
451
  {
452
  "epoch": 4.15,
453
+ "eval_accuracy": 0.6930018670907959,
454
+ "eval_loss": 1.089293122291565,
455
+ "eval_runtime": 156.3077,
456
+ "eval_samples_per_second": 185.033,
457
+ "eval_steps_per_second": 23.134,
458
  "step": 30000
459
  },
460
  {
461
  "epoch": 4.29,
462
  "learning_rate": 1.9391706343718513e-06,
463
+ "loss": 0.9577,
464
  "step": 31000
465
  },
466
  {
467
  "epoch": 4.29,
468
+ "eval_accuracy": 0.6968052001936242,
469
+ "eval_loss": 1.0807169675827026,
470
+ "eval_runtime": 154.1329,
471
+ "eval_samples_per_second": 187.643,
472
+ "eval_steps_per_second": 23.46,
473
  "step": 31000
474
  },
475
  {
476
  "epoch": 4.43,
477
  "learning_rate": 1.840389592429421e-06,
478
+ "loss": 0.9748,
479
  "step": 32000
480
  },
481
  {
482
  "epoch": 4.43,
483
+ "eval_accuracy": 0.6957333517737363,
484
+ "eval_loss": 1.0788837671279907,
485
+ "eval_runtime": 154.5313,
486
+ "eval_samples_per_second": 187.16,
487
+ "eval_steps_per_second": 23.4,
488
  "step": 32000
489
  },
490
  {
491
  "epoch": 4.56,
492
  "learning_rate": 1.7416085504869907e-06,
493
+ "loss": 0.9777,
494
  "step": 33000
495
  },
496
  {
497
  "epoch": 4.56,
498
+ "eval_accuracy": 0.6924486550031118,
499
+ "eval_loss": 1.0864237546920776,
500
+ "eval_runtime": 155.2284,
501
+ "eval_samples_per_second": 186.319,
502
+ "eval_steps_per_second": 23.295,
503
  "step": 33000
504
  },
505
  {
506
  "epoch": 4.7,
507
  "learning_rate": 1.6428275085445602e-06,
508
+ "loss": 0.9536,
509
  "step": 34000
510
  },
511
  {
512
  "epoch": 4.7,
513
+ "eval_accuracy": 0.6949035336422101,
514
+ "eval_loss": 1.0813385248184204,
515
+ "eval_runtime": 153.9831,
516
+ "eval_samples_per_second": 187.826,
517
+ "eval_steps_per_second": 23.483,
518
  "step": 34000
519
  },
520
  {
521
  "epoch": 4.84,
522
+ "learning_rate": 1.544145247644072e-06,
523
+ "loss": 0.9507,
524
  "step": 35000
525
  },
526
  {
527
  "epoch": 4.84,
528
+ "eval_accuracy": 0.6950418366641311,
529
+ "eval_loss": 1.0795472860336304,
530
+ "eval_runtime": 156.0888,
531
+ "eval_samples_per_second": 185.292,
532
+ "eval_steps_per_second": 23.166,
533
  "step": 35000
534
  },
535
  {
536
  "epoch": 4.98,
537
  "learning_rate": 1.4453642057016417e-06,
538
+ "loss": 0.9627,
539
  "step": 36000
540
  },
541
  {
542
  "epoch": 4.98,
543
+ "eval_accuracy": 0.6955258972408547,
544
+ "eval_loss": 1.0754951238632202,
545
+ "eval_runtime": 156.2467,
546
+ "eval_samples_per_second": 185.105,
547
+ "eval_steps_per_second": 23.143,
548
  "step": 36000
549
  },
550
  {
551
  "epoch": 5.12,
552
  "learning_rate": 1.3465831637592115e-06,
553
+ "loss": 0.9399,
554
  "step": 37000
555
  },
556
  {
557
  "epoch": 5.12,
558
+ "eval_accuracy": 0.6960791093285388,
559
+ "eval_loss": 1.0770469903945923,
560
+ "eval_runtime": 153.8614,
561
+ "eval_samples_per_second": 187.974,
562
+ "eval_steps_per_second": 23.502,
563
  "step": 37000
564
  },
565
  {
566
  "epoch": 5.26,
567
  "learning_rate": 1.2479009028587235e-06,
568
+ "loss": 0.9357,
569
  "step": 38000
570
  },
571
  {
572
  "epoch": 5.26,
573
+ "eval_accuracy": 0.6960791093285388,
574
+ "eval_loss": 1.0759111642837524,
575
+ "eval_runtime": 153.1358,
576
+ "eval_samples_per_second": 188.865,
577
+ "eval_steps_per_second": 23.613,
578
  "step": 38000
579
  },
580
  {
581
  "epoch": 5.39,
582
  "learning_rate": 1.1491198609162931e-06,
583
+ "loss": 0.943,
584
  "step": 39000
585
  },
586
  {
587
  "epoch": 5.39,
588
+ "eval_accuracy": 0.6965977456607427,
589
+ "eval_loss": 1.0720691680908203,
590
+ "eval_runtime": 154.5633,
591
+ "eval_samples_per_second": 187.121,
592
+ "eval_steps_per_second": 23.395,
593
  "step": 39000
594
  },
595
  {
596
  "epoch": 5.53,
597
+ "learning_rate": 1.0504376000158052e-06,
598
+ "loss": 0.9244,
599
  "step": 40000
600
  },
601
  {
602
  "epoch": 5.53,
603
+ "eval_accuracy": 0.696908927460065,
604
+ "eval_loss": 1.0704323053359985,
605
+ "eval_runtime": 154.315,
606
+ "eval_samples_per_second": 187.422,
607
+ "eval_steps_per_second": 23.433,
608
  "step": 40000
609
  },
610
  {
611
  "epoch": 5.67,
612
+ "learning_rate": 9.516565580733748e-07,
613
+ "loss": 0.9231,
614
  "step": 41000
615
  },
616
  {
617
  "epoch": 5.67,
618
+ "eval_accuracy": 0.6960445335730586,
619
+ "eval_loss": 1.0727081298828125,
620
+ "eval_runtime": 153.587,
621
+ "eval_samples_per_second": 188.31,
622
+ "eval_steps_per_second": 23.544,
623
  "step": 41000
624
  },
625
  {
626
  "epoch": 5.81,
627
  "learning_rate": 8.528755161309442e-07,
628
+ "loss": 0.9294,
629
  "step": 42000
630
  },
631
  {
632
  "epoch": 5.81,
633
+ "eval_accuracy": 0.6969780789710255,
634
+ "eval_loss": 1.0715699195861816,
635
+ "eval_runtime": 153.1528,
636
+ "eval_samples_per_second": 188.844,
637
+ "eval_steps_per_second": 23.61,
638
  "step": 42000
639
  },
640
  {
641
  "epoch": 5.95,
642
  "learning_rate": 7.540944741885138e-07,
643
+ "loss": 0.9416,
644
  "step": 43000
645
  },
646
  {
647
  "epoch": 5.95,
648
+ "eval_accuracy": 0.6980845031463937,
649
+ "eval_loss": 1.0693832635879517,
650
+ "eval_runtime": 151.6066,
651
+ "eval_samples_per_second": 190.77,
652
+ "eval_steps_per_second": 23.851,
653
  "step": 43000
654
  },
655
  {
656
  "epoch": 6.08,
657
  "learning_rate": 6.554122132880259e-07,
658
+ "loss": 0.9248,
659
  "step": 44000
660
  },
661
  {
662
  "epoch": 6.08,
663
+ "eval_accuracy": 0.6991217758108015,
664
+ "eval_loss": 1.0678476095199585,
665
+ "eval_runtime": 151.367,
666
+ "eval_samples_per_second": 191.072,
667
+ "eval_steps_per_second": 23.889,
668
  "step": 44000
669
  },
670
  {
671
  "epoch": 6.22,
672
  "learning_rate": 5.566311713455954e-07,
673
+ "loss": 0.9137,
674
  "step": 45000
675
  },
676
  {
677
  "epoch": 6.22,
678
+ "eval_accuracy": 0.6976350183251504,
679
+ "eval_loss": 1.0700552463531494,
680
+ "eval_runtime": 152.3071,
681
+ "eval_samples_per_second": 189.893,
682
+ "eval_steps_per_second": 23.742,
683
  "step": 45000
684
  },
685
  {
686
  "epoch": 6.36,
687
  "learning_rate": 4.5785012940316495e-07,
688
+ "loss": 0.91,
689
  "step": 46000
690
  },
691
  {
692
  "epoch": 6.36,
693
+ "eval_accuracy": 0.6971855335039071,
694
+ "eval_loss": 1.0688731670379639,
695
+ "eval_runtime": 151.0293,
696
+ "eval_samples_per_second": 191.499,
697
+ "eval_steps_per_second": 23.942,
698
  "step": 46000
699
  },
700
  {
701
  "epoch": 6.5,
702
  "learning_rate": 3.59167868502677e-07,
703
+ "loss": 0.9256,
704
  "step": 47000
705
  },
706
  {
707
  "epoch": 6.5,
708
+ "eval_accuracy": 0.6974621395477492,
709
+ "eval_loss": 1.0670689344406128,
710
+ "eval_runtime": 150.9586,
711
+ "eval_samples_per_second": 191.589,
712
+ "eval_steps_per_second": 23.954,
713
  "step": 47000
714
  },
715
  {
716
  "epoch": 6.64,
717
+ "learning_rate": 2.6048560760218905e-07,
718
+ "loss": 0.9085,
719
  "step": 48000
720
  },
721
  {
722
  "epoch": 6.64,
723
+ "eval_accuracy": 0.6985339879676371,
724
+ "eval_loss": 1.067813754081726,
725
+ "eval_runtime": 151.0599,
726
+ "eval_samples_per_second": 191.46,
727
+ "eval_steps_per_second": 23.938,
728
  "step": 48000
729
  },
730
  {
731
  "epoch": 6.78,
732
+ "learning_rate": 1.617045656597586e-07,
733
+ "loss": 0.9169,
734
  "step": 49000
735
  },
736
  {
737
  "epoch": 6.78,
738
+ "eval_accuracy": 0.6984302607011963,
739
+ "eval_loss": 1.0689929723739624,
740
+ "eval_runtime": 151.6049,
741
+ "eval_samples_per_second": 190.772,
742
+ "eval_steps_per_second": 23.851,
743
  "step": 49000
744
  },
745
  {
746
  "epoch": 6.91,
747
+ "learning_rate": 6.292352371732817e-08,
748
+ "loss": 0.9087,
749
  "step": 50000
750
  },
751
  {
752
  "epoch": 6.91,
753
+ "eval_accuracy": 0.6972546850148675,
754
+ "eval_loss": 1.069164752960205,
755
+ "eval_runtime": 151.0946,
756
+ "eval_samples_per_second": 191.417,
757
+ "eval_steps_per_second": 23.932,
758
  "step": 50000
759
  },
760
  {
761
  "epoch": 7.0,
762
  "step": 50617,
763
  "total_flos": 1.2555854407514107e+20,
764
+ "train_loss": 1.126858214650741,
765
+ "train_runtime": 24456.4619,
766
+ "train_samples_per_second": 66.225,
767
+ "train_steps_per_second": 2.07
768
  }
769
  ],
770
  "max_steps": 50617,