aalonso-developer commited on
Commit
f9df51a
·
1 Parent(s): 86be4e4

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 7.0,
3
- "eval_accuracy": 0.6974621395477492,
4
- "eval_loss": 1.0670689344406128,
5
- "eval_runtime": 150.7024,
6
- "eval_samples_per_second": 191.915,
7
- "eval_steps_per_second": 23.994,
8
- "train_loss": 1.126858214650741,
9
- "train_runtime": 24456.4619,
10
- "train_samples_per_second": 66.225,
11
- "train_steps_per_second": 2.07
12
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "eval_accuracy": 0.7116727750501348,
4
+ "eval_loss": 0.9854417443275452,
5
+ "eval_runtime": 153.6785,
6
+ "eval_samples_per_second": 188.198,
7
+ "eval_steps_per_second": 23.53,
8
+ "train_loss": 0.6310260885072734,
9
+ "train_runtime": 24479.9902,
10
+ "train_samples_per_second": 66.162,
11
+ "train_steps_per_second": 2.068
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 7.0,
3
- "eval_accuracy": 0.6974621395477492,
4
- "eval_loss": 1.0670689344406128,
5
- "eval_runtime": 150.7024,
6
- "eval_samples_per_second": 191.915,
7
- "eval_steps_per_second": 23.994
8
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "eval_accuracy": 0.7116727750501348,
4
+ "eval_loss": 0.9854417443275452,
5
+ "eval_runtime": 153.6785,
6
+ "eval_samples_per_second": 188.198,
7
+ "eval_steps_per_second": 23.53
8
  }
runs/Jun01_15-30-11_adrian-development/events.out.tfevents.1685650870.adrian-development.134629.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d020070c2dd4a79194aea8b0abdc3fca9b8999fb7746988fb45660d5cb9754bf
3
+ size 369
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 7.0,
3
- "train_loss": 1.126858214650741,
4
- "train_runtime": 24456.4619,
5
- "train_samples_per_second": 66.225,
6
- "train_steps_per_second": 2.07
7
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "train_loss": 0.6310260885072734,
4
+ "train_runtime": 24479.9902,
5
+ "train_samples_per_second": 66.162,
6
+ "train_steps_per_second": 2.068
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 1.0670689344406128,
3
- "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-47000",
4
  "epoch": 7.0,
5
  "global_step": 50617,
6
  "is_hyper_param_search": false,
@@ -9,762 +9,762 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
- "learning_rate": 4.9012189580575696e-06,
13
- "loss": 2.8236,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
- "eval_accuracy": 0.4710600926630247,
19
- "eval_loss": 2.3487069606781006,
20
- "eval_runtime": 155.6192,
21
- "eval_samples_per_second": 185.851,
22
- "eval_steps_per_second": 23.236,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
- "learning_rate": 4.80243791611514e-06,
28
- "loss": 2.1379,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
- "eval_accuracy": 0.5444644215476108,
34
- "eval_loss": 1.9659085273742676,
35
- "eval_runtime": 153.7302,
36
- "eval_samples_per_second": 188.135,
37
- "eval_steps_per_second": 23.522,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
- "learning_rate": 4.703755655214652e-06,
43
- "loss": 1.8288,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
- "eval_accuracy": 0.6094322660950142,
49
- "eval_loss": 1.736711025238037,
50
- "eval_runtime": 154.9586,
51
- "eval_samples_per_second": 186.643,
52
- "eval_steps_per_second": 23.335,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
- "learning_rate": 4.605073394314164e-06,
58
- "loss": 1.6449,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
- "eval_accuracy": 0.6326325980222668,
64
- "eval_loss": 1.5849583148956299,
65
- "eval_runtime": 154.5958,
66
- "eval_samples_per_second": 187.081,
67
- "eval_steps_per_second": 23.39,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
- "learning_rate": 4.506292352371734e-06,
73
- "loss": 1.5127,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
- "eval_accuracy": 0.6461517184150474,
79
- "eval_loss": 1.4778083562850952,
80
- "eval_runtime": 156.4372,
81
- "eval_samples_per_second": 184.879,
82
- "eval_steps_per_second": 23.115,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
- "learning_rate": 4.407511310429303e-06,
88
- "loss": 1.4122,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
- "eval_accuracy": 0.6564898693036443,
94
- "eval_loss": 1.399396300315857,
95
- "eval_runtime": 154.7049,
96
- "eval_samples_per_second": 186.949,
97
- "eval_steps_per_second": 23.374,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
- "learning_rate": 4.3088290495288145e-06,
103
- "loss": 1.3623,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
- "eval_accuracy": 0.6619874144250052,
109
- "eval_loss": 1.3486990928649902,
110
- "eval_runtime": 153.9773,
111
- "eval_samples_per_second": 187.833,
112
- "eval_steps_per_second": 23.484,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
- "learning_rate": 4.2100480075863845e-06,
118
- "loss": 1.293,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
- "eval_accuracy": 0.6671392019915635,
124
- "eval_loss": 1.299405813217163,
125
- "eval_runtime": 154.5924,
126
- "eval_samples_per_second": 187.086,
127
- "eval_steps_per_second": 23.391,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
- "learning_rate": 4.1112669656439545e-06,
133
- "loss": 1.2382,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
- "eval_accuracy": 0.6701818684738261,
139
- "eval_loss": 1.27021062374115,
140
- "eval_runtime": 153.5258,
141
- "eval_samples_per_second": 188.385,
142
- "eval_steps_per_second": 23.553,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
- "learning_rate": 4.012683485785409e-06,
148
- "loss": 1.2186,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
- "eval_accuracy": 0.6728787774012862,
154
- "eval_loss": 1.2421326637268066,
155
- "eval_runtime": 155.3885,
156
- "eval_samples_per_second": 186.127,
157
- "eval_steps_per_second": 23.271,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
- "learning_rate": 3.913902443842978e-06,
163
- "loss": 1.1912,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
- "eval_accuracy": 0.6746767166862596,
169
- "eval_loss": 1.2220091819763184,
170
- "eval_runtime": 155.3267,
171
- "eval_samples_per_second": 186.201,
172
- "eval_steps_per_second": 23.28,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
- "learning_rate": 3.815121401900548e-06,
178
- "loss": 1.1798,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
- "eval_accuracy": 0.6796556254754167,
184
- "eval_loss": 1.1974430084228516,
185
- "eval_runtime": 153.7759,
186
- "eval_samples_per_second": 188.079,
187
- "eval_steps_per_second": 23.515,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
- "learning_rate": 3.7164391410000594e-06,
193
- "loss": 1.1605,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
- "eval_accuracy": 0.6826982919576793,
199
- "eval_loss": 1.1833155155181885,
200
- "eval_runtime": 153.6706,
201
- "eval_samples_per_second": 188.208,
202
- "eval_steps_per_second": 23.531,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
- "learning_rate": 3.6176580990576294e-06,
208
- "loss": 1.1454,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
- "eval_accuracy": 0.6838047161330475,
214
- "eval_loss": 1.1689262390136719,
215
- "eval_runtime": 155.3074,
216
- "eval_samples_per_second": 186.224,
217
- "eval_steps_per_second": 23.283,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
- "learning_rate": 3.518975838157141e-06,
223
- "loss": 1.1076,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
- "eval_accuracy": 0.6820759283590346,
229
- "eval_loss": 1.1666451692581177,
230
- "eval_runtime": 156.0771,
231
- "eval_samples_per_second": 185.306,
232
- "eval_steps_per_second": 23.168,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
- "learning_rate": 3.4201947962147106e-06,
238
- "loss": 1.0882,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
- "eval_accuracy": 0.6835626858446857,
244
- "eval_loss": 1.1561516523361206,
245
- "eval_runtime": 154.5444,
246
- "eval_samples_per_second": 187.144,
247
- "eval_steps_per_second": 23.398,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
- "learning_rate": 3.3214137542722802e-06,
253
- "loss": 1.0832,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
- "eval_accuracy": 0.6874005947029943,
259
- "eval_loss": 1.1425527334213257,
260
- "eval_runtime": 155.3385,
261
- "eval_samples_per_second": 186.187,
262
- "eval_steps_per_second": 23.278,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
- "learning_rate": 3.2227314933717923e-06,
268
- "loss": 1.0698,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
- "eval_accuracy": 0.6872622916810732,
274
- "eval_loss": 1.1318212747573853,
275
- "eval_runtime": 155.6543,
276
- "eval_samples_per_second": 185.809,
277
- "eval_steps_per_second": 23.231,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
- "learning_rate": 3.123950451429362e-06,
283
- "loss": 1.0752,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
- "eval_accuracy": 0.6842542009542909,
289
- "eval_loss": 1.1395872831344604,
290
- "eval_runtime": 155.6622,
291
- "eval_samples_per_second": 185.8,
292
- "eval_steps_per_second": 23.23,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
- "learning_rate": 3.0251694094869315e-06,
298
- "loss": 1.0659,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
- "eval_accuracy": 0.6903049581633358,
304
- "eval_loss": 1.1166965961456299,
305
- "eval_runtime": 154.6931,
306
- "eval_samples_per_second": 186.964,
307
- "eval_steps_per_second": 23.375,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
- "learning_rate": 2.926487148586444e-06,
313
- "loss": 1.0561,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
- "eval_accuracy": 0.6879538067906784,
319
- "eval_loss": 1.1178348064422607,
320
- "eval_runtime": 152.6144,
321
- "eval_samples_per_second": 189.51,
322
- "eval_steps_per_second": 23.694,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
- "learning_rate": 2.827706106644013e-06,
328
- "loss": 1.0328,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
- "eval_accuracy": 0.690581564207178,
334
- "eval_loss": 1.1114201545715332,
335
- "eval_runtime": 153.9011,
336
- "eval_samples_per_second": 187.926,
337
- "eval_steps_per_second": 23.496,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
- "learning_rate": 2.729023845743525e-06,
343
- "loss": 1.0299,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
- "eval_accuracy": 0.6917225641380265,
349
- "eval_loss": 1.1057274341583252,
350
- "eval_runtime": 155.3852,
351
- "eval_samples_per_second": 186.131,
352
- "eval_steps_per_second": 23.271,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
- "learning_rate": 2.6302428038010947e-06,
358
- "loss": 0.9961,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
- "eval_accuracy": 0.6912730793167831,
364
- "eval_loss": 1.1056290864944458,
365
- "eval_runtime": 154.2366,
366
- "eval_samples_per_second": 187.517,
367
- "eval_steps_per_second": 23.445,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
- "learning_rate": 2.5315605429006068e-06,
373
- "loss": 1.0128,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
- "eval_accuracy": 0.6937971094668418,
379
- "eval_loss": 1.0973228216171265,
380
- "eval_runtime": 156.2066,
381
- "eval_samples_per_second": 185.152,
382
- "eval_steps_per_second": 23.149,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
- "learning_rate": 2.4327795009581764e-06,
388
- "loss": 1.0118,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
- "eval_accuracy": 0.6942465942880852,
394
- "eval_loss": 1.0930811166763306,
395
- "eval_runtime": 155.695,
396
- "eval_samples_per_second": 185.761,
397
- "eval_steps_per_second": 23.225,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
- "learning_rate": 2.333998459015746e-06,
403
- "loss": 1.0045,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
- "eval_accuracy": 0.6936933822004011,
409
- "eval_loss": 1.089782953262329,
410
- "eval_runtime": 155.3577,
411
- "eval_samples_per_second": 186.164,
412
- "eval_steps_per_second": 23.275,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
- "learning_rate": 2.2354149791572e-06,
418
- "loss": 0.9923,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
- "eval_accuracy": 0.6958716547956573,
424
- "eval_loss": 1.0858705043792725,
425
- "eval_runtime": 155.5851,
426
- "eval_samples_per_second": 185.892,
427
- "eval_steps_per_second": 23.241,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.01,
432
- "learning_rate": 2.13663393721477e-06,
433
- "loss": 0.9988,
434
  "step": 29000
435
  },
436
  {
437
  "epoch": 4.01,
438
- "eval_accuracy": 0.6943848973100062,
439
- "eval_loss": 1.0852184295654297,
440
- "eval_runtime": 154.9704,
441
- "eval_samples_per_second": 186.629,
442
- "eval_steps_per_second": 23.333,
443
  "step": 29000
444
  },
445
  {
446
  "epoch": 4.15,
447
- "learning_rate": 2.0378528952723396e-06,
448
- "loss": 0.9773,
449
  "step": 30000
450
  },
451
  {
452
  "epoch": 4.15,
453
- "eval_accuracy": 0.6930018670907959,
454
- "eval_loss": 1.089293122291565,
455
- "eval_runtime": 156.3077,
456
- "eval_samples_per_second": 185.033,
457
- "eval_steps_per_second": 23.134,
458
  "step": 30000
459
  },
460
  {
461
  "epoch": 4.29,
462
- "learning_rate": 1.9391706343718513e-06,
463
- "loss": 0.9577,
464
  "step": 31000
465
  },
466
  {
467
  "epoch": 4.29,
468
- "eval_accuracy": 0.6968052001936242,
469
- "eval_loss": 1.0807169675827026,
470
- "eval_runtime": 154.1329,
471
- "eval_samples_per_second": 187.643,
472
- "eval_steps_per_second": 23.46,
473
  "step": 31000
474
  },
475
  {
476
  "epoch": 4.43,
477
- "learning_rate": 1.840389592429421e-06,
478
- "loss": 0.9748,
479
  "step": 32000
480
  },
481
  {
482
  "epoch": 4.43,
483
- "eval_accuracy": 0.6957333517737363,
484
- "eval_loss": 1.0788837671279907,
485
- "eval_runtime": 154.5313,
486
- "eval_samples_per_second": 187.16,
487
- "eval_steps_per_second": 23.4,
488
  "step": 32000
489
  },
490
  {
491
  "epoch": 4.56,
492
- "learning_rate": 1.7416085504869907e-06,
493
- "loss": 0.9777,
494
  "step": 33000
495
  },
496
  {
497
  "epoch": 4.56,
498
- "eval_accuracy": 0.6924486550031118,
499
- "eval_loss": 1.0864237546920776,
500
- "eval_runtime": 155.2284,
501
- "eval_samples_per_second": 186.319,
502
- "eval_steps_per_second": 23.295,
503
  "step": 33000
504
  },
505
  {
506
  "epoch": 4.7,
507
- "learning_rate": 1.6428275085445602e-06,
508
- "loss": 0.9536,
509
  "step": 34000
510
  },
511
  {
512
  "epoch": 4.7,
513
- "eval_accuracy": 0.6949035336422101,
514
- "eval_loss": 1.0813385248184204,
515
- "eval_runtime": 153.9831,
516
- "eval_samples_per_second": 187.826,
517
- "eval_steps_per_second": 23.483,
518
  "step": 34000
519
  },
520
  {
521
  "epoch": 4.84,
522
- "learning_rate": 1.544145247644072e-06,
523
- "loss": 0.9507,
524
  "step": 35000
525
  },
526
  {
527
  "epoch": 4.84,
528
- "eval_accuracy": 0.6950418366641311,
529
- "eval_loss": 1.0795472860336304,
530
- "eval_runtime": 156.0888,
531
- "eval_samples_per_second": 185.292,
532
- "eval_steps_per_second": 23.166,
533
  "step": 35000
534
  },
535
  {
536
  "epoch": 4.98,
537
- "learning_rate": 1.4453642057016417e-06,
538
- "loss": 0.9627,
539
  "step": 36000
540
  },
541
  {
542
  "epoch": 4.98,
543
- "eval_accuracy": 0.6955258972408547,
544
- "eval_loss": 1.0754951238632202,
545
- "eval_runtime": 156.2467,
546
- "eval_samples_per_second": 185.105,
547
- "eval_steps_per_second": 23.143,
548
  "step": 36000
549
  },
550
  {
551
  "epoch": 5.12,
552
- "learning_rate": 1.3465831637592115e-06,
553
- "loss": 0.9399,
554
  "step": 37000
555
  },
556
  {
557
  "epoch": 5.12,
558
- "eval_accuracy": 0.6960791093285388,
559
- "eval_loss": 1.0770469903945923,
560
- "eval_runtime": 153.8614,
561
- "eval_samples_per_second": 187.974,
562
- "eval_steps_per_second": 23.502,
563
  "step": 37000
564
  },
565
  {
566
  "epoch": 5.26,
567
- "learning_rate": 1.2479009028587235e-06,
568
- "loss": 0.9357,
569
  "step": 38000
570
  },
571
  {
572
  "epoch": 5.26,
573
- "eval_accuracy": 0.6960791093285388,
574
- "eval_loss": 1.0759111642837524,
575
- "eval_runtime": 153.1358,
576
- "eval_samples_per_second": 188.865,
577
- "eval_steps_per_second": 23.613,
578
  "step": 38000
579
  },
580
  {
581
  "epoch": 5.39,
582
- "learning_rate": 1.1491198609162931e-06,
583
- "loss": 0.943,
584
  "step": 39000
585
  },
586
  {
587
  "epoch": 5.39,
588
- "eval_accuracy": 0.6965977456607427,
589
- "eval_loss": 1.0720691680908203,
590
- "eval_runtime": 154.5633,
591
- "eval_samples_per_second": 187.121,
592
- "eval_steps_per_second": 23.395,
593
  "step": 39000
594
  },
595
  {
596
  "epoch": 5.53,
597
- "learning_rate": 1.0504376000158052e-06,
598
- "loss": 0.9244,
599
  "step": 40000
600
  },
601
  {
602
  "epoch": 5.53,
603
- "eval_accuracy": 0.696908927460065,
604
- "eval_loss": 1.0704323053359985,
605
- "eval_runtime": 154.315,
606
- "eval_samples_per_second": 187.422,
607
- "eval_steps_per_second": 23.433,
608
  "step": 40000
609
  },
610
  {
611
  "epoch": 5.67,
612
- "learning_rate": 9.516565580733748e-07,
613
- "loss": 0.9231,
614
  "step": 41000
615
  },
616
  {
617
  "epoch": 5.67,
618
- "eval_accuracy": 0.6960445335730586,
619
- "eval_loss": 1.0727081298828125,
620
- "eval_runtime": 153.587,
621
- "eval_samples_per_second": 188.31,
622
- "eval_steps_per_second": 23.544,
623
  "step": 41000
624
  },
625
  {
626
  "epoch": 5.81,
627
- "learning_rate": 8.528755161309442e-07,
628
- "loss": 0.9294,
629
  "step": 42000
630
  },
631
  {
632
  "epoch": 5.81,
633
- "eval_accuracy": 0.6969780789710255,
634
- "eval_loss": 1.0715699195861816,
635
- "eval_runtime": 153.1528,
636
- "eval_samples_per_second": 188.844,
637
- "eval_steps_per_second": 23.61,
638
  "step": 42000
639
  },
640
  {
641
  "epoch": 5.95,
642
- "learning_rate": 7.540944741885138e-07,
643
- "loss": 0.9416,
644
  "step": 43000
645
  },
646
  {
647
  "epoch": 5.95,
648
- "eval_accuracy": 0.6980845031463937,
649
- "eval_loss": 1.0693832635879517,
650
- "eval_runtime": 151.6066,
651
- "eval_samples_per_second": 190.77,
652
- "eval_steps_per_second": 23.851,
653
  "step": 43000
654
  },
655
  {
656
  "epoch": 6.08,
657
- "learning_rate": 6.554122132880259e-07,
658
- "loss": 0.9248,
659
  "step": 44000
660
  },
661
  {
662
  "epoch": 6.08,
663
- "eval_accuracy": 0.6991217758108015,
664
- "eval_loss": 1.0678476095199585,
665
- "eval_runtime": 151.367,
666
- "eval_samples_per_second": 191.072,
667
- "eval_steps_per_second": 23.889,
668
  "step": 44000
669
  },
670
  {
671
  "epoch": 6.22,
672
- "learning_rate": 5.566311713455954e-07,
673
- "loss": 0.9137,
674
  "step": 45000
675
  },
676
  {
677
  "epoch": 6.22,
678
- "eval_accuracy": 0.6976350183251504,
679
- "eval_loss": 1.0700552463531494,
680
- "eval_runtime": 152.3071,
681
- "eval_samples_per_second": 189.893,
682
- "eval_steps_per_second": 23.742,
683
  "step": 45000
684
  },
685
  {
686
  "epoch": 6.36,
687
- "learning_rate": 4.5785012940316495e-07,
688
- "loss": 0.91,
689
  "step": 46000
690
  },
691
  {
692
  "epoch": 6.36,
693
- "eval_accuracy": 0.6971855335039071,
694
- "eval_loss": 1.0688731670379639,
695
- "eval_runtime": 151.0293,
696
- "eval_samples_per_second": 191.499,
697
- "eval_steps_per_second": 23.942,
698
  "step": 46000
699
  },
700
  {
701
  "epoch": 6.5,
702
- "learning_rate": 3.59167868502677e-07,
703
- "loss": 0.9256,
704
  "step": 47000
705
  },
706
  {
707
  "epoch": 6.5,
708
- "eval_accuracy": 0.6974621395477492,
709
- "eval_loss": 1.0670689344406128,
710
- "eval_runtime": 150.9586,
711
- "eval_samples_per_second": 191.589,
712
- "eval_steps_per_second": 23.954,
713
  "step": 47000
714
  },
715
  {
716
  "epoch": 6.64,
717
- "learning_rate": 2.6048560760218905e-07,
718
- "loss": 0.9085,
719
  "step": 48000
720
  },
721
  {
722
  "epoch": 6.64,
723
- "eval_accuracy": 0.6985339879676371,
724
- "eval_loss": 1.067813754081726,
725
- "eval_runtime": 151.0599,
726
- "eval_samples_per_second": 191.46,
727
- "eval_steps_per_second": 23.938,
728
  "step": 48000
729
  },
730
  {
731
  "epoch": 6.78,
732
- "learning_rate": 1.617045656597586e-07,
733
- "loss": 0.9169,
734
  "step": 49000
735
  },
736
  {
737
  "epoch": 6.78,
738
- "eval_accuracy": 0.6984302607011963,
739
- "eval_loss": 1.0689929723739624,
740
- "eval_runtime": 151.6049,
741
- "eval_samples_per_second": 190.772,
742
- "eval_steps_per_second": 23.851,
743
  "step": 49000
744
  },
745
  {
746
  "epoch": 6.91,
747
- "learning_rate": 6.292352371732817e-08,
748
- "loss": 0.9087,
749
  "step": 50000
750
  },
751
  {
752
  "epoch": 6.91,
753
- "eval_accuracy": 0.6972546850148675,
754
- "eval_loss": 1.069164752960205,
755
- "eval_runtime": 151.0946,
756
- "eval_samples_per_second": 191.417,
757
- "eval_steps_per_second": 23.932,
758
  "step": 50000
759
  },
760
  {
761
  "epoch": 7.0,
762
  "step": 50617,
763
  "total_flos": 1.2555854407514107e+20,
764
- "train_loss": 1.126858214650741,
765
- "train_runtime": 24456.4619,
766
- "train_samples_per_second": 66.225,
767
- "train_steps_per_second": 2.07
768
  }
769
  ],
770
  "max_steps": 50617,
 
1
  {
2
+ "best_metric": 0.9854417443275452,
3
+ "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-14000",
4
  "epoch": 7.0,
5
  "global_step": 50617,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
+ "learning_rate": 4.9012189580575696e-05,
13
+ "loss": 1.7292,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
+ "eval_accuracy": 0.6539312633981053,
19
+ "eval_loss": 1.3212652206420898,
20
+ "eval_runtime": 154.4552,
21
+ "eval_samples_per_second": 187.252,
22
+ "eval_steps_per_second": 23.411,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
+ "learning_rate": 4.8025366971570815e-05,
28
+ "loss": 1.2308,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
+ "eval_accuracy": 0.6633012931332549,
34
+ "eval_loss": 1.198843002319336,
35
+ "eval_runtime": 151.3412,
36
+ "eval_samples_per_second": 191.105,
37
+ "eval_steps_per_second": 23.893,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
+ "learning_rate": 4.7037556552146516e-05,
43
+ "loss": 1.1582,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
+ "eval_accuracy": 0.6818684738261531,
49
+ "eval_loss": 1.1182668209075928,
50
+ "eval_runtime": 152.2609,
51
+ "eval_samples_per_second": 189.95,
52
+ "eval_steps_per_second": 23.749,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
+ "learning_rate": 4.604974613272221e-05,
58
+ "loss": 1.109,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
+ "eval_accuracy": 0.6853952008851394,
64
+ "eval_loss": 1.0872125625610352,
65
+ "eval_runtime": 151.3068,
66
+ "eval_samples_per_second": 191.148,
67
+ "eval_steps_per_second": 23.898,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "learning_rate": 4.5062923523717335e-05,
73
+ "loss": 1.0772,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "eval_accuracy": 0.6994329576101238,
79
+ "eval_loss": 1.0495002269744873,
80
+ "eval_runtime": 150.7718,
81
+ "eval_samples_per_second": 191.826,
82
+ "eval_steps_per_second": 23.983,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
+ "learning_rate": 4.407511310429303e-05,
88
+ "loss": 1.0371,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
+ "eval_accuracy": 0.6932093216236774,
94
+ "eval_loss": 1.05511474609375,
95
+ "eval_runtime": 151.6025,
96
+ "eval_samples_per_second": 190.775,
97
+ "eval_steps_per_second": 23.852,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
+ "learning_rate": 4.308829049528815e-05,
103
+ "loss": 1.0287,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "eval_accuracy": 0.7008159878293341,
109
+ "eval_loss": 1.0264155864715576,
110
+ "eval_runtime": 152.8647,
111
+ "eval_samples_per_second": 189.2,
112
+ "eval_steps_per_second": 23.655,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
+ "learning_rate": 4.210048007586384e-05,
118
+ "loss": 0.9387,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
+ "eval_accuracy": 0.7021644422930641,
124
+ "eval_loss": 1.025620937347412,
125
+ "eval_runtime": 152.5576,
126
+ "eval_samples_per_second": 189.581,
127
+ "eval_steps_per_second": 23.703,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
+ "learning_rate": 4.111464527727839e-05,
133
+ "loss": 0.9005,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
+ "eval_accuracy": 0.7013346241615379,
139
+ "eval_loss": 1.0224932432174683,
140
+ "eval_runtime": 151.9509,
141
+ "eval_samples_per_second": 190.338,
142
+ "eval_steps_per_second": 23.797,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
+ "learning_rate": 4.012683485785408e-05,
148
+ "loss": 0.8957,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
+ "eval_accuracy": 0.6993638060991633,
154
+ "eval_loss": 1.0279977321624756,
155
+ "eval_runtime": 150.8357,
156
+ "eval_samples_per_second": 191.745,
157
+ "eval_steps_per_second": 23.973,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
+ "learning_rate": 3.913902443842978e-05,
163
+ "loss": 0.8924,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
+ "eval_accuracy": 0.7037203512896757,
169
+ "eval_loss": 1.016230821609497,
170
+ "eval_runtime": 151.3023,
171
+ "eval_samples_per_second": 191.154,
172
+ "eval_steps_per_second": 23.899,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
+ "learning_rate": 3.8151214019005474e-05,
178
+ "loss": 0.8934,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
+ "eval_accuracy": 0.7084918055459511,
184
+ "eval_loss": 1.0030243396759033,
185
+ "eval_runtime": 151.4775,
186
+ "eval_samples_per_second": 190.933,
187
+ "eval_steps_per_second": 23.872,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
+ "learning_rate": 3.7164391410000594e-05,
193
+ "loss": 0.8959,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
+ "eval_accuracy": 0.7021298665375838,
199
+ "eval_loss": 1.017107367515564,
200
+ "eval_runtime": 151.0427,
201
+ "eval_samples_per_second": 191.482,
202
+ "eval_steps_per_second": 23.94,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
+ "learning_rate": 3.6176580990576294e-05,
208
+ "loss": 0.8927,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
+ "eval_accuracy": 0.7116727750501348,
214
+ "eval_loss": 0.9854417443275452,
215
+ "eval_runtime": 151.5533,
216
+ "eval_samples_per_second": 190.837,
217
+ "eval_steps_per_second": 23.86,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
+ "learning_rate": 3.518877057115199e-05,
223
+ "loss": 0.8107,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
+ "eval_accuracy": 0.701887836249222,
229
+ "eval_loss": 1.0297770500183105,
230
+ "eval_runtime": 151.6336,
231
+ "eval_samples_per_second": 190.736,
232
+ "eval_steps_per_second": 23.847,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
+ "learning_rate": 3.420194796214711e-05,
238
+ "loss": 0.7442,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
+ "eval_accuracy": 0.707523684392504,
244
+ "eval_loss": 1.0102756023406982,
245
+ "eval_runtime": 154.4177,
246
+ "eval_samples_per_second": 187.297,
247
+ "eval_steps_per_second": 23.417,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
+ "learning_rate": 3.32141375427228e-05,
253
+ "loss": 0.7421,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
+ "eval_accuracy": 0.708595532812392,
259
+ "eval_loss": 1.0141746997833252,
260
+ "eval_runtime": 154.77,
261
+ "eval_samples_per_second": 186.871,
262
+ "eval_steps_per_second": 23.364,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
+ "learning_rate": 3.222731493371792e-05,
268
+ "loss": 0.7311,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
+ "eval_accuracy": 0.7110504114514902,
274
+ "eval_loss": 1.01434326171875,
275
+ "eval_runtime": 153.512,
276
+ "eval_samples_per_second": 188.402,
277
+ "eval_steps_per_second": 23.555,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
+ "learning_rate": 3.1239504514293614e-05,
283
+ "loss": 0.7362,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
+ "eval_accuracy": 0.709943987276122,
289
+ "eval_loss": 1.0113046169281006,
290
+ "eval_runtime": 153.941,
291
+ "eval_samples_per_second": 187.877,
292
+ "eval_steps_per_second": 23.49,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
+ "learning_rate": 3.0252681905288737e-05,
298
+ "loss": 0.7392,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
+ "eval_accuracy": 0.7046193209321624,
304
+ "eval_loss": 1.03049898147583,
305
+ "eval_runtime": 154.7774,
306
+ "eval_samples_per_second": 186.862,
307
+ "eval_steps_per_second": 23.363,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
+ "learning_rate": 2.9264871485864437e-05,
313
+ "loss": 0.7515,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
+ "eval_accuracy": 0.7069013207938594,
319
+ "eval_loss": 1.0068926811218262,
320
+ "eval_runtime": 153.9311,
321
+ "eval_samples_per_second": 187.889,
322
+ "eval_steps_per_second": 23.491,
323
  "step": 21000
324
  },
325
  {
326
  "epoch": 3.04,
327
+ "learning_rate": 2.827706106644013e-05,
328
+ "loss": 0.6945,
329
  "step": 22000
330
  },
331
  {
332
  "epoch": 3.04,
333
+ "eval_accuracy": 0.7095636539658392,
334
+ "eval_loss": 1.0314772129058838,
335
+ "eval_runtime": 151.7505,
336
+ "eval_samples_per_second": 190.589,
337
+ "eval_steps_per_second": 23.829,
338
  "step": 22000
339
  },
340
  {
341
  "epoch": 3.18,
342
+ "learning_rate": 2.7289250647015823e-05,
343
+ "loss": 0.5912,
344
  "step": 23000
345
  },
346
  {
347
  "epoch": 3.18,
348
+ "eval_accuracy": 0.7004702302745315,
349
+ "eval_loss": 1.0634562969207764,
350
+ "eval_runtime": 152.7645,
351
+ "eval_samples_per_second": 189.324,
352
+ "eval_steps_per_second": 23.67,
353
  "step": 23000
354
  },
355
  {
356
  "epoch": 3.32,
357
+ "learning_rate": 2.6302428038010946e-05,
358
+ "loss": 0.58,
359
  "step": 24000
360
  },
361
  {
362
  "epoch": 3.32,
363
+ "eval_accuracy": 0.7049996542424452,
364
+ "eval_loss": 1.0592378377914429,
365
+ "eval_runtime": 153.0117,
366
+ "eval_samples_per_second": 189.018,
367
+ "eval_steps_per_second": 23.632,
368
  "step": 24000
369
  },
370
  {
371
  "epoch": 3.46,
372
+ "learning_rate": 2.5314617618586643e-05,
373
+ "loss": 0.576,
374
  "step": 25000
375
  },
376
  {
377
  "epoch": 3.46,
378
+ "eval_accuracy": 0.7018532604937417,
379
+ "eval_loss": 1.0623606443405151,
380
+ "eval_runtime": 154.2965,
381
+ "eval_samples_per_second": 187.444,
382
+ "eval_steps_per_second": 23.435,
383
  "step": 25000
384
  },
385
  {
386
  "epoch": 3.6,
387
+ "learning_rate": 2.4326807199162337e-05,
388
+ "loss": 0.5882,
389
  "step": 26000
390
  },
391
  {
392
  "epoch": 3.6,
393
+ "eval_accuracy": 0.7058986238849319,
394
+ "eval_loss": 1.0582630634307861,
395
+ "eval_runtime": 154.8374,
396
+ "eval_samples_per_second": 186.79,
397
+ "eval_steps_per_second": 23.354,
398
  "step": 26000
399
  },
400
  {
401
  "epoch": 3.73,
402
+ "learning_rate": 2.3338996779738033e-05,
403
+ "loss": 0.5867,
404
  "step": 27000
405
  },
406
  {
407
  "epoch": 3.73,
408
+ "eval_accuracy": 0.7076965631699053,
409
+ "eval_loss": 1.0526437759399414,
410
+ "eval_runtime": 153.5073,
411
+ "eval_samples_per_second": 188.408,
412
+ "eval_steps_per_second": 23.556,
413
  "step": 27000
414
  },
415
  {
416
  "epoch": 3.87,
417
+ "learning_rate": 2.2352174170733153e-05,
418
+ "loss": 0.593,
419
  "step": 28000
420
  },
421
  {
422
  "epoch": 3.87,
423
+ "eval_accuracy": 0.7047921997095636,
424
+ "eval_loss": 1.0647141933441162,
425
+ "eval_runtime": 153.5176,
426
+ "eval_samples_per_second": 188.395,
427
+ "eval_steps_per_second": 23.554,
428
  "step": 28000
429
  },
430
  {
431
  "epoch": 4.01,
432
+ "learning_rate": 2.136436375130885e-05,
433
+ "loss": 0.5745,
434
  "step": 29000
435
  },
436
  {
437
  "epoch": 4.01,
438
+ "eval_accuracy": 0.7044810179102413,
439
+ "eval_loss": 1.0814189910888672,
440
+ "eval_runtime": 147.6133,
441
+ "eval_samples_per_second": 195.931,
442
+ "eval_steps_per_second": 24.496,
443
  "step": 29000
444
  },
445
  {
446
  "epoch": 4.15,
447
+ "learning_rate": 2.0377541142303973e-05,
448
+ "loss": 0.4332,
449
  "step": 30000
450
  },
451
  {
452
  "epoch": 4.15,
453
+ "eval_accuracy": 0.7036166240232349,
454
+ "eval_loss": 1.1231409311294556,
455
+ "eval_runtime": 147.2084,
456
+ "eval_samples_per_second": 196.47,
457
+ "eval_steps_per_second": 24.564,
458
  "step": 30000
459
  },
460
  {
461
  "epoch": 4.29,
462
+ "learning_rate": 1.9389730722879666e-05,
463
+ "loss": 0.4427,
464
  "step": 31000
465
  },
466
  {
467
  "epoch": 4.29,
468
+ "eval_accuracy": 0.6981536546573542,
469
+ "eval_loss": 1.1329883337020874,
470
+ "eval_runtime": 156.5248,
471
+ "eval_samples_per_second": 184.776,
472
+ "eval_steps_per_second": 23.102,
473
  "step": 31000
474
  },
475
  {
476
  "epoch": 4.43,
477
+ "learning_rate": 1.8402908113874786e-05,
478
+ "loss": 0.4475,
479
  "step": 32000
480
  },
481
  {
482
  "epoch": 4.43,
483
+ "eval_accuracy": 0.7000207454532882,
484
+ "eval_loss": 1.126924991607666,
485
+ "eval_runtime": 155.4925,
486
+ "eval_samples_per_second": 186.003,
487
+ "eval_steps_per_second": 23.255,
488
  "step": 32000
489
  },
490
  {
491
  "epoch": 4.56,
492
+ "learning_rate": 1.7415097694450482e-05,
493
+ "loss": 0.4488,
494
  "step": 33000
495
  },
496
  {
497
  "epoch": 4.56,
498
+ "eval_accuracy": 0.7025101998478667,
499
+ "eval_loss": 1.1300145387649536,
500
+ "eval_runtime": 153.2715,
501
+ "eval_samples_per_second": 188.698,
502
+ "eval_steps_per_second": 23.592,
503
  "step": 33000
504
  },
505
  {
506
  "epoch": 4.7,
507
+ "learning_rate": 1.6428275085445602e-05,
508
+ "loss": 0.4431,
509
  "step": 34000
510
  },
511
  {
512
  "epoch": 4.7,
513
+ "eval_accuracy": 0.701887836249222,
514
+ "eval_loss": 1.1304574012756348,
515
+ "eval_runtime": 153.1234,
516
+ "eval_samples_per_second": 188.88,
517
+ "eval_steps_per_second": 23.615,
518
  "step": 34000
519
  },
520
  {
521
  "epoch": 4.84,
522
+ "learning_rate": 1.54404646660213e-05,
523
+ "loss": 0.4514,
524
  "step": 35000
525
  },
526
  {
527
  "epoch": 4.84,
528
+ "eval_accuracy": 0.6990872000553212,
529
+ "eval_loss": 1.1445332765579224,
530
+ "eval_runtime": 155.0001,
531
+ "eval_samples_per_second": 186.593,
532
+ "eval_steps_per_second": 23.329,
533
  "step": 35000
534
  },
535
  {
536
  "epoch": 4.98,
537
+ "learning_rate": 1.4452654246596994e-05,
538
+ "loss": 0.4427,
539
  "step": 36000
540
  },
541
  {
542
  "epoch": 4.98,
543
+ "eval_accuracy": 0.702821381647189,
544
+ "eval_loss": 1.1224807500839233,
545
+ "eval_runtime": 153.3684,
546
+ "eval_samples_per_second": 188.579,
547
+ "eval_steps_per_second": 23.577,
548
  "step": 36000
549
  },
550
  {
551
  "epoch": 5.12,
552
+ "learning_rate": 1.346681944801154e-05,
553
+ "loss": 0.3391,
554
  "step": 37000
555
  },
556
  {
557
  "epoch": 5.12,
558
+ "eval_accuracy": 0.6964248668833414,
559
+ "eval_loss": 1.1918517351150513,
560
+ "eval_runtime": 152.8672,
561
+ "eval_samples_per_second": 189.197,
562
+ "eval_steps_per_second": 23.655,
563
  "step": 37000
564
  },
565
  {
566
  "epoch": 5.26,
567
+ "learning_rate": 1.2479009028587233e-05,
568
+ "loss": 0.3222,
569
  "step": 38000
570
  },
571
  {
572
  "epoch": 5.26,
573
+ "eval_accuracy": 0.6971163819929466,
574
+ "eval_loss": 1.2108075618743896,
575
+ "eval_runtime": 155.4241,
576
+ "eval_samples_per_second": 186.084,
577
+ "eval_steps_per_second": 23.265,
578
  "step": 38000
579
  },
580
  {
581
  "epoch": 5.39,
582
+ "learning_rate": 1.149119860916293e-05,
583
+ "loss": 0.315,
584
  "step": 39000
585
  },
586
  {
587
  "epoch": 5.39,
588
+ "eval_accuracy": 0.6985685637231174,
589
+ "eval_loss": 1.2174683809280396,
590
+ "eval_runtime": 154.5842,
591
+ "eval_samples_per_second": 187.095,
592
+ "eval_steps_per_second": 23.392,
593
  "step": 39000
594
  },
595
  {
596
  "epoch": 5.53,
597
+ "learning_rate": 1.0503388189738627e-05,
598
+ "loss": 0.3307,
599
  "step": 40000
600
  },
601
  {
602
  "epoch": 5.53,
603
+ "eval_accuracy": 0.6943848973100062,
604
+ "eval_loss": 1.219739556312561,
605
+ "eval_runtime": 155.288,
606
+ "eval_samples_per_second": 186.247,
607
+ "eval_steps_per_second": 23.286,
608
  "step": 40000
609
  },
610
  {
611
  "epoch": 5.67,
612
+ "learning_rate": 9.516565580733746e-06,
613
+ "loss": 0.3187,
614
  "step": 41000
615
  },
616
  {
617
  "epoch": 5.67,
618
+ "eval_accuracy": 0.6988105940114792,
619
+ "eval_loss": 1.228104591369629,
620
+ "eval_runtime": 154.3425,
621
+ "eval_samples_per_second": 187.388,
622
+ "eval_steps_per_second": 23.428,
623
  "step": 41000
624
  },
625
  {
626
  "epoch": 5.81,
627
+ "learning_rate": 8.528755161309441e-06,
628
+ "loss": 0.3327,
629
  "step": 42000
630
  },
631
  {
632
  "epoch": 5.81,
633
+ "eval_accuracy": 0.6966668971717032,
634
+ "eval_loss": 1.2378689050674438,
635
+ "eval_runtime": 147.7901,
636
+ "eval_samples_per_second": 195.696,
637
+ "eval_steps_per_second": 24.467,
638
  "step": 42000
639
  },
640
  {
641
  "epoch": 5.95,
642
+ "learning_rate": 7.540944741885138e-06,
643
+ "loss": 0.3213,
644
  "step": 43000
645
  },
646
  {
647
  "epoch": 5.95,
648
+ "eval_accuracy": 0.6972201092593873,
649
+ "eval_loss": 1.2366853952407837,
650
+ "eval_runtime": 155.5771,
651
+ "eval_samples_per_second": 185.901,
652
+ "eval_steps_per_second": 23.242,
653
  "step": 43000
654
  },
655
  {
656
  "epoch": 6.08,
657
+ "learning_rate": 6.553134322460834e-06,
658
+ "loss": 0.2688,
659
  "step": 44000
660
  },
661
  {
662
  "epoch": 6.08,
663
+ "eval_accuracy": 0.6940737155106839,
664
+ "eval_loss": 1.2731064558029175,
665
+ "eval_runtime": 155.6374,
666
+ "eval_samples_per_second": 185.829,
667
+ "eval_steps_per_second": 23.233,
668
  "step": 44000
669
  },
670
  {
671
  "epoch": 6.22,
672
+ "learning_rate": 5.56532390303653e-06,
673
+ "loss": 0.2395,
674
  "step": 45000
675
  },
676
  {
677
  "epoch": 6.22,
678
+ "eval_accuracy": 0.696632321416223,
679
+ "eval_loss": 1.2903701066970825,
680
+ "eval_runtime": 153.3038,
681
+ "eval_samples_per_second": 188.658,
682
+ "eval_steps_per_second": 23.587,
683
  "step": 45000
684
  },
685
  {
686
  "epoch": 6.36,
687
+ "learning_rate": 4.5785012940316495e-06,
688
+ "loss": 0.2407,
689
  "step": 46000
690
  },
691
  {
692
  "epoch": 6.36,
693
+ "eval_accuracy": 0.6951455639305719,
694
+ "eval_loss": 1.2933955192565918,
695
+ "eval_runtime": 155.5305,
696
+ "eval_samples_per_second": 185.957,
697
+ "eval_steps_per_second": 23.249,
698
  "step": 46000
699
  },
700
  {
701
  "epoch": 6.5,
702
+ "learning_rate": 3.5906908746073454e-06,
703
+ "loss": 0.2312,
704
  "step": 47000
705
  },
706
  {
707
  "epoch": 6.5,
708
+ "eval_accuracy": 0.6955258972408547,
709
+ "eval_loss": 1.2975932359695435,
710
+ "eval_runtime": 155.813,
711
+ "eval_samples_per_second": 185.62,
712
+ "eval_steps_per_second": 23.207,
713
  "step": 47000
714
  },
715
  {
716
  "epoch": 6.64,
717
+ "learning_rate": 2.6028804551830413e-06,
718
+ "loss": 0.2369,
719
  "step": 48000
720
  },
721
  {
722
  "epoch": 6.64,
723
+ "eval_accuracy": 0.6934859276675195,
724
+ "eval_loss": 1.3018196821212769,
725
+ "eval_runtime": 150.7052,
726
+ "eval_samples_per_second": 191.911,
727
+ "eval_steps_per_second": 23.994,
728
  "step": 48000
729
  },
730
  {
731
  "epoch": 6.78,
732
+ "learning_rate": 1.6170456565975861e-06,
733
+ "loss": 0.2317,
734
  "step": 49000
735
  },
736
  {
737
  "epoch": 6.78,
738
+ "eval_accuracy": 0.6949381093976903,
739
+ "eval_loss": 1.3031286001205444,
740
+ "eval_runtime": 154.6116,
741
+ "eval_samples_per_second": 187.062,
742
+ "eval_steps_per_second": 23.388,
743
  "step": 49000
744
  },
745
  {
746
  "epoch": 6.91,
747
+ "learning_rate": 6.292352371732817e-07,
748
+ "loss": 0.2395,
749
  "step": 50000
750
  },
751
  {
752
  "epoch": 6.91,
753
+ "eval_accuracy": 0.6957679275292166,
754
+ "eval_loss": 1.3016529083251953,
755
+ "eval_runtime": 153.6886,
756
+ "eval_samples_per_second": 188.186,
757
+ "eval_steps_per_second": 23.528,
758
  "step": 50000
759
  },
760
  {
761
  "epoch": 7.0,
762
  "step": 50617,
763
  "total_flos": 1.2555854407514107e+20,
764
+ "train_loss": 0.6310260885072734,
765
+ "train_runtime": 24479.9902,
766
+ "train_samples_per_second": 66.162,
767
+ "train_steps_per_second": 2.068
768
  }
769
  ],
770
  "max_steps": 50617,