aalonso-developer commited on
Commit
74854b4
·
1 Parent(s): 3d163cb

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 7.0,
3
- "eval_accuracy": 0.7116727750501348,
4
- "eval_loss": 0.9854417443275452,
5
- "eval_runtime": 153.6785,
6
- "eval_samples_per_second": 188.198,
7
- "eval_steps_per_second": 23.53,
8
- "train_loss": 0.6310260885072734,
9
- "train_runtime": 24479.9902,
10
- "train_samples_per_second": 66.162,
11
- "train_steps_per_second": 2.068
12
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.7164442293064104,
4
+ "eval_loss": 0.9865725040435791,
5
+ "eval_runtime": 151.7517,
6
+ "eval_samples_per_second": 190.588,
7
+ "eval_steps_per_second": 23.828,
8
+ "train_loss": 0.9066727288788254,
9
+ "train_runtime": 10400.7433,
10
+ "train_samples_per_second": 66.739,
11
+ "train_steps_per_second": 2.086
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.0,
3
- "eval_accuracy": 0.7116727750501348,
4
- "eval_loss": 0.9854417443275452,
5
- "eval_runtime": 153.6785,
6
- "eval_samples_per_second": 188.198,
7
- "eval_steps_per_second": 23.53
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.7164442293064104,
4
+ "eval_loss": 0.9865725040435791,
5
+ "eval_runtime": 151.7517,
6
+ "eval_samples_per_second": 190.588,
7
+ "eval_steps_per_second": 23.828
8
  }
runs/Jun04_16-32-48_adrian-development/events.out.tfevents.1685899738.adrian-development.13219.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2021eee5433a1ae5bd04475acb95834443fdd524cf9ea53c7ef3272e545953d6
3
+ size 369
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 7.0,
3
- "train_loss": 0.6310260885072734,
4
- "train_runtime": 24479.9902,
5
- "train_samples_per_second": 66.162,
6
- "train_steps_per_second": 2.068
7
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.9066727288788254,
4
+ "train_runtime": 10400.7433,
5
+ "train_samples_per_second": 66.739,
6
+ "train_steps_per_second": 2.086
7
  }
trainer_state.json CHANGED
@@ -1,775 +1,340 @@
1
  {
2
- "best_metric": 0.9854417443275452,
3
- "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-14000",
4
- "epoch": 7.0,
5
- "global_step": 50617,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
- "learning_rate": 4.9012189580575696e-05,
13
- "loss": 1.7292,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
- "eval_accuracy": 0.6539312633981053,
19
- "eval_loss": 1.3212652206420898,
20
- "eval_runtime": 154.4552,
21
- "eval_samples_per_second": 187.252,
22
- "eval_steps_per_second": 23.411,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
- "learning_rate": 4.8025366971570815e-05,
28
- "loss": 1.2308,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
- "eval_accuracy": 0.6633012931332549,
34
- "eval_loss": 1.198843002319336,
35
- "eval_runtime": 151.3412,
36
- "eval_samples_per_second": 191.105,
37
- "eval_steps_per_second": 23.893,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
- "learning_rate": 4.7037556552146516e-05,
43
- "loss": 1.1582,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
- "eval_accuracy": 0.6818684738261531,
49
- "eval_loss": 1.1182668209075928,
50
- "eval_runtime": 152.2609,
51
- "eval_samples_per_second": 189.95,
52
- "eval_steps_per_second": 23.749,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
- "learning_rate": 4.604974613272221e-05,
58
- "loss": 1.109,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
- "eval_accuracy": 0.6853952008851394,
64
- "eval_loss": 1.0872125625610352,
65
- "eval_runtime": 151.3068,
66
- "eval_samples_per_second": 191.148,
67
- "eval_steps_per_second": 23.898,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
- "learning_rate": 4.5062923523717335e-05,
73
- "loss": 1.0772,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
- "eval_accuracy": 0.6994329576101238,
79
- "eval_loss": 1.0495002269744873,
80
- "eval_runtime": 150.7718,
81
- "eval_samples_per_second": 191.826,
82
- "eval_steps_per_second": 23.983,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
- "learning_rate": 4.407511310429303e-05,
88
- "loss": 1.0371,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
- "eval_accuracy": 0.6932093216236774,
94
- "eval_loss": 1.05511474609375,
95
- "eval_runtime": 151.6025,
96
- "eval_samples_per_second": 190.775,
97
- "eval_steps_per_second": 23.852,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
- "learning_rate": 4.308829049528815e-05,
103
- "loss": 1.0287,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
- "eval_accuracy": 0.7008159878293341,
109
- "eval_loss": 1.0264155864715576,
110
- "eval_runtime": 152.8647,
111
- "eval_samples_per_second": 189.2,
112
- "eval_steps_per_second": 23.655,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
- "learning_rate": 4.210048007586384e-05,
118
- "loss": 0.9387,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
- "eval_accuracy": 0.7021644422930641,
124
- "eval_loss": 1.025620937347412,
125
- "eval_runtime": 152.5576,
126
- "eval_samples_per_second": 189.581,
127
- "eval_steps_per_second": 23.703,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
- "learning_rate": 4.111464527727839e-05,
133
- "loss": 0.9005,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
- "eval_accuracy": 0.7013346241615379,
139
- "eval_loss": 1.0224932432174683,
140
- "eval_runtime": 151.9509,
141
- "eval_samples_per_second": 190.338,
142
- "eval_steps_per_second": 23.797,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
- "learning_rate": 4.012683485785408e-05,
148
- "loss": 0.8957,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
- "eval_accuracy": 0.6993638060991633,
154
- "eval_loss": 1.0279977321624756,
155
- "eval_runtime": 150.8357,
156
- "eval_samples_per_second": 191.745,
157
- "eval_steps_per_second": 23.973,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
- "learning_rate": 3.913902443842978e-05,
163
- "loss": 0.8924,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
- "eval_accuracy": 0.7037203512896757,
169
- "eval_loss": 1.016230821609497,
170
- "eval_runtime": 151.3023,
171
- "eval_samples_per_second": 191.154,
172
- "eval_steps_per_second": 23.899,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
- "learning_rate": 3.8151214019005474e-05,
178
- "loss": 0.8934,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
- "eval_accuracy": 0.7084918055459511,
184
- "eval_loss": 1.0030243396759033,
185
- "eval_runtime": 151.4775,
186
- "eval_samples_per_second": 190.933,
187
- "eval_steps_per_second": 23.872,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
- "learning_rate": 3.7164391410000594e-05,
193
- "loss": 0.8959,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
- "eval_accuracy": 0.7021298665375838,
199
- "eval_loss": 1.017107367515564,
200
- "eval_runtime": 151.0427,
201
- "eval_samples_per_second": 191.482,
202
- "eval_steps_per_second": 23.94,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
- "learning_rate": 3.6176580990576294e-05,
208
- "loss": 0.8927,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
- "eval_accuracy": 0.7116727750501348,
214
- "eval_loss": 0.9854417443275452,
215
- "eval_runtime": 151.5533,
216
- "eval_samples_per_second": 190.837,
217
- "eval_steps_per_second": 23.86,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
- "learning_rate": 3.518877057115199e-05,
223
- "loss": 0.8107,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
- "eval_accuracy": 0.701887836249222,
229
- "eval_loss": 1.0297770500183105,
230
- "eval_runtime": 151.6336,
231
- "eval_samples_per_second": 190.736,
232
- "eval_steps_per_second": 23.847,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
- "learning_rate": 3.420194796214711e-05,
238
- "loss": 0.7442,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
- "eval_accuracy": 0.707523684392504,
244
- "eval_loss": 1.0102756023406982,
245
- "eval_runtime": 154.4177,
246
- "eval_samples_per_second": 187.297,
247
- "eval_steps_per_second": 23.417,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
- "learning_rate": 3.32141375427228e-05,
253
- "loss": 0.7421,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
- "eval_accuracy": 0.708595532812392,
259
- "eval_loss": 1.0141746997833252,
260
- "eval_runtime": 154.77,
261
- "eval_samples_per_second": 186.871,
262
- "eval_steps_per_second": 23.364,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
- "learning_rate": 3.222731493371792e-05,
268
- "loss": 0.7311,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
- "eval_accuracy": 0.7110504114514902,
274
- "eval_loss": 1.01434326171875,
275
- "eval_runtime": 153.512,
276
- "eval_samples_per_second": 188.402,
277
- "eval_steps_per_second": 23.555,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
- "learning_rate": 3.1239504514293614e-05,
283
- "loss": 0.7362,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
- "eval_accuracy": 0.709943987276122,
289
- "eval_loss": 1.0113046169281006,
290
- "eval_runtime": 153.941,
291
- "eval_samples_per_second": 187.877,
292
- "eval_steps_per_second": 23.49,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
- "learning_rate": 3.0252681905288737e-05,
298
- "loss": 0.7392,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
- "eval_accuracy": 0.7046193209321624,
304
- "eval_loss": 1.03049898147583,
305
- "eval_runtime": 154.7774,
306
- "eval_samples_per_second": 186.862,
307
- "eval_steps_per_second": 23.363,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
- "learning_rate": 2.9264871485864437e-05,
313
- "loss": 0.7515,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
- "eval_accuracy": 0.7069013207938594,
319
- "eval_loss": 1.0068926811218262,
320
- "eval_runtime": 153.9311,
321
- "eval_samples_per_second": 187.889,
322
- "eval_steps_per_second": 23.491,
323
  "step": 21000
324
  },
325
  {
326
- "epoch": 3.04,
327
- "learning_rate": 2.827706106644013e-05,
328
- "loss": 0.6945,
329
- "step": 22000
330
- },
331
- {
332
- "epoch": 3.04,
333
- "eval_accuracy": 0.7095636539658392,
334
- "eval_loss": 1.0314772129058838,
335
- "eval_runtime": 151.7505,
336
- "eval_samples_per_second": 190.589,
337
- "eval_steps_per_second": 23.829,
338
- "step": 22000
339
- },
340
- {
341
- "epoch": 3.18,
342
- "learning_rate": 2.7289250647015823e-05,
343
- "loss": 0.5912,
344
- "step": 23000
345
- },
346
- {
347
- "epoch": 3.18,
348
- "eval_accuracy": 0.7004702302745315,
349
- "eval_loss": 1.0634562969207764,
350
- "eval_runtime": 152.7645,
351
- "eval_samples_per_second": 189.324,
352
- "eval_steps_per_second": 23.67,
353
- "step": 23000
354
- },
355
- {
356
- "epoch": 3.32,
357
- "learning_rate": 2.6302428038010946e-05,
358
- "loss": 0.58,
359
- "step": 24000
360
- },
361
- {
362
- "epoch": 3.32,
363
- "eval_accuracy": 0.7049996542424452,
364
- "eval_loss": 1.0592378377914429,
365
- "eval_runtime": 153.0117,
366
- "eval_samples_per_second": 189.018,
367
- "eval_steps_per_second": 23.632,
368
- "step": 24000
369
- },
370
- {
371
- "epoch": 3.46,
372
- "learning_rate": 2.5314617618586643e-05,
373
- "loss": 0.576,
374
- "step": 25000
375
- },
376
- {
377
- "epoch": 3.46,
378
- "eval_accuracy": 0.7018532604937417,
379
- "eval_loss": 1.0623606443405151,
380
- "eval_runtime": 154.2965,
381
- "eval_samples_per_second": 187.444,
382
- "eval_steps_per_second": 23.435,
383
- "step": 25000
384
- },
385
- {
386
- "epoch": 3.6,
387
- "learning_rate": 2.4326807199162337e-05,
388
- "loss": 0.5882,
389
- "step": 26000
390
- },
391
- {
392
- "epoch": 3.6,
393
- "eval_accuracy": 0.7058986238849319,
394
- "eval_loss": 1.0582630634307861,
395
- "eval_runtime": 154.8374,
396
- "eval_samples_per_second": 186.79,
397
- "eval_steps_per_second": 23.354,
398
- "step": 26000
399
- },
400
- {
401
- "epoch": 3.73,
402
- "learning_rate": 2.3338996779738033e-05,
403
- "loss": 0.5867,
404
- "step": 27000
405
- },
406
- {
407
- "epoch": 3.73,
408
- "eval_accuracy": 0.7076965631699053,
409
- "eval_loss": 1.0526437759399414,
410
- "eval_runtime": 153.5073,
411
- "eval_samples_per_second": 188.408,
412
- "eval_steps_per_second": 23.556,
413
- "step": 27000
414
- },
415
- {
416
- "epoch": 3.87,
417
- "learning_rate": 2.2352174170733153e-05,
418
- "loss": 0.593,
419
- "step": 28000
420
- },
421
- {
422
- "epoch": 3.87,
423
- "eval_accuracy": 0.7047921997095636,
424
- "eval_loss": 1.0647141933441162,
425
- "eval_runtime": 153.5176,
426
- "eval_samples_per_second": 188.395,
427
- "eval_steps_per_second": 23.554,
428
- "step": 28000
429
- },
430
- {
431
- "epoch": 4.01,
432
- "learning_rate": 2.136436375130885e-05,
433
- "loss": 0.5745,
434
- "step": 29000
435
- },
436
- {
437
- "epoch": 4.01,
438
- "eval_accuracy": 0.7044810179102413,
439
- "eval_loss": 1.0814189910888672,
440
- "eval_runtime": 147.6133,
441
- "eval_samples_per_second": 195.931,
442
- "eval_steps_per_second": 24.496,
443
- "step": 29000
444
- },
445
- {
446
- "epoch": 4.15,
447
- "learning_rate": 2.0377541142303973e-05,
448
- "loss": 0.4332,
449
- "step": 30000
450
- },
451
- {
452
- "epoch": 4.15,
453
- "eval_accuracy": 0.7036166240232349,
454
- "eval_loss": 1.1231409311294556,
455
- "eval_runtime": 147.2084,
456
- "eval_samples_per_second": 196.47,
457
- "eval_steps_per_second": 24.564,
458
- "step": 30000
459
- },
460
- {
461
- "epoch": 4.29,
462
- "learning_rate": 1.9389730722879666e-05,
463
- "loss": 0.4427,
464
- "step": 31000
465
- },
466
- {
467
- "epoch": 4.29,
468
- "eval_accuracy": 0.6981536546573542,
469
- "eval_loss": 1.1329883337020874,
470
- "eval_runtime": 156.5248,
471
- "eval_samples_per_second": 184.776,
472
- "eval_steps_per_second": 23.102,
473
- "step": 31000
474
- },
475
- {
476
- "epoch": 4.43,
477
- "learning_rate": 1.8402908113874786e-05,
478
- "loss": 0.4475,
479
- "step": 32000
480
- },
481
- {
482
- "epoch": 4.43,
483
- "eval_accuracy": 0.7000207454532882,
484
- "eval_loss": 1.126924991607666,
485
- "eval_runtime": 155.4925,
486
- "eval_samples_per_second": 186.003,
487
- "eval_steps_per_second": 23.255,
488
- "step": 32000
489
- },
490
- {
491
- "epoch": 4.56,
492
- "learning_rate": 1.7415097694450482e-05,
493
- "loss": 0.4488,
494
- "step": 33000
495
- },
496
- {
497
- "epoch": 4.56,
498
- "eval_accuracy": 0.7025101998478667,
499
- "eval_loss": 1.1300145387649536,
500
- "eval_runtime": 153.2715,
501
- "eval_samples_per_second": 188.698,
502
- "eval_steps_per_second": 23.592,
503
- "step": 33000
504
- },
505
- {
506
- "epoch": 4.7,
507
- "learning_rate": 1.6428275085445602e-05,
508
- "loss": 0.4431,
509
- "step": 34000
510
- },
511
- {
512
- "epoch": 4.7,
513
- "eval_accuracy": 0.701887836249222,
514
- "eval_loss": 1.1304574012756348,
515
- "eval_runtime": 153.1234,
516
- "eval_samples_per_second": 188.88,
517
- "eval_steps_per_second": 23.615,
518
- "step": 34000
519
- },
520
- {
521
- "epoch": 4.84,
522
- "learning_rate": 1.54404646660213e-05,
523
- "loss": 0.4514,
524
- "step": 35000
525
- },
526
- {
527
- "epoch": 4.84,
528
- "eval_accuracy": 0.6990872000553212,
529
- "eval_loss": 1.1445332765579224,
530
- "eval_runtime": 155.0001,
531
- "eval_samples_per_second": 186.593,
532
- "eval_steps_per_second": 23.329,
533
- "step": 35000
534
- },
535
- {
536
- "epoch": 4.98,
537
- "learning_rate": 1.4452654246596994e-05,
538
- "loss": 0.4427,
539
- "step": 36000
540
- },
541
- {
542
- "epoch": 4.98,
543
- "eval_accuracy": 0.702821381647189,
544
- "eval_loss": 1.1224807500839233,
545
- "eval_runtime": 153.3684,
546
- "eval_samples_per_second": 188.579,
547
- "eval_steps_per_second": 23.577,
548
- "step": 36000
549
- },
550
- {
551
- "epoch": 5.12,
552
- "learning_rate": 1.346681944801154e-05,
553
- "loss": 0.3391,
554
- "step": 37000
555
- },
556
- {
557
- "epoch": 5.12,
558
- "eval_accuracy": 0.6964248668833414,
559
- "eval_loss": 1.1918517351150513,
560
- "eval_runtime": 152.8672,
561
- "eval_samples_per_second": 189.197,
562
- "eval_steps_per_second": 23.655,
563
- "step": 37000
564
- },
565
- {
566
- "epoch": 5.26,
567
- "learning_rate": 1.2479009028587233e-05,
568
- "loss": 0.3222,
569
- "step": 38000
570
- },
571
- {
572
- "epoch": 5.26,
573
- "eval_accuracy": 0.6971163819929466,
574
- "eval_loss": 1.2108075618743896,
575
- "eval_runtime": 155.4241,
576
- "eval_samples_per_second": 186.084,
577
- "eval_steps_per_second": 23.265,
578
- "step": 38000
579
- },
580
- {
581
- "epoch": 5.39,
582
- "learning_rate": 1.149119860916293e-05,
583
- "loss": 0.315,
584
- "step": 39000
585
- },
586
- {
587
- "epoch": 5.39,
588
- "eval_accuracy": 0.6985685637231174,
589
- "eval_loss": 1.2174683809280396,
590
- "eval_runtime": 154.5842,
591
- "eval_samples_per_second": 187.095,
592
- "eval_steps_per_second": 23.392,
593
- "step": 39000
594
- },
595
- {
596
- "epoch": 5.53,
597
- "learning_rate": 1.0503388189738627e-05,
598
- "loss": 0.3307,
599
- "step": 40000
600
- },
601
- {
602
- "epoch": 5.53,
603
- "eval_accuracy": 0.6943848973100062,
604
- "eval_loss": 1.219739556312561,
605
- "eval_runtime": 155.288,
606
- "eval_samples_per_second": 186.247,
607
- "eval_steps_per_second": 23.286,
608
- "step": 40000
609
- },
610
- {
611
- "epoch": 5.67,
612
- "learning_rate": 9.516565580733746e-06,
613
- "loss": 0.3187,
614
- "step": 41000
615
- },
616
- {
617
- "epoch": 5.67,
618
- "eval_accuracy": 0.6988105940114792,
619
- "eval_loss": 1.228104591369629,
620
- "eval_runtime": 154.3425,
621
- "eval_samples_per_second": 187.388,
622
- "eval_steps_per_second": 23.428,
623
- "step": 41000
624
- },
625
- {
626
- "epoch": 5.81,
627
- "learning_rate": 8.528755161309441e-06,
628
- "loss": 0.3327,
629
- "step": 42000
630
- },
631
- {
632
- "epoch": 5.81,
633
- "eval_accuracy": 0.6966668971717032,
634
- "eval_loss": 1.2378689050674438,
635
- "eval_runtime": 147.7901,
636
- "eval_samples_per_second": 195.696,
637
- "eval_steps_per_second": 24.467,
638
- "step": 42000
639
- },
640
- {
641
- "epoch": 5.95,
642
- "learning_rate": 7.540944741885138e-06,
643
- "loss": 0.3213,
644
- "step": 43000
645
- },
646
- {
647
- "epoch": 5.95,
648
- "eval_accuracy": 0.6972201092593873,
649
- "eval_loss": 1.2366853952407837,
650
- "eval_runtime": 155.5771,
651
- "eval_samples_per_second": 185.901,
652
- "eval_steps_per_second": 23.242,
653
- "step": 43000
654
- },
655
- {
656
- "epoch": 6.08,
657
- "learning_rate": 6.553134322460834e-06,
658
- "loss": 0.2688,
659
- "step": 44000
660
- },
661
- {
662
- "epoch": 6.08,
663
- "eval_accuracy": 0.6940737155106839,
664
- "eval_loss": 1.2731064558029175,
665
- "eval_runtime": 155.6374,
666
- "eval_samples_per_second": 185.829,
667
- "eval_steps_per_second": 23.233,
668
- "step": 44000
669
- },
670
- {
671
- "epoch": 6.22,
672
- "learning_rate": 5.56532390303653e-06,
673
- "loss": 0.2395,
674
- "step": 45000
675
- },
676
- {
677
- "epoch": 6.22,
678
- "eval_accuracy": 0.696632321416223,
679
- "eval_loss": 1.2903701066970825,
680
- "eval_runtime": 153.3038,
681
- "eval_samples_per_second": 188.658,
682
- "eval_steps_per_second": 23.587,
683
- "step": 45000
684
- },
685
- {
686
- "epoch": 6.36,
687
- "learning_rate": 4.5785012940316495e-06,
688
- "loss": 0.2407,
689
- "step": 46000
690
- },
691
- {
692
- "epoch": 6.36,
693
- "eval_accuracy": 0.6951455639305719,
694
- "eval_loss": 1.2933955192565918,
695
- "eval_runtime": 155.5305,
696
- "eval_samples_per_second": 185.957,
697
- "eval_steps_per_second": 23.249,
698
- "step": 46000
699
- },
700
- {
701
- "epoch": 6.5,
702
- "learning_rate": 3.5906908746073454e-06,
703
- "loss": 0.2312,
704
- "step": 47000
705
- },
706
- {
707
- "epoch": 6.5,
708
- "eval_accuracy": 0.6955258972408547,
709
- "eval_loss": 1.2975932359695435,
710
- "eval_runtime": 155.813,
711
- "eval_samples_per_second": 185.62,
712
- "eval_steps_per_second": 23.207,
713
- "step": 47000
714
- },
715
- {
716
- "epoch": 6.64,
717
- "learning_rate": 2.6028804551830413e-06,
718
- "loss": 0.2369,
719
- "step": 48000
720
- },
721
- {
722
- "epoch": 6.64,
723
- "eval_accuracy": 0.6934859276675195,
724
- "eval_loss": 1.3018196821212769,
725
- "eval_runtime": 150.7052,
726
- "eval_samples_per_second": 191.911,
727
- "eval_steps_per_second": 23.994,
728
- "step": 48000
729
- },
730
- {
731
- "epoch": 6.78,
732
- "learning_rate": 1.6170456565975861e-06,
733
- "loss": 0.2317,
734
- "step": 49000
735
- },
736
- {
737
- "epoch": 6.78,
738
- "eval_accuracy": 0.6949381093976903,
739
- "eval_loss": 1.3031286001205444,
740
- "eval_runtime": 154.6116,
741
- "eval_samples_per_second": 187.062,
742
- "eval_steps_per_second": 23.388,
743
- "step": 49000
744
- },
745
- {
746
- "epoch": 6.91,
747
- "learning_rate": 6.292352371732817e-07,
748
- "loss": 0.2395,
749
- "step": 50000
750
- },
751
- {
752
- "epoch": 6.91,
753
- "eval_accuracy": 0.6957679275292166,
754
- "eval_loss": 1.3016529083251953,
755
- "eval_runtime": 153.6886,
756
- "eval_samples_per_second": 188.186,
757
- "eval_steps_per_second": 23.528,
758
- "step": 50000
759
- },
760
- {
761
- "epoch": 7.0,
762
- "step": 50617,
763
- "total_flos": 1.2555854407514107e+20,
764
- "train_loss": 0.6310260885072734,
765
- "train_runtime": 24479.9902,
766
- "train_samples_per_second": 66.162,
767
- "train_steps_per_second": 2.068
768
  }
769
  ],
770
- "max_steps": 50617,
771
- "num_train_epochs": 7,
772
- "total_flos": 1.2555854407514107e+20,
773
  "trial_name": null,
774
  "trial_params": null
775
  }
 
1
  {
2
+ "best_metric": 0.9865725040435791,
3
+ "best_model_checkpoint": "Transformers/vit-base-clothing-leafs-example-full-simple/checkpoint-21000",
4
+ "epoch": 3.0,
5
+ "global_step": 21693,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
+ "learning_rate": 9.539482782464389e-05,
13
+ "loss": 1.558,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.14,
18
+ "eval_accuracy": 0.6426249913560611,
19
+ "eval_loss": 1.2684125900268555,
20
+ "eval_runtime": 160.0475,
21
+ "eval_samples_per_second": 180.709,
22
+ "eval_steps_per_second": 22.593,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.28,
27
+ "learning_rate": 9.078504586733047e-05,
28
+ "loss": 1.2184,
29
  "step": 2000
30
  },
31
  {
32
  "epoch": 0.28,
33
+ "eval_accuracy": 0.6658944747942742,
34
+ "eval_loss": 1.1550828218460083,
35
+ "eval_runtime": 153.2478,
36
+ "eval_samples_per_second": 188.727,
37
+ "eval_steps_per_second": 23.596,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.41,
42
+ "learning_rate": 8.617526391001706e-05,
43
+ "loss": 1.1427,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.41,
48
+ "eval_accuracy": 0.6677961413456884,
49
+ "eval_loss": 1.1250686645507812,
50
+ "eval_runtime": 152.762,
51
+ "eval_samples_per_second": 189.327,
52
+ "eval_steps_per_second": 23.671,
53
  "step": 3000
54
  },
55
  {
56
  "epoch": 0.55,
57
+ "learning_rate": 8.157009173466096e-05,
58
+ "loss": 1.1118,
59
  "step": 4000
60
  },
61
  {
62
  "epoch": 0.55,
63
+ "eval_accuracy": 0.6727750501348454,
64
+ "eval_loss": 1.111645221710205,
65
+ "eval_runtime": 152.8701,
66
+ "eval_samples_per_second": 189.193,
67
+ "eval_steps_per_second": 23.654,
68
  "step": 4000
69
  },
70
  {
71
  "epoch": 0.69,
72
+ "learning_rate": 7.696030977734754e-05,
73
+ "loss": 1.0768,
74
  "step": 5000
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "eval_accuracy": 0.6808657769172256,
79
+ "eval_loss": 1.0770487785339355,
80
+ "eval_runtime": 153.4667,
81
+ "eval_samples_per_second": 188.458,
82
+ "eval_steps_per_second": 23.562,
83
  "step": 5000
84
  },
85
  {
86
  "epoch": 0.83,
87
+ "learning_rate": 7.235513760199143e-05,
88
+ "loss": 1.0601,
89
  "step": 6000
90
  },
91
  {
92
  "epoch": 0.83,
93
+ "eval_accuracy": 0.6890256552105664,
94
+ "eval_loss": 1.054029107093811,
95
+ "eval_runtime": 153.5426,
96
+ "eval_samples_per_second": 188.365,
97
+ "eval_steps_per_second": 23.55,
98
  "step": 6000
99
  },
100
  {
101
  "epoch": 0.97,
102
+ "learning_rate": 6.774535564467801e-05,
103
+ "loss": 1.0326,
104
  "step": 7000
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "eval_accuracy": 0.6928981398243551,
109
+ "eval_loss": 1.0408824682235718,
110
+ "eval_runtime": 153.2325,
111
+ "eval_samples_per_second": 188.746,
112
+ "eval_steps_per_second": 23.598,
113
  "step": 7000
114
  },
115
  {
116
  "epoch": 1.11,
117
+ "learning_rate": 6.31401834693219e-05,
118
+ "loss": 0.9398,
119
  "step": 8000
120
  },
121
  {
122
  "epoch": 1.11,
123
+ "eval_accuracy": 0.6950764124196114,
124
+ "eval_loss": 1.0343307256698608,
125
+ "eval_runtime": 152.2,
126
+ "eval_samples_per_second": 190.026,
127
+ "eval_steps_per_second": 23.758,
128
  "step": 8000
129
  },
130
  {
131
  "epoch": 1.24,
132
+ "learning_rate": 5.853040151200848e-05,
133
+ "loss": 0.8986,
134
  "step": 9000
135
  },
136
  {
137
  "epoch": 1.24,
138
+ "eval_accuracy": 0.6950764124196114,
139
+ "eval_loss": 1.0352978706359863,
140
+ "eval_runtime": 152.4062,
141
+ "eval_samples_per_second": 189.769,
142
+ "eval_steps_per_second": 23.726,
143
  "step": 9000
144
  },
145
  {
146
  "epoch": 1.38,
147
+ "learning_rate": 5.392061955469506e-05,
148
+ "loss": 0.8883,
149
  "step": 10000
150
  },
151
  {
152
  "epoch": 1.38,
153
+ "eval_accuracy": 0.7006431090519328,
154
+ "eval_loss": 1.013888955116272,
155
+ "eval_runtime": 151.7157,
156
+ "eval_samples_per_second": 190.633,
157
+ "eval_steps_per_second": 23.834,
158
  "step": 10000
159
  },
160
  {
161
  "epoch": 1.52,
162
+ "learning_rate": 4.9315447379338956e-05,
163
+ "loss": 0.8731,
164
  "step": 11000
165
  },
166
  {
167
  "epoch": 1.52,
168
+ "eval_accuracy": 0.7063826844616555,
169
+ "eval_loss": 0.9994178414344788,
170
+ "eval_runtime": 153.225,
171
+ "eval_samples_per_second": 188.755,
172
+ "eval_steps_per_second": 23.599,
173
  "step": 11000
174
  },
175
  {
176
  "epoch": 1.66,
177
+ "learning_rate": 4.470566542202554e-05,
178
+ "loss": 0.8752,
179
  "step": 12000
180
  },
181
  {
182
  "epoch": 1.66,
183
+ "eval_accuracy": 0.7020261392711431,
184
+ "eval_loss": 1.004809856414795,
185
+ "eval_runtime": 154.2081,
186
+ "eval_samples_per_second": 187.552,
187
+ "eval_steps_per_second": 23.449,
188
  "step": 12000
189
  },
190
  {
191
  "epoch": 1.8,
192
+ "learning_rate": 4.010049324666943e-05,
193
+ "loss": 0.8579,
194
  "step": 13000
195
  },
196
  {
197
  "epoch": 1.8,
198
+ "eval_accuracy": 0.7090795933891155,
199
+ "eval_loss": 0.9911579489707947,
200
+ "eval_runtime": 153.7146,
201
+ "eval_samples_per_second": 188.154,
202
+ "eval_steps_per_second": 23.524,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 1.94,
207
+ "learning_rate": 3.5490711289356015e-05,
208
+ "loss": 0.864,
209
  "step": 14000
210
  },
211
  {
212
  "epoch": 1.94,
213
+ "eval_accuracy": 0.7096328054767996,
214
+ "eval_loss": 0.9869005084037781,
215
+ "eval_runtime": 153.5275,
216
+ "eval_samples_per_second": 188.383,
217
+ "eval_steps_per_second": 23.553,
218
  "step": 14000
219
  },
220
  {
221
  "epoch": 2.07,
222
+ "learning_rate": 3.08809293320426e-05,
223
+ "loss": 0.7798,
224
  "step": 15000
225
  },
226
  {
227
  "epoch": 2.07,
228
+ "eval_accuracy": 0.7094253509439181,
229
+ "eval_loss": 1.0022022724151611,
230
+ "eval_runtime": 153.635,
231
+ "eval_samples_per_second": 188.251,
232
+ "eval_steps_per_second": 23.536,
233
  "step": 15000
234
  },
235
  {
236
  "epoch": 2.21,
237
+ "learning_rate": 2.6271147374729177e-05,
238
+ "loss": 0.6883,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 2.21,
243
+ "eval_accuracy": 0.7089067146117143,
244
+ "eval_loss": 1.00809645652771,
245
+ "eval_runtime": 154.6119,
246
+ "eval_samples_per_second": 187.062,
247
+ "eval_steps_per_second": 23.388,
248
  "step": 16000
249
  },
250
  {
251
  "epoch": 2.35,
252
+ "learning_rate": 2.1661365417415756e-05,
253
+ "loss": 0.67,
254
  "step": 17000
255
  },
256
  {
257
  "epoch": 2.35,
258
+ "eval_accuracy": 0.7111195629624507,
259
+ "eval_loss": 1.0066086053848267,
260
+ "eval_runtime": 152.9644,
261
+ "eval_samples_per_second": 189.077,
262
+ "eval_steps_per_second": 23.639,
263
  "step": 17000
264
  },
265
  {
266
  "epoch": 2.49,
267
+ "learning_rate": 1.705619324205965e-05,
268
+ "loss": 0.678,
269
  "step": 18000
270
  },
271
  {
272
  "epoch": 2.49,
273
+ "eval_accuracy": 0.7131595325357859,
274
+ "eval_loss": 0.9969001412391663,
275
+ "eval_runtime": 153.5427,
276
+ "eval_samples_per_second": 188.365,
277
+ "eval_steps_per_second": 23.55,
278
  "step": 18000
279
  },
280
  {
281
  "epoch": 2.63,
282
+ "learning_rate": 1.2446411284746232e-05,
283
+ "loss": 0.6701,
284
  "step": 19000
285
  },
286
  {
287
  "epoch": 2.63,
288
+ "eval_accuracy": 0.7133324113131871,
289
+ "eval_loss": 0.9977221488952637,
290
+ "eval_runtime": 153.0562,
291
+ "eval_samples_per_second": 188.963,
292
+ "eval_steps_per_second": 23.625,
293
  "step": 19000
294
  },
295
  {
296
  "epoch": 2.77,
297
+ "learning_rate": 7.836629327432812e-06,
298
+ "loss": 0.6652,
299
  "step": 20000
300
  },
301
  {
302
  "epoch": 2.77,
303
+ "eval_accuracy": 0.7144388354885555,
304
+ "eval_loss": 0.9937713742256165,
305
+ "eval_runtime": 152.7221,
306
+ "eval_samples_per_second": 189.377,
307
+ "eval_steps_per_second": 23.677,
308
  "step": 20000
309
  },
310
  {
311
  "epoch": 2.9,
312
+ "learning_rate": 3.226847370119394e-06,
313
+ "loss": 0.6588,
314
  "step": 21000
315
  },
316
  {
317
  "epoch": 2.9,
318
+ "eval_accuracy": 0.7164442293064104,
319
+ "eval_loss": 0.9865725040435791,
320
+ "eval_runtime": 151.9597,
321
+ "eval_samples_per_second": 190.327,
322
+ "eval_steps_per_second": 23.796,
323
  "step": 21000
324
  },
325
  {
326
+ "epoch": 3.0,
327
+ "step": 21693,
328
+ "total_flos": 5.381080460363188e+19,
329
+ "train_loss": 0.9066727288788254,
330
+ "train_runtime": 10400.7433,
331
+ "train_samples_per_second": 66.739,
332
+ "train_steps_per_second": 2.086
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  }
334
  ],
335
+ "max_steps": 21693,
336
+ "num_train_epochs": 3,
337
+ "total_flos": 5.381080460363188e+19,
338
  "trial_name": null,
339
  "trial_params": null
340
  }