darthraider commited on
Commit
6bf9deb
·
verified ·
1 Parent(s): 5bea3ab

🍻 cheers

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
 
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
@@ -16,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # vit-base-fruit-punch
18
 
19
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.0004
22
  - Accuracy: 1.0
 
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
6
+ - image-classification
7
  - generated_from_trainer
8
  metrics:
9
  - accuracy
 
17
 
18
  # vit-base-fruit-punch
19
 
20
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the darthraider/fruit-ripeness-detection-dataset dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.0004
23
  - Accuracy: 1.0
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 8.0,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.00038815097650513053,
5
- "eval_runtime": 13.0148,
6
- "eval_samples_per_second": 76.836,
7
- "eval_steps_per_second": 4.841,
8
  "total_flos": 2.479168170953736e+18,
9
- "train_loss": 0.03568338290625252,
10
- "train_runtime": 1001.3794,
11
- "train_samples_per_second": 31.948,
12
- "train_steps_per_second": 0.999
13
  }
 
1
  {
2
  "epoch": 8.0,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.0003763487620744854,
5
+ "eval_runtime": 13.3161,
6
+ "eval_samples_per_second": 75.097,
7
+ "eval_steps_per_second": 4.731,
8
  "total_flos": 2.479168170953736e+18,
9
+ "train_loss": 0.03565365221118554,
10
+ "train_runtime": 1044.6113,
11
+ "train_samples_per_second": 30.626,
12
+ "train_steps_per_second": 0.957
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 8.0,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.00038815097650513053,
5
- "eval_runtime": 13.0148,
6
- "eval_samples_per_second": 76.836,
7
- "eval_steps_per_second": 4.841
8
  }
 
1
  {
2
  "epoch": 8.0,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.0003763487620744854,
5
+ "eval_runtime": 13.3161,
6
+ "eval_samples_per_second": 75.097,
7
+ "eval_steps_per_second": 4.731
8
  }
runs/Mar04_18-25-10_2cb32fa4c9b0/events.out.tfevents.1741113785.2cb32fa4c9b0.31.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1013ac7e0064df5bd39434f1d7e38c17f2f1730c6029107ec4bc813eccdb0508
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 2.479168170953736e+18,
4
- "train_loss": 0.03568338290625252,
5
- "train_runtime": 1001.3794,
6
- "train_samples_per_second": 31.948,
7
- "train_steps_per_second": 0.999
8
  }
 
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 2.479168170953736e+18,
4
+ "train_loss": 0.03565365221118554,
5
+ "train_runtime": 1044.6113,
6
+ "train_samples_per_second": 30.626,
7
+ "train_steps_per_second": 0.957
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.00038815097650513053,
3
  "best_model_checkpoint": "./vit-base-fruit-punch/checkpoint-1000",
4
  "epoch": 8.0,
5
  "eval_steps": 100,
@@ -10,465 +10,465 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.08,
13
- "grad_norm": 106393.265625,
14
  "learning_rate": 4.9500000000000004e-05,
15
- "loss": 1.1459,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.16,
20
- "grad_norm": 88276.09375,
21
  "learning_rate": 4.9e-05,
22
- "loss": 0.7296,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.24,
27
- "grad_norm": 63037.91796875,
28
  "learning_rate": 4.85e-05,
29
- "loss": 0.4458,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.32,
34
- "grad_norm": 42146.3203125,
35
  "learning_rate": 4.8e-05,
36
- "loss": 0.2647,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.4,
41
- "grad_norm": 27606.990234375,
42
  "learning_rate": 4.75e-05,
43
- "loss": 0.1553,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.48,
48
- "grad_norm": 25297.625,
49
  "learning_rate": 4.7e-05,
50
- "loss": 0.1053,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.56,
55
- "grad_norm": 19000.822265625,
56
  "learning_rate": 4.6500000000000005e-05,
57
- "loss": 0.0844,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.64,
62
- "grad_norm": 26219.373046875,
63
  "learning_rate": 4.600000000000001e-05,
64
- "loss": 0.0644,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.72,
69
- "grad_norm": 13452.0966796875,
70
  "learning_rate": 4.55e-05,
71
- "loss": 0.0583,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.8,
76
- "grad_norm": 11122.3671875,
77
  "learning_rate": 4.5e-05,
78
- "loss": 0.046,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.8,
83
  "eval_accuracy": 1.0,
84
- "eval_loss": 0.04426969215273857,
85
- "eval_runtime": 12.7701,
86
- "eval_samples_per_second": 78.308,
87
- "eval_steps_per_second": 4.933,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 0.88,
92
- "grad_norm": 10542.482421875,
93
  "learning_rate": 4.4500000000000004e-05,
94
- "loss": 0.0397,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 0.96,
99
- "grad_norm": 10209.0732421875,
100
  "learning_rate": 4.4000000000000006e-05,
101
- "loss": 0.0357,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 1.04,
106
- "grad_norm": 8613.568359375,
107
  "learning_rate": 4.35e-05,
108
- "loss": 0.0325,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 1.12,
113
- "grad_norm": 7610.2255859375,
114
  "learning_rate": 4.3e-05,
115
- "loss": 0.0289,
116
  "step": 140
117
  },
118
  {
119
  "epoch": 1.2,
120
- "grad_norm": 6793.42236328125,
121
  "learning_rate": 4.25e-05,
122
- "loss": 0.0259,
123
  "step": 150
124
  },
125
  {
126
  "epoch": 1.28,
127
- "grad_norm": 6208.12890625,
128
  "learning_rate": 4.2e-05,
129
- "loss": 0.0234,
130
  "step": 160
131
  },
132
  {
133
  "epoch": 1.3599999999999999,
134
- "grad_norm": 8605.1796875,
135
  "learning_rate": 4.15e-05,
136
- "loss": 0.0214,
137
  "step": 170
138
  },
139
  {
140
  "epoch": 1.44,
141
- "grad_norm": 5137.94921875,
142
  "learning_rate": 4.1e-05,
143
- "loss": 0.0197,
144
  "step": 180
145
  },
146
  {
147
  "epoch": 1.52,
148
- "grad_norm": 4862.05859375,
149
  "learning_rate": 4.05e-05,
150
- "loss": 0.0179,
151
  "step": 190
152
  },
153
  {
154
  "epoch": 1.6,
155
- "grad_norm": 4239.48583984375,
156
  "learning_rate": 4e-05,
157
- "loss": 0.0164,
158
  "step": 200
159
  },
160
  {
161
  "epoch": 1.6,
162
- "eval_accuracy": 0.999,
163
- "eval_loss": 0.018771709874272346,
164
- "eval_runtime": 12.8207,
165
- "eval_samples_per_second": 77.999,
166
- "eval_steps_per_second": 4.914,
167
  "step": 200
168
  },
169
  {
170
  "epoch": 1.6800000000000002,
171
- "grad_norm": 4013.29931640625,
172
  "learning_rate": 3.9500000000000005e-05,
173
- "loss": 0.0149,
174
  "step": 210
175
  },
176
  {
177
  "epoch": 1.76,
178
- "grad_norm": 3654.03173828125,
179
  "learning_rate": 3.9000000000000006e-05,
180
- "loss": 0.0137,
181
  "step": 220
182
  },
183
  {
184
  "epoch": 1.8399999999999999,
185
- "grad_norm": 3286.04052734375,
186
  "learning_rate": 3.85e-05,
187
- "loss": 0.0125,
188
  "step": 230
189
  },
190
  {
191
  "epoch": 1.92,
192
- "grad_norm": 3118.806396484375,
193
  "learning_rate": 3.8e-05,
194
- "loss": 0.0115,
195
  "step": 240
196
  },
197
  {
198
  "epoch": 2.0,
199
- "grad_norm": 2893.08203125,
200
  "learning_rate": 3.7500000000000003e-05,
201
- "loss": 0.0105,
202
  "step": 250
203
  },
204
  {
205
  "epoch": 2.08,
206
- "grad_norm": 2609.7880859375,
207
  "learning_rate": 3.7e-05,
208
- "loss": 0.0098,
209
  "step": 260
210
  },
211
  {
212
  "epoch": 2.16,
213
- "grad_norm": 2484.2294921875,
214
  "learning_rate": 3.65e-05,
215
- "loss": 0.0089,
216
  "step": 270
217
  },
218
  {
219
  "epoch": 2.24,
220
- "grad_norm": 2335.24072265625,
221
  "learning_rate": 3.6e-05,
222
- "loss": 0.0082,
223
  "step": 280
224
  },
225
  {
226
  "epoch": 2.32,
227
- "grad_norm": 2343.471435546875,
228
  "learning_rate": 3.55e-05,
229
- "loss": 0.0076,
230
  "step": 290
231
  },
232
  {
233
  "epoch": 2.4,
234
- "grad_norm": 1955.1356201171875,
235
  "learning_rate": 3.5e-05,
236
- "loss": 0.007,
237
  "step": 300
238
  },
239
  {
240
  "epoch": 2.4,
241
  "eval_accuracy": 1.0,
242
- "eval_loss": 0.007058804389089346,
243
- "eval_runtime": 12.8867,
244
- "eval_samples_per_second": 77.599,
245
- "eval_steps_per_second": 4.889,
246
  "step": 300
247
  },
248
  {
249
  "epoch": 2.48,
250
- "grad_norm": 1792.7567138671875,
251
  "learning_rate": 3.45e-05,
252
- "loss": 0.0065,
253
  "step": 310
254
  },
255
  {
256
  "epoch": 2.56,
257
- "grad_norm": 1647.1790771484375,
258
  "learning_rate": 3.4000000000000007e-05,
259
- "loss": 0.006,
260
  "step": 320
261
  },
262
  {
263
  "epoch": 2.64,
264
- "grad_norm": 1525.8948974609375,
265
  "learning_rate": 3.35e-05,
266
- "loss": 0.0055,
267
  "step": 330
268
  },
269
  {
270
  "epoch": 2.7199999999999998,
271
- "grad_norm": 1406.7808837890625,
272
  "learning_rate": 3.3e-05,
273
- "loss": 0.0051,
274
  "step": 340
275
  },
276
  {
277
  "epoch": 2.8,
278
- "grad_norm": 1297.923583984375,
279
  "learning_rate": 3.2500000000000004e-05,
280
- "loss": 0.0048,
281
  "step": 350
282
  },
283
  {
284
  "epoch": 2.88,
285
- "grad_norm": 1231.1033935546875,
286
  "learning_rate": 3.2000000000000005e-05,
287
- "loss": 0.0044,
288
  "step": 360
289
  },
290
  {
291
  "epoch": 2.96,
292
- "grad_norm": 1162.668212890625,
293
  "learning_rate": 3.15e-05,
294
- "loss": 0.0041,
295
  "step": 370
296
  },
297
  {
298
  "epoch": 3.04,
299
- "grad_norm": 1072.34814453125,
300
  "learning_rate": 3.1e-05,
301
- "loss": 0.0038,
302
  "step": 380
303
  },
304
  {
305
  "epoch": 3.12,
306
- "grad_norm": 981.4453125,
307
  "learning_rate": 3.05e-05,
308
- "loss": 0.0035,
309
  "step": 390
310
  },
311
  {
312
  "epoch": 3.2,
313
- "grad_norm": 917.4797973632812,
314
  "learning_rate": 3e-05,
315
- "loss": 0.0033,
316
  "step": 400
317
  },
318
  {
319
  "epoch": 3.2,
320
  "eval_accuracy": 1.0,
321
- "eval_loss": 0.0035334331914782524,
322
- "eval_runtime": 12.9877,
323
- "eval_samples_per_second": 76.996,
324
- "eval_steps_per_second": 4.851,
325
  "step": 400
326
  },
327
  {
328
  "epoch": 3.2800000000000002,
329
- "grad_norm": 911.9354858398438,
330
  "learning_rate": 2.95e-05,
331
- "loss": 0.0031,
332
  "step": 410
333
  },
334
  {
335
  "epoch": 3.36,
336
- "grad_norm": 814.162841796875,
337
  "learning_rate": 2.9e-05,
338
- "loss": 0.0029,
339
  "step": 420
340
  },
341
  {
342
  "epoch": 3.44,
343
- "grad_norm": 755.6058959960938,
344
  "learning_rate": 2.8499999999999998e-05,
345
- "loss": 0.0027,
346
  "step": 430
347
  },
348
  {
349
  "epoch": 3.52,
350
- "grad_norm": 703.8270263671875,
351
  "learning_rate": 2.8000000000000003e-05,
352
- "loss": 0.0025,
353
  "step": 440
354
  },
355
  {
356
  "epoch": 3.6,
357
- "grad_norm": 666.2708740234375,
358
  "learning_rate": 2.7500000000000004e-05,
359
- "loss": 0.0023,
360
  "step": 450
361
  },
362
  {
363
  "epoch": 3.68,
364
- "grad_norm": 613.1011962890625,
365
  "learning_rate": 2.7000000000000002e-05,
366
- "loss": 0.0022,
367
  "step": 460
368
  },
369
  {
370
  "epoch": 3.76,
371
- "grad_norm": 577.6843872070312,
372
  "learning_rate": 2.6500000000000004e-05,
373
- "loss": 0.002,
374
  "step": 470
375
  },
376
  {
377
  "epoch": 3.84,
378
- "grad_norm": 554.882568359375,
379
  "learning_rate": 2.6000000000000002e-05,
380
- "loss": 0.0019,
381
  "step": 480
382
  },
383
  {
384
  "epoch": 3.92,
385
- "grad_norm": 528.9329223632812,
386
  "learning_rate": 2.5500000000000003e-05,
387
- "loss": 0.0018,
388
  "step": 490
389
  },
390
  {
391
  "epoch": 4.0,
392
- "grad_norm": 483.04278564453125,
393
  "learning_rate": 2.5e-05,
394
- "loss": 0.0017,
395
  "step": 500
396
  },
397
  {
398
  "epoch": 4.0,
399
  "eval_accuracy": 1.0,
400
- "eval_loss": 0.002120121382176876,
401
- "eval_runtime": 13.1662,
402
- "eval_samples_per_second": 75.952,
403
- "eval_steps_per_second": 4.785,
404
  "step": 500
405
  },
406
  {
407
  "epoch": 4.08,
408
- "grad_norm": 484.7864074707031,
409
  "learning_rate": 2.45e-05,
410
- "loss": 0.0016,
411
  "step": 510
412
  },
413
  {
414
  "epoch": 4.16,
415
- "grad_norm": 438.2588806152344,
416
  "learning_rate": 2.4e-05,
417
- "loss": 0.0015,
418
  "step": 520
419
  },
420
  {
421
  "epoch": 4.24,
422
- "grad_norm": 422.3269348144531,
423
  "learning_rate": 2.35e-05,
424
- "loss": 0.0014,
425
  "step": 530
426
  },
427
  {
428
  "epoch": 4.32,
429
- "grad_norm": 391.5030822753906,
430
  "learning_rate": 2.3000000000000003e-05,
431
- "loss": 0.0013,
432
  "step": 540
433
  },
434
  {
435
  "epoch": 4.4,
436
- "grad_norm": 370.3298645019531,
437
  "learning_rate": 2.25e-05,
438
  "loss": 0.0013,
439
  "step": 550
440
  },
441
  {
442
  "epoch": 4.48,
443
- "grad_norm": 364.08148193359375,
444
  "learning_rate": 2.2000000000000003e-05,
445
  "loss": 0.0012,
446
  "step": 560
447
  },
448
  {
449
  "epoch": 4.5600000000000005,
450
- "grad_norm": 336.76055908203125,
451
  "learning_rate": 2.15e-05,
452
- "loss": 0.0011,
453
  "step": 570
454
  },
455
  {
456
  "epoch": 4.64,
457
- "grad_norm": 307.3483581542969,
458
  "learning_rate": 2.1e-05,
459
  "loss": 0.0011,
460
  "step": 580
461
  },
462
  {
463
  "epoch": 4.72,
464
- "grad_norm": 294.3090515136719,
465
  "learning_rate": 2.05e-05,
466
  "loss": 0.001,
467
  "step": 590
468
  },
469
  {
470
  "epoch": 4.8,
471
- "grad_norm": 281.6009521484375,
472
  "learning_rate": 2e-05,
473
  "loss": 0.001,
474
  "step": 600
@@ -476,78 +476,78 @@
476
  {
477
  "epoch": 4.8,
478
  "eval_accuracy": 1.0,
479
- "eval_loss": 0.0011029670713469386,
480
- "eval_runtime": 12.8305,
481
- "eval_samples_per_second": 77.939,
482
- "eval_steps_per_second": 4.91,
483
  "step": 600
484
  },
485
  {
486
  "epoch": 4.88,
487
- "grad_norm": 271.7886657714844,
488
  "learning_rate": 1.9500000000000003e-05,
489
  "loss": 0.0009,
490
  "step": 610
491
  },
492
  {
493
  "epoch": 4.96,
494
- "grad_norm": 253.9346923828125,
495
  "learning_rate": 1.9e-05,
496
  "loss": 0.0009,
497
  "step": 620
498
  },
499
  {
500
  "epoch": 5.04,
501
- "grad_norm": 248.29092407226562,
502
  "learning_rate": 1.85e-05,
503
- "loss": 0.0008,
504
  "step": 630
505
  },
506
  {
507
  "epoch": 5.12,
508
- "grad_norm": 230.48448181152344,
509
  "learning_rate": 1.8e-05,
510
  "loss": 0.0008,
511
  "step": 640
512
  },
513
  {
514
  "epoch": 5.2,
515
- "grad_norm": 229.324462890625,
516
  "learning_rate": 1.75e-05,
517
  "loss": 0.0008,
518
  "step": 650
519
  },
520
  {
521
  "epoch": 5.28,
522
- "grad_norm": 212.48696899414062,
523
  "learning_rate": 1.7000000000000003e-05,
524
  "loss": 0.0007,
525
  "step": 660
526
  },
527
  {
528
  "epoch": 5.36,
529
- "grad_norm": 210.8821258544922,
530
  "learning_rate": 1.65e-05,
531
  "loss": 0.0007,
532
  "step": 670
533
  },
534
  {
535
  "epoch": 5.44,
536
- "grad_norm": 198.33792114257812,
537
  "learning_rate": 1.6000000000000003e-05,
538
  "loss": 0.0007,
539
  "step": 680
540
  },
541
  {
542
  "epoch": 5.52,
543
- "grad_norm": 193.6271514892578,
544
  "learning_rate": 1.55e-05,
545
- "loss": 0.0006,
546
  "step": 690
547
  },
548
  {
549
  "epoch": 5.6,
550
- "grad_norm": 182.02919006347656,
551
  "learning_rate": 1.5e-05,
552
  "loss": 0.0006,
553
  "step": 700
@@ -555,157 +555,157 @@
555
  {
556
  "epoch": 5.6,
557
  "eval_accuracy": 1.0,
558
- "eval_loss": 0.0007458859472535551,
559
- "eval_runtime": 12.8435,
560
- "eval_samples_per_second": 77.86,
561
- "eval_steps_per_second": 4.905,
562
  "step": 700
563
  },
564
  {
565
  "epoch": 5.68,
566
- "grad_norm": 178.0136260986328,
567
  "learning_rate": 1.45e-05,
568
  "loss": 0.0006,
569
  "step": 710
570
  },
571
  {
572
  "epoch": 5.76,
573
- "grad_norm": 169.41432189941406,
574
  "learning_rate": 1.4000000000000001e-05,
575
  "loss": 0.0006,
576
  "step": 720
577
  },
578
  {
579
  "epoch": 5.84,
580
- "grad_norm": 176.4602813720703,
581
  "learning_rate": 1.3500000000000001e-05,
582
- "loss": 0.0005,
583
  "step": 730
584
  },
585
  {
586
  "epoch": 5.92,
587
- "grad_norm": 160.14578247070312,
588
  "learning_rate": 1.3000000000000001e-05,
589
  "loss": 0.0005,
590
  "step": 740
591
  },
592
  {
593
  "epoch": 6.0,
594
- "grad_norm": 166.63352966308594,
595
  "learning_rate": 1.25e-05,
596
  "loss": 0.0005,
597
  "step": 750
598
  },
599
  {
600
  "epoch": 6.08,
601
- "grad_norm": 152.0838623046875,
602
  "learning_rate": 1.2e-05,
603
  "loss": 0.0005,
604
  "step": 760
605
  },
606
  {
607
  "epoch": 6.16,
608
- "grad_norm": 147.35086059570312,
609
  "learning_rate": 1.1500000000000002e-05,
610
  "loss": 0.0005,
611
  "step": 770
612
  },
613
  {
614
  "epoch": 6.24,
615
- "grad_norm": 140.54298400878906,
616
  "learning_rate": 1.1000000000000001e-05,
617
  "loss": 0.0005,
618
  "step": 780
619
  },
620
  {
621
  "epoch": 6.32,
622
- "grad_norm": 141.50022888183594,
623
  "learning_rate": 1.05e-05,
624
  "loss": 0.0005,
625
  "step": 790
626
  },
627
  {
628
  "epoch": 6.4,
629
- "grad_norm": 133.0969696044922,
630
  "learning_rate": 1e-05,
631
- "loss": 0.0004,
632
  "step": 800
633
  },
634
  {
635
  "epoch": 6.4,
636
  "eval_accuracy": 1.0,
637
- "eval_loss": 0.000529205659404397,
638
- "eval_runtime": 13.0142,
639
- "eval_samples_per_second": 76.839,
640
- "eval_steps_per_second": 4.841,
641
  "step": 800
642
  },
643
  {
644
  "epoch": 6.48,
645
- "grad_norm": 132.95997619628906,
646
  "learning_rate": 9.5e-06,
647
  "loss": 0.0004,
648
  "step": 810
649
  },
650
  {
651
  "epoch": 6.5600000000000005,
652
- "grad_norm": 134.52874755859375,
653
  "learning_rate": 9e-06,
654
  "loss": 0.0004,
655
  "step": 820
656
  },
657
  {
658
  "epoch": 6.64,
659
- "grad_norm": 126.07892608642578,
660
  "learning_rate": 8.500000000000002e-06,
661
  "loss": 0.0004,
662
  "step": 830
663
  },
664
  {
665
  "epoch": 6.72,
666
- "grad_norm": 122.06675720214844,
667
  "learning_rate": 8.000000000000001e-06,
668
  "loss": 0.0004,
669
  "step": 840
670
  },
671
  {
672
  "epoch": 6.8,
673
- "grad_norm": 119.95358276367188,
674
  "learning_rate": 7.5e-06,
675
  "loss": 0.0004,
676
  "step": 850
677
  },
678
  {
679
  "epoch": 6.88,
680
- "grad_norm": 120.792236328125,
681
  "learning_rate": 7.000000000000001e-06,
682
  "loss": 0.0004,
683
  "step": 860
684
  },
685
  {
686
  "epoch": 6.96,
687
- "grad_norm": 121.48966217041016,
688
  "learning_rate": 6.5000000000000004e-06,
689
  "loss": 0.0004,
690
  "step": 870
691
  },
692
  {
693
  "epoch": 7.04,
694
- "grad_norm": 116.1141128540039,
695
  "learning_rate": 6e-06,
696
  "loss": 0.0004,
697
  "step": 880
698
  },
699
  {
700
  "epoch": 7.12,
701
- "grad_norm": 112.2109375,
702
  "learning_rate": 5.500000000000001e-06,
703
  "loss": 0.0004,
704
  "step": 890
705
  },
706
  {
707
  "epoch": 7.2,
708
- "grad_norm": 111.11483001708984,
709
  "learning_rate": 5e-06,
710
  "loss": 0.0004,
711
  "step": 900
@@ -713,78 +713,78 @@
713
  {
714
  "epoch": 7.2,
715
  "eval_accuracy": 1.0,
716
- "eval_loss": 0.00041427274118177593,
717
- "eval_runtime": 12.8429,
718
- "eval_samples_per_second": 77.864,
719
- "eval_steps_per_second": 4.905,
720
  "step": 900
721
  },
722
  {
723
  "epoch": 7.28,
724
- "grad_norm": 111.32091522216797,
725
  "learning_rate": 4.5e-06,
726
  "loss": 0.0004,
727
  "step": 910
728
  },
729
  {
730
  "epoch": 7.36,
731
- "grad_norm": 107.73372650146484,
732
  "learning_rate": 4.000000000000001e-06,
733
  "loss": 0.0004,
734
  "step": 920
735
  },
736
  {
737
  "epoch": 7.44,
738
- "grad_norm": 110.24082946777344,
739
  "learning_rate": 3.5000000000000004e-06,
740
  "loss": 0.0004,
741
  "step": 930
742
  },
743
  {
744
  "epoch": 7.52,
745
- "grad_norm": 111.30231475830078,
746
  "learning_rate": 3e-06,
747
- "loss": 0.0003,
748
  "step": 940
749
  },
750
  {
751
  "epoch": 7.6,
752
- "grad_norm": 104.49162292480469,
753
  "learning_rate": 2.5e-06,
754
- "loss": 0.0003,
755
  "step": 950
756
  },
757
  {
758
  "epoch": 7.68,
759
- "grad_norm": 107.62512969970703,
760
  "learning_rate": 2.0000000000000003e-06,
761
  "loss": 0.0003,
762
  "step": 960
763
  },
764
  {
765
  "epoch": 7.76,
766
- "grad_norm": 106.17529296875,
767
  "learning_rate": 1.5e-06,
768
  "loss": 0.0003,
769
  "step": 970
770
  },
771
  {
772
  "epoch": 7.84,
773
- "grad_norm": 104.55513000488281,
774
  "learning_rate": 1.0000000000000002e-06,
775
  "loss": 0.0003,
776
  "step": 980
777
  },
778
  {
779
  "epoch": 7.92,
780
- "grad_norm": 104.8958969116211,
781
  "learning_rate": 5.000000000000001e-07,
782
  "loss": 0.0003,
783
  "step": 990
784
  },
785
  {
786
  "epoch": 8.0,
787
- "grad_norm": 110.42820739746094,
788
  "learning_rate": 0.0,
789
  "loss": 0.0003,
790
  "step": 1000
@@ -792,20 +792,20 @@
792
  {
793
  "epoch": 8.0,
794
  "eval_accuracy": 1.0,
795
- "eval_loss": 0.00038815097650513053,
796
- "eval_runtime": 12.8005,
797
- "eval_samples_per_second": 78.122,
798
- "eval_steps_per_second": 4.922,
799
  "step": 1000
800
  },
801
  {
802
  "epoch": 8.0,
803
  "step": 1000,
804
  "total_flos": 2.479168170953736e+18,
805
- "train_loss": 0.03568338290625252,
806
- "train_runtime": 1001.3794,
807
- "train_samples_per_second": 31.948,
808
- "train_steps_per_second": 0.999
809
  }
810
  ],
811
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.0003763487620744854,
3
  "best_model_checkpoint": "./vit-base-fruit-punch/checkpoint-1000",
4
  "epoch": 8.0,
5
  "eval_steps": 100,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.08,
13
+ "grad_norm": 100187.125,
14
  "learning_rate": 4.9500000000000004e-05,
15
+ "loss": 1.127,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.16,
20
+ "grad_norm": 96563.390625,
21
  "learning_rate": 4.9e-05,
22
+ "loss": 0.7007,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.24,
27
+ "grad_norm": 64723.85546875,
28
  "learning_rate": 4.85e-05,
29
+ "loss": 0.4167,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.32,
34
+ "grad_norm": 40675.4140625,
35
  "learning_rate": 4.8e-05,
36
+ "loss": 0.2602,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.4,
41
+ "grad_norm": 28872.85546875,
42
  "learning_rate": 4.75e-05,
43
+ "loss": 0.16,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.48,
48
+ "grad_norm": 26935.421875,
49
  "learning_rate": 4.7e-05,
50
+ "loss": 0.1115,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.56,
55
+ "grad_norm": 19970.119140625,
56
  "learning_rate": 4.6500000000000005e-05,
57
+ "loss": 0.0944,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.64,
62
+ "grad_norm": 19774.392578125,
63
  "learning_rate": 4.600000000000001e-05,
64
+ "loss": 0.0761,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.72,
69
+ "grad_norm": 14233.1318359375,
70
  "learning_rate": 4.55e-05,
71
+ "loss": 0.0588,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.8,
76
+ "grad_norm": 12306.6767578125,
77
  "learning_rate": 4.5e-05,
78
+ "loss": 0.0488,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.8,
83
  "eval_accuracy": 1.0,
84
+ "eval_loss": 0.04701722040772438,
85
+ "eval_runtime": 12.891,
86
+ "eval_samples_per_second": 77.574,
87
+ "eval_steps_per_second": 4.887,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 0.88,
92
+ "grad_norm": 11777.6708984375,
93
  "learning_rate": 4.4500000000000004e-05,
94
+ "loss": 0.0429,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 0.96,
99
+ "grad_norm": 10813.1064453125,
100
  "learning_rate": 4.4000000000000006e-05,
101
+ "loss": 0.0458,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 1.04,
106
+ "grad_norm": 8880.1279296875,
107
  "learning_rate": 4.35e-05,
108
+ "loss": 0.0384,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 1.12,
113
+ "grad_norm": 8182.60107421875,
114
  "learning_rate": 4.3e-05,
115
+ "loss": 0.0309,
116
  "step": 140
117
  },
118
  {
119
  "epoch": 1.2,
120
+ "grad_norm": 7128.07275390625,
121
  "learning_rate": 4.25e-05,
122
+ "loss": 0.0281,
123
  "step": 150
124
  },
125
  {
126
  "epoch": 1.28,
127
+ "grad_norm": 6803.498046875,
128
  "learning_rate": 4.2e-05,
129
+ "loss": 0.0254,
130
  "step": 160
131
  },
132
  {
133
  "epoch": 1.3599999999999999,
134
+ "grad_norm": 6198.88037109375,
135
  "learning_rate": 4.15e-05,
136
+ "loss": 0.0232,
137
  "step": 170
138
  },
139
  {
140
  "epoch": 1.44,
141
+ "grad_norm": 5394.99072265625,
142
  "learning_rate": 4.1e-05,
143
+ "loss": 0.021,
144
  "step": 180
145
  },
146
  {
147
  "epoch": 1.52,
148
+ "grad_norm": 5170.45458984375,
149
  "learning_rate": 4.05e-05,
150
+ "loss": 0.0191,
151
  "step": 190
152
  },
153
  {
154
  "epoch": 1.6,
155
+ "grad_norm": 4661.20263671875,
156
  "learning_rate": 4e-05,
157
+ "loss": 0.0174,
158
  "step": 200
159
  },
160
  {
161
  "epoch": 1.6,
162
+ "eval_accuracy": 1.0,
163
+ "eval_loss": 0.017339378595352173,
164
+ "eval_runtime": 13.5483,
165
+ "eval_samples_per_second": 73.81,
166
+ "eval_steps_per_second": 4.65,
167
  "step": 200
168
  },
169
  {
170
  "epoch": 1.6800000000000002,
171
+ "grad_norm": 4417.46337890625,
172
  "learning_rate": 3.9500000000000005e-05,
173
+ "loss": 0.016,
174
  "step": 210
175
  },
176
  {
177
  "epoch": 1.76,
178
+ "grad_norm": 3967.432861328125,
179
  "learning_rate": 3.9000000000000006e-05,
180
+ "loss": 0.0146,
181
  "step": 220
182
  },
183
  {
184
  "epoch": 1.8399999999999999,
185
+ "grad_norm": 3568.837646484375,
186
  "learning_rate": 3.85e-05,
187
+ "loss": 0.0134,
188
  "step": 230
189
  },
190
  {
191
  "epoch": 1.92,
192
+ "grad_norm": 3472.84716796875,
193
  "learning_rate": 3.8e-05,
194
+ "loss": 0.0123,
195
  "step": 240
196
  },
197
  {
198
  "epoch": 2.0,
199
+ "grad_norm": 3190.490966796875,
200
  "learning_rate": 3.7500000000000003e-05,
201
+ "loss": 0.0113,
202
  "step": 250
203
  },
204
  {
205
  "epoch": 2.08,
206
+ "grad_norm": 2825.7802734375,
207
  "learning_rate": 3.7e-05,
208
+ "loss": 0.0104,
209
  "step": 260
210
  },
211
  {
212
  "epoch": 2.16,
213
+ "grad_norm": 2605.496337890625,
214
  "learning_rate": 3.65e-05,
215
+ "loss": 0.0095,
216
  "step": 270
217
  },
218
  {
219
  "epoch": 2.24,
220
+ "grad_norm": 2393.7314453125,
221
  "learning_rate": 3.6e-05,
222
+ "loss": 0.0088,
223
  "step": 280
224
  },
225
  {
226
  "epoch": 2.32,
227
+ "grad_norm": 2354.353515625,
228
  "learning_rate": 3.55e-05,
229
+ "loss": 0.0081,
230
  "step": 290
231
  },
232
  {
233
  "epoch": 2.4,
234
+ "grad_norm": 2027.639404296875,
235
  "learning_rate": 3.5e-05,
236
+ "loss": 0.0074,
237
  "step": 300
238
  },
239
  {
240
  "epoch": 2.4,
241
  "eval_accuracy": 1.0,
242
+ "eval_loss": 0.007476483471691608,
243
+ "eval_runtime": 13.6387,
244
+ "eval_samples_per_second": 73.321,
245
+ "eval_steps_per_second": 4.619,
246
  "step": 300
247
  },
248
  {
249
  "epoch": 2.48,
250
+ "grad_norm": 1890.090087890625,
251
  "learning_rate": 3.45e-05,
252
+ "loss": 0.0068,
253
  "step": 310
254
  },
255
  {
256
  "epoch": 2.56,
257
+ "grad_norm": 1713.1453857421875,
258
  "learning_rate": 3.4000000000000007e-05,
259
+ "loss": 0.0063,
260
  "step": 320
261
  },
262
  {
263
  "epoch": 2.64,
264
+ "grad_norm": 1611.7166748046875,
265
  "learning_rate": 3.35e-05,
266
+ "loss": 0.0059,
267
  "step": 330
268
  },
269
  {
270
  "epoch": 2.7199999999999998,
271
+ "grad_norm": 1491.3282470703125,
272
  "learning_rate": 3.3e-05,
273
+ "loss": 0.0054,
274
  "step": 340
275
  },
276
  {
277
  "epoch": 2.8,
278
+ "grad_norm": 1385.7913818359375,
279
  "learning_rate": 3.2500000000000004e-05,
280
+ "loss": 0.005,
281
  "step": 350
282
  },
283
  {
284
  "epoch": 2.88,
285
+ "grad_norm": 1317.3277587890625,
286
  "learning_rate": 3.2000000000000005e-05,
287
+ "loss": 0.0047,
288
  "step": 360
289
  },
290
  {
291
  "epoch": 2.96,
292
+ "grad_norm": 1197.3973388671875,
293
  "learning_rate": 3.15e-05,
294
+ "loss": 0.0043,
295
  "step": 370
296
  },
297
  {
298
  "epoch": 3.04,
299
+ "grad_norm": 1110.10693359375,
300
  "learning_rate": 3.1e-05,
301
+ "loss": 0.004,
302
  "step": 380
303
  },
304
  {
305
  "epoch": 3.12,
306
+ "grad_norm": 1046.7801513671875,
307
  "learning_rate": 3.05e-05,
308
+ "loss": 0.0037,
309
  "step": 390
310
  },
311
  {
312
  "epoch": 3.2,
313
+ "grad_norm": 958.0781860351562,
314
  "learning_rate": 3e-05,
315
+ "loss": 0.0035,
316
  "step": 400
317
  },
318
  {
319
  "epoch": 3.2,
320
  "eval_accuracy": 1.0,
321
+ "eval_loss": 0.0034720886033028364,
322
+ "eval_runtime": 13.6477,
323
+ "eval_samples_per_second": 73.272,
324
+ "eval_steps_per_second": 4.616,
325
  "step": 400
326
  },
327
  {
328
  "epoch": 3.2800000000000002,
329
+ "grad_norm": 986.9517822265625,
330
  "learning_rate": 2.95e-05,
331
+ "loss": 0.0032,
332
  "step": 410
333
  },
334
  {
335
  "epoch": 3.36,
336
+ "grad_norm": 841.5371704101562,
337
  "learning_rate": 2.9e-05,
338
+ "loss": 0.003,
339
  "step": 420
340
  },
341
  {
342
  "epoch": 3.44,
343
+ "grad_norm": 797.5939331054688,
344
  "learning_rate": 2.8499999999999998e-05,
345
+ "loss": 0.0028,
346
  "step": 430
347
  },
348
  {
349
  "epoch": 3.52,
350
+ "grad_norm": 735.5321655273438,
351
  "learning_rate": 2.8000000000000003e-05,
352
+ "loss": 0.0026,
353
  "step": 440
354
  },
355
  {
356
  "epoch": 3.6,
357
+ "grad_norm": 701.638427734375,
358
  "learning_rate": 2.7500000000000004e-05,
359
+ "loss": 0.0024,
360
  "step": 450
361
  },
362
  {
363
  "epoch": 3.68,
364
+ "grad_norm": 647.36279296875,
365
  "learning_rate": 2.7000000000000002e-05,
366
+ "loss": 0.0023,
367
  "step": 460
368
  },
369
  {
370
  "epoch": 3.76,
371
+ "grad_norm": 599.30126953125,
372
  "learning_rate": 2.6500000000000004e-05,
373
+ "loss": 0.0021,
374
  "step": 470
375
  },
376
  {
377
  "epoch": 3.84,
378
+ "grad_norm": 590.8321533203125,
379
  "learning_rate": 2.6000000000000002e-05,
380
+ "loss": 0.002,
381
  "step": 480
382
  },
383
  {
384
  "epoch": 3.92,
385
+ "grad_norm": 546.0530395507812,
386
  "learning_rate": 2.5500000000000003e-05,
387
+ "loss": 0.0019,
388
  "step": 490
389
  },
390
  {
391
  "epoch": 4.0,
392
+ "grad_norm": 500.25738525390625,
393
  "learning_rate": 2.5e-05,
394
+ "loss": 0.0018,
395
  "step": 500
396
  },
397
  {
398
  "epoch": 4.0,
399
  "eval_accuracy": 1.0,
400
+ "eval_loss": 0.0022775332909077406,
401
+ "eval_runtime": 13.3322,
402
+ "eval_samples_per_second": 75.006,
403
+ "eval_steps_per_second": 4.725,
404
  "step": 500
405
  },
406
  {
407
  "epoch": 4.08,
408
+ "grad_norm": 494.0545349121094,
409
  "learning_rate": 2.45e-05,
410
+ "loss": 0.0017,
411
  "step": 510
412
  },
413
  {
414
  "epoch": 4.16,
415
+ "grad_norm": 452.84375,
416
  "learning_rate": 2.4e-05,
417
+ "loss": 0.0016,
418
  "step": 520
419
  },
420
  {
421
  "epoch": 4.24,
422
+ "grad_norm": 434.6565246582031,
423
  "learning_rate": 2.35e-05,
424
+ "loss": 0.0015,
425
  "step": 530
426
  },
427
  {
428
  "epoch": 4.32,
429
+ "grad_norm": 404.3072204589844,
430
  "learning_rate": 2.3000000000000003e-05,
431
+ "loss": 0.0014,
432
  "step": 540
433
  },
434
  {
435
  "epoch": 4.4,
436
+ "grad_norm": 389.670166015625,
437
  "learning_rate": 2.25e-05,
438
  "loss": 0.0013,
439
  "step": 550
440
  },
441
  {
442
  "epoch": 4.48,
443
+ "grad_norm": 387.8797607421875,
444
  "learning_rate": 2.2000000000000003e-05,
445
  "loss": 0.0012,
446
  "step": 560
447
  },
448
  {
449
  "epoch": 4.5600000000000005,
450
+ "grad_norm": 357.6133728027344,
451
  "learning_rate": 2.15e-05,
452
+ "loss": 0.0012,
453
  "step": 570
454
  },
455
  {
456
  "epoch": 4.64,
457
+ "grad_norm": 320.7620544433594,
458
  "learning_rate": 2.1e-05,
459
  "loss": 0.0011,
460
  "step": 580
461
  },
462
  {
463
  "epoch": 4.72,
464
+ "grad_norm": 309.20062255859375,
465
  "learning_rate": 2.05e-05,
466
  "loss": 0.001,
467
  "step": 590
468
  },
469
  {
470
  "epoch": 4.8,
471
+ "grad_norm": 292.0805358886719,
472
  "learning_rate": 2e-05,
473
  "loss": 0.001,
474
  "step": 600
 
476
  {
477
  "epoch": 4.8,
478
  "eval_accuracy": 1.0,
479
+ "eval_loss": 0.001073041232302785,
480
+ "eval_runtime": 13.7046,
481
+ "eval_samples_per_second": 72.968,
482
+ "eval_steps_per_second": 4.597,
483
  "step": 600
484
  },
485
  {
486
  "epoch": 4.88,
487
+ "grad_norm": 283.4959411621094,
488
  "learning_rate": 1.9500000000000003e-05,
489
  "loss": 0.0009,
490
  "step": 610
491
  },
492
  {
493
  "epoch": 4.96,
494
+ "grad_norm": 261.8572998046875,
495
  "learning_rate": 1.9e-05,
496
  "loss": 0.0009,
497
  "step": 620
498
  },
499
  {
500
  "epoch": 5.04,
501
+ "grad_norm": 252.91981506347656,
502
  "learning_rate": 1.85e-05,
503
+ "loss": 0.0009,
504
  "step": 630
505
  },
506
  {
507
  "epoch": 5.12,
508
+ "grad_norm": 239.09896850585938,
509
  "learning_rate": 1.8e-05,
510
  "loss": 0.0008,
511
  "step": 640
512
  },
513
  {
514
  "epoch": 5.2,
515
+ "grad_norm": 232.7013397216797,
516
  "learning_rate": 1.75e-05,
517
  "loss": 0.0008,
518
  "step": 650
519
  },
520
  {
521
  "epoch": 5.28,
522
+ "grad_norm": 220.06301879882812,
523
  "learning_rate": 1.7000000000000003e-05,
524
  "loss": 0.0007,
525
  "step": 660
526
  },
527
  {
528
  "epoch": 5.36,
529
+ "grad_norm": 219.54986572265625,
530
  "learning_rate": 1.65e-05,
531
  "loss": 0.0007,
532
  "step": 670
533
  },
534
  {
535
  "epoch": 5.44,
536
+ "grad_norm": 206.40716552734375,
537
  "learning_rate": 1.6000000000000003e-05,
538
  "loss": 0.0007,
539
  "step": 680
540
  },
541
  {
542
  "epoch": 5.52,
543
+ "grad_norm": 196.642578125,
544
  "learning_rate": 1.55e-05,
545
+ "loss": 0.0007,
546
  "step": 690
547
  },
548
  {
549
  "epoch": 5.6,
550
+ "grad_norm": 187.69554138183594,
551
  "learning_rate": 1.5e-05,
552
  "loss": 0.0006,
553
  "step": 700
 
555
  {
556
  "epoch": 5.6,
557
  "eval_accuracy": 1.0,
558
+ "eval_loss": 0.0006605549133382738,
559
+ "eval_runtime": 13.6953,
560
+ "eval_samples_per_second": 73.018,
561
+ "eval_steps_per_second": 4.6,
562
  "step": 700
563
  },
564
  {
565
  "epoch": 5.68,
566
+ "grad_norm": 183.08045959472656,
567
  "learning_rate": 1.45e-05,
568
  "loss": 0.0006,
569
  "step": 710
570
  },
571
  {
572
  "epoch": 5.76,
573
+ "grad_norm": 174.93222045898438,
574
  "learning_rate": 1.4000000000000001e-05,
575
  "loss": 0.0006,
576
  "step": 720
577
  },
578
  {
579
  "epoch": 5.84,
580
+ "grad_norm": 177.07530212402344,
581
  "learning_rate": 1.3500000000000001e-05,
582
+ "loss": 0.0006,
583
  "step": 730
584
  },
585
  {
586
  "epoch": 5.92,
587
+ "grad_norm": 166.14947509765625,
588
  "learning_rate": 1.3000000000000001e-05,
589
  "loss": 0.0005,
590
  "step": 740
591
  },
592
  {
593
  "epoch": 6.0,
594
+ "grad_norm": 165.67318725585938,
595
  "learning_rate": 1.25e-05,
596
  "loss": 0.0005,
597
  "step": 750
598
  },
599
  {
600
  "epoch": 6.08,
601
+ "grad_norm": 158.77545166015625,
602
  "learning_rate": 1.2e-05,
603
  "loss": 0.0005,
604
  "step": 760
605
  },
606
  {
607
  "epoch": 6.16,
608
+ "grad_norm": 149.71511840820312,
609
  "learning_rate": 1.1500000000000002e-05,
610
  "loss": 0.0005,
611
  "step": 770
612
  },
613
  {
614
  "epoch": 6.24,
615
+ "grad_norm": 144.20770263671875,
616
  "learning_rate": 1.1000000000000001e-05,
617
  "loss": 0.0005,
618
  "step": 780
619
  },
620
  {
621
  "epoch": 6.32,
622
+ "grad_norm": 148.6312255859375,
623
  "learning_rate": 1.05e-05,
624
  "loss": 0.0005,
625
  "step": 790
626
  },
627
  {
628
  "epoch": 6.4,
629
+ "grad_norm": 135.4142303466797,
630
  "learning_rate": 1e-05,
631
+ "loss": 0.0005,
632
  "step": 800
633
  },
634
  {
635
  "epoch": 6.4,
636
  "eval_accuracy": 1.0,
637
+ "eval_loss": 0.0004884201916866004,
638
+ "eval_runtime": 13.5521,
639
+ "eval_samples_per_second": 73.79,
640
+ "eval_steps_per_second": 4.649,
641
  "step": 800
642
  },
643
  {
644
  "epoch": 6.48,
645
+ "grad_norm": 133.83642578125,
646
  "learning_rate": 9.5e-06,
647
  "loss": 0.0004,
648
  "step": 810
649
  },
650
  {
651
  "epoch": 6.5600000000000005,
652
+ "grad_norm": 138.82203674316406,
653
  "learning_rate": 9e-06,
654
  "loss": 0.0004,
655
  "step": 820
656
  },
657
  {
658
  "epoch": 6.64,
659
+ "grad_norm": 127.43915557861328,
660
  "learning_rate": 8.500000000000002e-06,
661
  "loss": 0.0004,
662
  "step": 830
663
  },
664
  {
665
  "epoch": 6.72,
666
+ "grad_norm": 126.12251281738281,
667
  "learning_rate": 8.000000000000001e-06,
668
  "loss": 0.0004,
669
  "step": 840
670
  },
671
  {
672
  "epoch": 6.8,
673
+ "grad_norm": 121.66053771972656,
674
  "learning_rate": 7.5e-06,
675
  "loss": 0.0004,
676
  "step": 850
677
  },
678
  {
679
  "epoch": 6.88,
680
+ "grad_norm": 121.25574493408203,
681
  "learning_rate": 7.000000000000001e-06,
682
  "loss": 0.0004,
683
  "step": 860
684
  },
685
  {
686
  "epoch": 6.96,
687
+ "grad_norm": 126.3290023803711,
688
  "learning_rate": 6.5000000000000004e-06,
689
  "loss": 0.0004,
690
  "step": 870
691
  },
692
  {
693
  "epoch": 7.04,
694
+ "grad_norm": 117.99575805664062,
695
  "learning_rate": 6e-06,
696
  "loss": 0.0004,
697
  "step": 880
698
  },
699
  {
700
  "epoch": 7.12,
701
+ "grad_norm": 116.10645294189453,
702
  "learning_rate": 5.500000000000001e-06,
703
  "loss": 0.0004,
704
  "step": 890
705
  },
706
  {
707
  "epoch": 7.2,
708
+ "grad_norm": 113.11275482177734,
709
  "learning_rate": 5e-06,
710
  "loss": 0.0004,
711
  "step": 900
 
713
  {
714
  "epoch": 7.2,
715
  "eval_accuracy": 1.0,
716
+ "eval_loss": 0.0003921452153008431,
717
+ "eval_runtime": 13.7013,
718
+ "eval_samples_per_second": 72.986,
719
+ "eval_steps_per_second": 4.598,
720
  "step": 900
721
  },
722
  {
723
  "epoch": 7.28,
724
+ "grad_norm": 114.9211196899414,
725
  "learning_rate": 4.5e-06,
726
  "loss": 0.0004,
727
  "step": 910
728
  },
729
  {
730
  "epoch": 7.36,
731
+ "grad_norm": 110.5498046875,
732
  "learning_rate": 4.000000000000001e-06,
733
  "loss": 0.0004,
734
  "step": 920
735
  },
736
  {
737
  "epoch": 7.44,
738
+ "grad_norm": 110.70841979980469,
739
  "learning_rate": 3.5000000000000004e-06,
740
  "loss": 0.0004,
741
  "step": 930
742
  },
743
  {
744
  "epoch": 7.52,
745
+ "grad_norm": 115.6305160522461,
746
  "learning_rate": 3e-06,
747
+ "loss": 0.0004,
748
  "step": 940
749
  },
750
  {
751
  "epoch": 7.6,
752
+ "grad_norm": 106.5681381225586,
753
  "learning_rate": 2.5e-06,
754
+ "loss": 0.0004,
755
  "step": 950
756
  },
757
  {
758
  "epoch": 7.68,
759
+ "grad_norm": 109.81066131591797,
760
  "learning_rate": 2.0000000000000003e-06,
761
  "loss": 0.0003,
762
  "step": 960
763
  },
764
  {
765
  "epoch": 7.76,
766
+ "grad_norm": 107.74824523925781,
767
  "learning_rate": 1.5e-06,
768
  "loss": 0.0003,
769
  "step": 970
770
  },
771
  {
772
  "epoch": 7.84,
773
+ "grad_norm": 108.34854888916016,
774
  "learning_rate": 1.0000000000000002e-06,
775
  "loss": 0.0003,
776
  "step": 980
777
  },
778
  {
779
  "epoch": 7.92,
780
+ "grad_norm": 107.01416778564453,
781
  "learning_rate": 5.000000000000001e-07,
782
  "loss": 0.0003,
783
  "step": 990
784
  },
785
  {
786
  "epoch": 8.0,
787
+ "grad_norm": 111.06143188476562,
788
  "learning_rate": 0.0,
789
  "loss": 0.0003,
790
  "step": 1000
 
792
  {
793
  "epoch": 8.0,
794
  "eval_accuracy": 1.0,
795
+ "eval_loss": 0.0003763487620744854,
796
+ "eval_runtime": 13.1171,
797
+ "eval_samples_per_second": 76.236,
798
+ "eval_steps_per_second": 4.803,
799
  "step": 1000
800
  },
801
  {
802
  "epoch": 8.0,
803
  "step": 1000,
804
  "total_flos": 2.479168170953736e+18,
805
+ "train_loss": 0.03565365221118554,
806
+ "train_runtime": 1044.6113,
807
+ "train_samples_per_second": 30.626,
808
+ "train_steps_per_second": 0.957
809
  }
810
  ],
811
  "logging_steps": 10,