RobertoSonic commited on
Commit
ad16e17
·
verified ·
1 Parent(s): d077246

End of training

Browse files
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.2461
22
- - Accuracy: 0.6923
23
 
24
  ## Model description
25
 
 
18
 
19
  This model is a fine-tuned version of [microsoft/swinv2-tiny-patch4-window8-256](https://huggingface.co/microsoft/swinv2-tiny-patch4-window8-256) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.8631
22
+ - Accuracy: 0.7308
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 41.93023255813954,
3
  "eval_accuracy": 0.7307692307692307,
4
- "eval_loss": 0.9577280879020691,
5
- "eval_runtime": 1.0772,
6
- "eval_samples_per_second": 48.273,
7
- "eval_steps_per_second": 1.857,
8
  "total_flos": 1.8345690247389512e+18,
9
- "train_loss": 1.2270459805216107,
10
- "train_runtime": 1356.3174,
11
- "train_samples_per_second": 42.176,
12
- "train_steps_per_second": 0.31
13
  }
 
1
  {
2
  "epoch": 41.93023255813954,
3
  "eval_accuracy": 0.7307692307692307,
4
+ "eval_loss": 0.8631380796432495,
5
+ "eval_runtime": 1.3442,
6
+ "eval_samples_per_second": 38.683,
7
+ "eval_steps_per_second": 1.488,
8
  "total_flos": 1.8345690247389512e+18,
9
+ "train_loss": 1.1112867690268018,
10
+ "train_runtime": 1368.6695,
11
+ "train_samples_per_second": 41.795,
12
+ "train_steps_per_second": 0.307
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 41.93023255813954,
3
  "eval_accuracy": 0.7307692307692307,
4
- "eval_loss": 0.9577280879020691,
5
- "eval_runtime": 1.0772,
6
- "eval_samples_per_second": 48.273,
7
- "eval_steps_per_second": 1.857
8
  }
 
1
  {
2
  "epoch": 41.93023255813954,
3
  "eval_accuracy": 0.7307692307692307,
4
+ "eval_loss": 0.8631380796432495,
5
+ "eval_runtime": 1.3442,
6
+ "eval_samples_per_second": 38.683,
7
+ "eval_steps_per_second": 1.488
8
  }
runs/Jan21_16-36-23_31423d62c322/events.out.tfevents.1737480255.31423d62c322.1929.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df65b5a9535834e72ff108061e1e39a0e3c9f13e5595cf23ff0e4c1ad3093d93
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 41.93023255813954,
3
  "total_flos": 1.8345690247389512e+18,
4
- "train_loss": 1.2270459805216107,
5
- "train_runtime": 1356.3174,
6
- "train_samples_per_second": 42.176,
7
- "train_steps_per_second": 0.31
8
  }
 
1
  {
2
  "epoch": 41.93023255813954,
3
  "total_flos": 1.8345690247389512e+18,
4
+ "train_loss": 1.1112867690268018,
5
+ "train_runtime": 1368.6695,
6
+ "train_samples_per_second": 41.795,
7
+ "train_steps_per_second": 0.307
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_metric": 0.7307692307692307,
3
- "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV21/checkpoint-90",
4
  "epoch": 41.93023255813954,
5
  "eval_steps": 500,
6
  "global_step": 420,
@@ -10,635 +10,635 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9302325581395349,
13
- "eval_accuracy": 0.23076923076923078,
14
- "eval_loss": 1.600624680519104,
15
- "eval_runtime": 0.9466,
16
- "eval_samples_per_second": 54.935,
17
- "eval_steps_per_second": 2.113,
18
  "step": 10
19
  },
20
  {
21
  "epoch": 1.1860465116279069,
22
- "grad_norm": 11.869792938232422,
23
  "learning_rate": 1.4285714285714285e-05,
24
- "loss": 6.7273,
25
  "step": 12
26
  },
27
  {
28
  "epoch": 1.9302325581395348,
29
- "eval_accuracy": 0.3076923076923077,
30
- "eval_loss": 1.4678974151611328,
31
- "eval_runtime": 0.9407,
32
- "eval_samples_per_second": 55.281,
33
- "eval_steps_per_second": 2.126,
34
  "step": 20
35
  },
36
  {
37
  "epoch": 2.3720930232558137,
38
- "grad_norm": 21.593393325805664,
39
  "learning_rate": 2.857142857142857e-05,
40
- "loss": 5.9474,
41
  "step": 24
42
  },
43
  {
44
  "epoch": 2.9302325581395348,
45
- "eval_accuracy": 0.5384615384615384,
46
- "eval_loss": 1.1914443969726562,
47
- "eval_runtime": 0.9141,
48
- "eval_samples_per_second": 56.886,
49
- "eval_steps_per_second": 2.188,
50
  "step": 30
51
  },
52
  {
53
  "epoch": 3.558139534883721,
54
- "grad_norm": 30.464523315429688,
55
  "learning_rate": 4.2857142857142856e-05,
56
- "loss": 4.3002,
57
  "step": 36
58
  },
59
  {
60
  "epoch": 3.9302325581395348,
61
- "eval_accuracy": 0.46153846153846156,
62
- "eval_loss": 1.1951944828033447,
63
- "eval_runtime": 1.2786,
64
- "eval_samples_per_second": 40.671,
65
- "eval_steps_per_second": 1.564,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 4.7441860465116275,
70
- "grad_norm": 25.736072540283203,
71
  "learning_rate": 4.9206349206349204e-05,
72
- "loss": 2.9717,
73
  "step": 48
74
  },
75
  {
76
  "epoch": 4.930232558139535,
77
- "eval_accuracy": 0.5769230769230769,
78
- "eval_loss": 0.9782841205596924,
79
- "eval_runtime": 0.9391,
80
- "eval_samples_per_second": 55.375,
81
- "eval_steps_per_second": 2.13,
82
  "step": 50
83
  },
84
  {
85
  "epoch": 5.930232558139535,
86
- "grad_norm": 54.5507698059082,
87
  "learning_rate": 4.761904761904762e-05,
88
- "loss": 2.4828,
89
  "step": 60
90
  },
91
  {
92
  "epoch": 5.930232558139535,
93
- "eval_accuracy": 0.5769230769230769,
94
- "eval_loss": 0.9832176566123962,
95
- "eval_runtime": 0.9619,
96
- "eval_samples_per_second": 54.06,
97
- "eval_steps_per_second": 2.079,
98
  "step": 60
99
  },
100
  {
101
  "epoch": 6.930232558139535,
102
- "eval_accuracy": 0.5,
103
- "eval_loss": 1.0621165037155151,
104
- "eval_runtime": 1.3284,
105
- "eval_samples_per_second": 39.145,
106
- "eval_steps_per_second": 1.506,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 7.186046511627907,
111
- "grad_norm": 26.069013595581055,
112
  "learning_rate": 4.603174603174603e-05,
113
- "loss": 2.0276,
114
  "step": 72
115
  },
116
  {
117
  "epoch": 7.930232558139535,
118
- "eval_accuracy": 0.6538461538461539,
119
- "eval_loss": 0.9040729999542236,
120
- "eval_runtime": 0.9561,
121
- "eval_samples_per_second": 54.39,
122
- "eval_steps_per_second": 2.092,
123
  "step": 80
124
  },
125
  {
126
  "epoch": 8.372093023255815,
127
- "grad_norm": 43.163116455078125,
128
  "learning_rate": 4.4444444444444447e-05,
129
- "loss": 1.7041,
130
  "step": 84
131
  },
132
  {
133
  "epoch": 8.930232558139535,
134
- "eval_accuracy": 0.7307692307692307,
135
- "eval_loss": 0.9577280879020691,
136
- "eval_runtime": 0.9834,
137
- "eval_samples_per_second": 52.878,
138
- "eval_steps_per_second": 2.034,
139
  "step": 90
140
  },
141
  {
142
  "epoch": 9.55813953488372,
143
- "grad_norm": 45.423728942871094,
144
  "learning_rate": 4.2857142857142856e-05,
145
- "loss": 1.3967,
146
  "step": 96
147
  },
148
  {
149
  "epoch": 9.930232558139535,
150
  "eval_accuracy": 0.6730769230769231,
151
- "eval_loss": 1.0290472507476807,
152
- "eval_runtime": 0.9566,
153
- "eval_samples_per_second": 54.362,
154
- "eval_steps_per_second": 2.091,
155
  "step": 100
156
  },
157
  {
158
  "epoch": 10.744186046511627,
159
- "grad_norm": 29.1428279876709,
160
  "learning_rate": 4.126984126984127e-05,
161
- "loss": 1.3079,
162
  "step": 108
163
  },
164
  {
165
  "epoch": 10.930232558139535,
166
- "eval_accuracy": 0.7115384615384616,
167
- "eval_loss": 0.9863845109939575,
168
- "eval_runtime": 1.3391,
169
- "eval_samples_per_second": 38.832,
170
- "eval_steps_per_second": 1.494,
171
  "step": 110
172
  },
173
  {
174
  "epoch": 11.930232558139535,
175
- "grad_norm": 25.176292419433594,
176
  "learning_rate": 3.968253968253968e-05,
177
- "loss": 1.0486,
178
  "step": 120
179
  },
180
  {
181
  "epoch": 11.930232558139535,
182
- "eval_accuracy": 0.6730769230769231,
183
- "eval_loss": 1.0005784034729004,
184
- "eval_runtime": 0.9808,
185
- "eval_samples_per_second": 53.016,
186
- "eval_steps_per_second": 2.039,
187
  "step": 120
188
  },
189
  {
190
  "epoch": 12.930232558139535,
191
- "eval_accuracy": 0.6153846153846154,
192
- "eval_loss": 0.9503486156463623,
193
- "eval_runtime": 0.9746,
194
- "eval_samples_per_second": 53.353,
195
- "eval_steps_per_second": 2.052,
196
  "step": 130
197
  },
198
  {
199
  "epoch": 13.186046511627907,
200
- "grad_norm": 55.16592025756836,
201
  "learning_rate": 3.809523809523809e-05,
202
- "loss": 1.0704,
203
  "step": 132
204
  },
205
  {
206
  "epoch": 13.930232558139535,
207
- "eval_accuracy": 0.7307692307692307,
208
- "eval_loss": 0.9434700012207031,
209
- "eval_runtime": 1.3209,
210
- "eval_samples_per_second": 39.368,
211
- "eval_steps_per_second": 1.514,
212
  "step": 140
213
  },
214
  {
215
  "epoch": 14.372093023255815,
216
- "grad_norm": 31.888456344604492,
217
  "learning_rate": 3.650793650793651e-05,
218
- "loss": 0.9929,
219
  "step": 144
220
  },
221
  {
222
  "epoch": 14.930232558139535,
223
- "eval_accuracy": 0.6346153846153846,
224
- "eval_loss": 1.0601013898849487,
225
- "eval_runtime": 0.9793,
226
- "eval_samples_per_second": 53.098,
227
- "eval_steps_per_second": 2.042,
228
  "step": 150
229
  },
230
  {
231
  "epoch": 15.55813953488372,
232
- "grad_norm": 29.11701202392578,
233
  "learning_rate": 3.492063492063492e-05,
234
- "loss": 0.8911,
235
  "step": 156
236
  },
237
  {
238
  "epoch": 15.930232558139535,
239
- "eval_accuracy": 0.6730769230769231,
240
- "eval_loss": 1.0281590223312378,
241
- "eval_runtime": 0.9645,
242
- "eval_samples_per_second": 53.912,
243
- "eval_steps_per_second": 2.074,
244
  "step": 160
245
  },
246
  {
247
  "epoch": 16.74418604651163,
248
- "grad_norm": 32.15597152709961,
249
  "learning_rate": 3.3333333333333335e-05,
250
- "loss": 0.7702,
251
  "step": 168
252
  },
253
  {
254
  "epoch": 16.930232558139537,
255
- "eval_accuracy": 0.6923076923076923,
256
- "eval_loss": 1.1857846975326538,
257
- "eval_runtime": 0.9593,
258
- "eval_samples_per_second": 54.207,
259
- "eval_steps_per_second": 2.085,
260
  "step": 170
261
  },
262
  {
263
  "epoch": 17.930232558139537,
264
- "grad_norm": 32.97303771972656,
265
  "learning_rate": 3.1746031746031745e-05,
266
- "loss": 0.718,
267
  "step": 180
268
  },
269
  {
270
  "epoch": 17.930232558139537,
271
- "eval_accuracy": 0.6538461538461539,
272
- "eval_loss": 1.2644145488739014,
273
- "eval_runtime": 1.3041,
274
- "eval_samples_per_second": 39.875,
275
- "eval_steps_per_second": 1.534,
276
  "step": 180
277
  },
278
  {
279
  "epoch": 18.930232558139537,
280
- "eval_accuracy": 0.6346153846153846,
281
- "eval_loss": 1.1403751373291016,
282
- "eval_runtime": 0.957,
283
- "eval_samples_per_second": 54.339,
284
- "eval_steps_per_second": 2.09,
285
  "step": 190
286
  },
287
  {
288
  "epoch": 19.186046511627907,
289
- "grad_norm": 18.562591552734375,
290
  "learning_rate": 3.0158730158730158e-05,
291
- "loss": 0.6526,
292
  "step": 192
293
  },
294
  {
295
  "epoch": 19.930232558139537,
296
- "eval_accuracy": 0.6923076923076923,
297
- "eval_loss": 1.2202544212341309,
298
- "eval_runtime": 0.981,
299
- "eval_samples_per_second": 53.008,
300
- "eval_steps_per_second": 2.039,
301
  "step": 200
302
  },
303
  {
304
  "epoch": 20.372093023255815,
305
- "grad_norm": 41.22922897338867,
306
  "learning_rate": 2.857142857142857e-05,
307
- "loss": 0.6617,
308
  "step": 204
309
  },
310
  {
311
  "epoch": 20.930232558139537,
312
- "eval_accuracy": 0.6730769230769231,
313
- "eval_loss": 1.3465415239334106,
314
- "eval_runtime": 1.3212,
315
- "eval_samples_per_second": 39.359,
316
- "eval_steps_per_second": 1.514,
317
  "step": 210
318
  },
319
  {
320
  "epoch": 21.558139534883722,
321
- "grad_norm": 28.829620361328125,
322
  "learning_rate": 2.6984126984126984e-05,
323
- "loss": 0.5699,
324
  "step": 216
325
  },
326
  {
327
  "epoch": 21.930232558139537,
328
  "eval_accuracy": 0.6538461538461539,
329
- "eval_loss": 1.3862558603286743,
330
- "eval_runtime": 0.9516,
331
- "eval_samples_per_second": 54.644,
332
- "eval_steps_per_second": 2.102,
333
  "step": 220
334
  },
335
  {
336
  "epoch": 22.74418604651163,
337
- "grad_norm": 23.03827476501465,
338
  "learning_rate": 2.5396825396825397e-05,
339
- "loss": 0.5228,
340
  "step": 228
341
  },
342
  {
343
  "epoch": 22.930232558139537,
344
- "eval_accuracy": 0.7115384615384616,
345
- "eval_loss": 1.272707223892212,
346
- "eval_runtime": 0.985,
347
- "eval_samples_per_second": 52.794,
348
- "eval_steps_per_second": 2.031,
349
  "step": 230
350
  },
351
  {
352
  "epoch": 23.930232558139537,
353
- "grad_norm": 27.27239990234375,
354
  "learning_rate": 2.380952380952381e-05,
355
- "loss": 0.4321,
356
  "step": 240
357
  },
358
  {
359
  "epoch": 23.930232558139537,
360
- "eval_accuracy": 0.7115384615384616,
361
- "eval_loss": 1.194717288017273,
362
- "eval_runtime": 0.9431,
363
- "eval_samples_per_second": 55.135,
364
- "eval_steps_per_second": 2.121,
365
  "step": 240
366
  },
367
  {
368
  "epoch": 24.930232558139537,
369
- "eval_accuracy": 0.6346153846153846,
370
- "eval_loss": 1.397331714630127,
371
- "eval_runtime": 1.2955,
372
- "eval_samples_per_second": 40.139,
373
- "eval_steps_per_second": 1.544,
374
  "step": 250
375
  },
376
  {
377
  "epoch": 25.186046511627907,
378
- "grad_norm": 24.13581085205078,
379
  "learning_rate": 2.2222222222222223e-05,
380
- "loss": 0.557,
381
  "step": 252
382
  },
383
  {
384
  "epoch": 25.930232558139537,
385
- "eval_accuracy": 0.6538461538461539,
386
- "eval_loss": 1.4321506023406982,
387
- "eval_runtime": 1.3216,
388
- "eval_samples_per_second": 39.345,
389
- "eval_steps_per_second": 1.513,
390
  "step": 260
391
  },
392
  {
393
  "epoch": 26.372093023255815,
394
- "grad_norm": 34.4373664855957,
395
  "learning_rate": 2.0634920634920636e-05,
396
- "loss": 0.4569,
397
  "step": 264
398
  },
399
  {
400
  "epoch": 26.930232558139537,
401
- "eval_accuracy": 0.6538461538461539,
402
- "eval_loss": 1.3255730867385864,
403
- "eval_runtime": 0.9379,
404
- "eval_samples_per_second": 55.443,
405
- "eval_steps_per_second": 2.132,
406
  "step": 270
407
  },
408
  {
409
  "epoch": 27.558139534883722,
410
- "grad_norm": 19.65439224243164,
411
  "learning_rate": 1.9047619047619046e-05,
412
- "loss": 0.4395,
413
  "step": 276
414
  },
415
  {
416
  "epoch": 27.930232558139537,
417
- "eval_accuracy": 0.6730769230769231,
418
- "eval_loss": 1.3907530307769775,
419
- "eval_runtime": 1.2894,
420
- "eval_samples_per_second": 40.33,
421
- "eval_steps_per_second": 1.551,
422
  "step": 280
423
  },
424
  {
425
  "epoch": 28.74418604651163,
426
- "grad_norm": 31.272125244140625,
427
  "learning_rate": 1.746031746031746e-05,
428
- "loss": 0.5587,
429
  "step": 288
430
  },
431
  {
432
  "epoch": 28.930232558139537,
433
- "eval_accuracy": 0.6923076923076923,
434
- "eval_loss": 1.5055606365203857,
435
- "eval_runtime": 0.9347,
436
- "eval_samples_per_second": 55.632,
437
- "eval_steps_per_second": 2.14,
438
  "step": 290
439
  },
440
  {
441
  "epoch": 29.930232558139537,
442
- "grad_norm": 21.733734130859375,
443
  "learning_rate": 1.5873015873015872e-05,
444
- "loss": 0.439,
445
  "step": 300
446
  },
447
  {
448
  "epoch": 29.930232558139537,
449
- "eval_accuracy": 0.6538461538461539,
450
- "eval_loss": 1.3810527324676514,
451
- "eval_runtime": 0.9297,
452
- "eval_samples_per_second": 55.934,
453
- "eval_steps_per_second": 2.151,
454
  "step": 300
455
  },
456
  {
457
  "epoch": 30.930232558139537,
458
- "eval_accuracy": 0.6153846153846154,
459
- "eval_loss": 1.3693809509277344,
460
- "eval_runtime": 1.2587,
461
- "eval_samples_per_second": 41.311,
462
- "eval_steps_per_second": 1.589,
463
  "step": 310
464
  },
465
  {
466
  "epoch": 31.186046511627907,
467
- "grad_norm": 24.466344833374023,
468
  "learning_rate": 1.4285714285714285e-05,
469
- "loss": 0.3989,
470
  "step": 312
471
  },
472
  {
473
  "epoch": 31.930232558139537,
474
- "eval_accuracy": 0.6538461538461539,
475
- "eval_loss": 1.4759361743927002,
476
- "eval_runtime": 0.9328,
477
- "eval_samples_per_second": 55.746,
478
- "eval_steps_per_second": 2.144,
479
  "step": 320
480
  },
481
  {
482
  "epoch": 32.372093023255815,
483
- "grad_norm": 35.87159729003906,
484
  "learning_rate": 1.2698412698412699e-05,
485
- "loss": 0.312,
486
  "step": 324
487
  },
488
  {
489
  "epoch": 32.93023255813954,
490
- "eval_accuracy": 0.6346153846153846,
491
- "eval_loss": 1.5513783693313599,
492
- "eval_runtime": 0.9364,
493
- "eval_samples_per_second": 55.53,
494
- "eval_steps_per_second": 2.136,
495
  "step": 330
496
  },
497
  {
498
  "epoch": 33.55813953488372,
499
- "grad_norm": 14.943767547607422,
500
  "learning_rate": 1.1111111111111112e-05,
501
- "loss": 0.3728,
502
  "step": 336
503
  },
504
  {
505
  "epoch": 33.93023255813954,
506
  "eval_accuracy": 0.6538461538461539,
507
- "eval_loss": 1.5128782987594604,
508
- "eval_runtime": 1.2844,
509
- "eval_samples_per_second": 40.485,
510
- "eval_steps_per_second": 1.557,
511
  "step": 340
512
  },
513
  {
514
  "epoch": 34.74418604651163,
515
- "grad_norm": 15.3562593460083,
516
  "learning_rate": 9.523809523809523e-06,
517
- "loss": 0.4031,
518
  "step": 348
519
  },
520
  {
521
  "epoch": 34.93023255813954,
522
- "eval_accuracy": 0.6538461538461539,
523
- "eval_loss": 1.4914475679397583,
524
- "eval_runtime": 0.948,
525
- "eval_samples_per_second": 54.852,
526
- "eval_steps_per_second": 2.11,
527
  "step": 350
528
  },
529
  {
530
  "epoch": 35.93023255813954,
531
- "grad_norm": 23.48607635498047,
532
  "learning_rate": 7.936507936507936e-06,
533
- "loss": 0.3523,
534
  "step": 360
535
  },
536
  {
537
  "epoch": 35.93023255813954,
538
- "eval_accuracy": 0.6730769230769231,
539
- "eval_loss": 1.5588839054107666,
540
- "eval_runtime": 0.9436,
541
- "eval_samples_per_second": 55.108,
542
- "eval_steps_per_second": 2.12,
543
  "step": 360
544
  },
545
  {
546
  "epoch": 36.93023255813954,
547
- "eval_accuracy": 0.6730769230769231,
548
- "eval_loss": 1.568899154663086,
549
- "eval_runtime": 1.2885,
550
- "eval_samples_per_second": 40.359,
551
- "eval_steps_per_second": 1.552,
552
  "step": 370
553
  },
554
  {
555
  "epoch": 37.18604651162791,
556
- "grad_norm": 31.557363510131836,
557
  "learning_rate": 6.349206349206349e-06,
558
- "loss": 0.265,
559
  "step": 372
560
  },
561
  {
562
  "epoch": 37.93023255813954,
563
- "eval_accuracy": 0.6730769230769231,
564
- "eval_loss": 1.6178526878356934,
565
- "eval_runtime": 0.9634,
566
- "eval_samples_per_second": 53.974,
567
- "eval_steps_per_second": 2.076,
568
  "step": 380
569
  },
570
  {
571
  "epoch": 38.372093023255815,
572
- "grad_norm": 27.208581924438477,
573
  "learning_rate": 4.7619047619047615e-06,
574
- "loss": 0.3189,
575
  "step": 384
576
  },
577
  {
578
  "epoch": 38.93023255813954,
579
- "eval_accuracy": 0.6538461538461539,
580
- "eval_loss": 1.5962971448898315,
581
- "eval_runtime": 0.9485,
582
- "eval_samples_per_second": 54.826,
583
- "eval_steps_per_second": 2.109,
584
  "step": 390
585
  },
586
  {
587
  "epoch": 39.55813953488372,
588
- "grad_norm": 30.211496353149414,
589
  "learning_rate": 3.1746031746031746e-06,
590
- "loss": 0.2976,
591
  "step": 396
592
  },
593
  {
594
  "epoch": 39.93023255813954,
595
- "eval_accuracy": 0.6346153846153846,
596
- "eval_loss": 1.5738095045089722,
597
- "eval_runtime": 1.1624,
598
- "eval_samples_per_second": 44.733,
599
- "eval_steps_per_second": 1.721,
600
  "step": 400
601
  },
602
  {
603
  "epoch": 40.74418604651163,
604
- "grad_norm": 16.808286666870117,
605
  "learning_rate": 1.5873015873015873e-06,
606
- "loss": 0.3097,
607
  "step": 408
608
  },
609
  {
610
  "epoch": 40.93023255813954,
611
- "eval_accuracy": 0.6346153846153846,
612
- "eval_loss": 1.578661561012268,
613
- "eval_runtime": 0.9279,
614
- "eval_samples_per_second": 56.041,
615
- "eval_steps_per_second": 2.155,
616
  "step": 410
617
  },
618
  {
619
  "epoch": 41.93023255813954,
620
- "grad_norm": 11.15967082977295,
621
  "learning_rate": 0.0,
622
- "loss": 0.269,
623
  "step": 420
624
  },
625
  {
626
  "epoch": 41.93023255813954,
627
- "eval_accuracy": 0.6346153846153846,
628
- "eval_loss": 1.5818698406219482,
629
- "eval_runtime": 1.6499,
630
- "eval_samples_per_second": 31.516,
631
- "eval_steps_per_second": 1.212,
632
  "step": 420
633
  },
634
  {
635
  "epoch": 41.93023255813954,
636
  "step": 420,
637
  "total_flos": 1.8345690247389512e+18,
638
- "train_loss": 1.2270459805216107,
639
- "train_runtime": 1356.3174,
640
- "train_samples_per_second": 42.176,
641
- "train_steps_per_second": 0.31
642
  }
643
  ],
644
  "logging_steps": 12,
 
1
  {
2
  "best_metric": 0.7307692307692307,
3
+ "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-humeda-DAV21/checkpoint-130",
4
  "epoch": 41.93023255813954,
5
  "eval_steps": 500,
6
  "global_step": 420,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9302325581395349,
13
+ "eval_accuracy": 0.34615384615384615,
14
+ "eval_loss": 1.550955891609192,
15
+ "eval_runtime": 0.9413,
16
+ "eval_samples_per_second": 55.245,
17
+ "eval_steps_per_second": 2.125,
18
  "step": 10
19
  },
20
  {
21
  "epoch": 1.1860465116279069,
22
+ "grad_norm": 13.047734260559082,
23
  "learning_rate": 1.4285714285714285e-05,
24
+ "loss": 6.5929,
25
  "step": 12
26
  },
27
  {
28
  "epoch": 1.9302325581395348,
29
+ "eval_accuracy": 0.2692307692307692,
30
+ "eval_loss": 1.4801901578903198,
31
+ "eval_runtime": 0.914,
32
+ "eval_samples_per_second": 56.894,
33
+ "eval_steps_per_second": 2.188,
34
  "step": 20
35
  },
36
  {
37
  "epoch": 2.3720930232558137,
38
+ "grad_norm": 18.362613677978516,
39
  "learning_rate": 2.857142857142857e-05,
40
+ "loss": 5.6252,
41
  "step": 24
42
  },
43
  {
44
  "epoch": 2.9302325581395348,
45
+ "eval_accuracy": 0.40384615384615385,
46
+ "eval_loss": 1.1115041971206665,
47
+ "eval_runtime": 3.1175,
48
+ "eval_samples_per_second": 16.68,
49
+ "eval_steps_per_second": 0.642,
50
  "step": 30
51
  },
52
  {
53
  "epoch": 3.558139534883721,
54
+ "grad_norm": 30.29971694946289,
55
  "learning_rate": 4.2857142857142856e-05,
56
+ "loss": 3.874,
57
  "step": 36
58
  },
59
  {
60
  "epoch": 3.9302325581395348,
61
+ "eval_accuracy": 0.5576923076923077,
62
+ "eval_loss": 0.9996148347854614,
63
+ "eval_runtime": 0.9335,
64
+ "eval_samples_per_second": 55.705,
65
+ "eval_steps_per_second": 2.143,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 4.7441860465116275,
70
+ "grad_norm": 29.079408645629883,
71
  "learning_rate": 4.9206349206349204e-05,
72
+ "loss": 2.7168,
73
  "step": 48
74
  },
75
  {
76
  "epoch": 4.930232558139535,
77
+ "eval_accuracy": 0.6538461538461539,
78
+ "eval_loss": 0.8436340689659119,
79
+ "eval_runtime": 1.286,
80
+ "eval_samples_per_second": 40.437,
81
+ "eval_steps_per_second": 1.555,
82
  "step": 50
83
  },
84
  {
85
  "epoch": 5.930232558139535,
86
+ "grad_norm": 38.7504768371582,
87
  "learning_rate": 4.761904761904762e-05,
88
+ "loss": 2.2435,
89
  "step": 60
90
  },
91
  {
92
  "epoch": 5.930232558139535,
93
+ "eval_accuracy": 0.6153846153846154,
94
+ "eval_loss": 0.9319868087768555,
95
+ "eval_runtime": 0.9627,
96
+ "eval_samples_per_second": 54.015,
97
+ "eval_steps_per_second": 2.078,
98
  "step": 60
99
  },
100
  {
101
  "epoch": 6.930232558139535,
102
+ "eval_accuracy": 0.6346153846153846,
103
+ "eval_loss": 0.8411852121353149,
104
+ "eval_runtime": 0.9446,
105
+ "eval_samples_per_second": 55.048,
106
+ "eval_steps_per_second": 2.117,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 7.186046511627907,
111
+ "grad_norm": 27.031049728393555,
112
  "learning_rate": 4.603174603174603e-05,
113
+ "loss": 1.9334,
114
  "step": 72
115
  },
116
  {
117
  "epoch": 7.930232558139535,
118
+ "eval_accuracy": 0.6730769230769231,
119
+ "eval_loss": 0.8622324466705322,
120
+ "eval_runtime": 1.1865,
121
+ "eval_samples_per_second": 43.825,
122
+ "eval_steps_per_second": 1.686,
123
  "step": 80
124
  },
125
  {
126
  "epoch": 8.372093023255815,
127
+ "grad_norm": 29.655231475830078,
128
  "learning_rate": 4.4444444444444447e-05,
129
+ "loss": 1.6303,
130
  "step": 84
131
  },
132
  {
133
  "epoch": 8.930232558139535,
134
+ "eval_accuracy": 0.7115384615384616,
135
+ "eval_loss": 0.9151535034179688,
136
+ "eval_runtime": 0.9237,
137
+ "eval_samples_per_second": 56.296,
138
+ "eval_steps_per_second": 2.165,
139
  "step": 90
140
  },
141
  {
142
  "epoch": 9.55813953488372,
143
+ "grad_norm": 22.004980087280273,
144
  "learning_rate": 4.2857142857142856e-05,
145
+ "loss": 1.2748,
146
  "step": 96
147
  },
148
  {
149
  "epoch": 9.930232558139535,
150
  "eval_accuracy": 0.6730769230769231,
151
+ "eval_loss": 0.9720916748046875,
152
+ "eval_runtime": 0.9318,
153
+ "eval_samples_per_second": 55.806,
154
+ "eval_steps_per_second": 2.146,
155
  "step": 100
156
  },
157
  {
158
  "epoch": 10.744186046511627,
159
+ "grad_norm": 30.746623992919922,
160
  "learning_rate": 4.126984126984127e-05,
161
+ "loss": 1.0945,
162
  "step": 108
163
  },
164
  {
165
  "epoch": 10.930232558139535,
166
+ "eval_accuracy": 0.6538461538461539,
167
+ "eval_loss": 1.0826632976531982,
168
+ "eval_runtime": 1.2786,
169
+ "eval_samples_per_second": 40.671,
170
+ "eval_steps_per_second": 1.564,
171
  "step": 110
172
  },
173
  {
174
  "epoch": 11.930232558139535,
175
+ "grad_norm": 22.60085105895996,
176
  "learning_rate": 3.968253968253968e-05,
177
+ "loss": 0.8395,
178
  "step": 120
179
  },
180
  {
181
  "epoch": 11.930232558139535,
182
+ "eval_accuracy": 0.7115384615384616,
183
+ "eval_loss": 0.9152665734291077,
184
+ "eval_runtime": 0.9881,
185
+ "eval_samples_per_second": 52.629,
186
+ "eval_steps_per_second": 2.024,
187
  "step": 120
188
  },
189
  {
190
  "epoch": 12.930232558139535,
191
+ "eval_accuracy": 0.7307692307692307,
192
+ "eval_loss": 0.8631380796432495,
193
+ "eval_runtime": 0.9359,
194
+ "eval_samples_per_second": 55.559,
195
+ "eval_steps_per_second": 2.137,
196
  "step": 130
197
  },
198
  {
199
  "epoch": 13.186046511627907,
200
+ "grad_norm": 28.28727912902832,
201
  "learning_rate": 3.809523809523809e-05,
202
+ "loss": 0.8587,
203
  "step": 132
204
  },
205
  {
206
  "epoch": 13.930232558139535,
207
+ "eval_accuracy": 0.6538461538461539,
208
+ "eval_loss": 1.1038739681243896,
209
+ "eval_runtime": 1.1762,
210
+ "eval_samples_per_second": 44.21,
211
+ "eval_steps_per_second": 1.7,
212
  "step": 140
213
  },
214
  {
215
  "epoch": 14.372093023255815,
216
+ "grad_norm": 30.497623443603516,
217
  "learning_rate": 3.650793650793651e-05,
218
+ "loss": 0.8574,
219
  "step": 144
220
  },
221
  {
222
  "epoch": 14.930232558139535,
223
+ "eval_accuracy": 0.6923076923076923,
224
+ "eval_loss": 1.0462819337844849,
225
+ "eval_runtime": 0.9493,
226
+ "eval_samples_per_second": 54.776,
227
+ "eval_steps_per_second": 2.107,
228
  "step": 150
229
  },
230
  {
231
  "epoch": 15.55813953488372,
232
+ "grad_norm": 19.123886108398438,
233
  "learning_rate": 3.492063492063492e-05,
234
+ "loss": 0.7096,
235
  "step": 156
236
  },
237
  {
238
  "epoch": 15.930232558139535,
239
+ "eval_accuracy": 0.7115384615384616,
240
+ "eval_loss": 0.9990596175193787,
241
+ "eval_runtime": 0.9614,
242
+ "eval_samples_per_second": 54.09,
243
+ "eval_steps_per_second": 2.08,
244
  "step": 160
245
  },
246
  {
247
  "epoch": 16.74418604651163,
248
+ "grad_norm": 34.98741912841797,
249
  "learning_rate": 3.3333333333333335e-05,
250
+ "loss": 0.6606,
251
  "step": 168
252
  },
253
  {
254
  "epoch": 16.930232558139537,
255
+ "eval_accuracy": 0.6730769230769231,
256
+ "eval_loss": 1.051858901977539,
257
+ "eval_runtime": 1.0499,
258
+ "eval_samples_per_second": 49.531,
259
+ "eval_steps_per_second": 1.905,
260
  "step": 170
261
  },
262
  {
263
  "epoch": 17.930232558139537,
264
+ "grad_norm": 29.33749771118164,
265
  "learning_rate": 3.1746031746031745e-05,
266
+ "loss": 0.5513,
267
  "step": 180
268
  },
269
  {
270
  "epoch": 17.930232558139537,
271
+ "eval_accuracy": 0.7115384615384616,
272
+ "eval_loss": 1.0864715576171875,
273
+ "eval_runtime": 0.9297,
274
+ "eval_samples_per_second": 55.932,
275
+ "eval_steps_per_second": 2.151,
276
  "step": 180
277
  },
278
  {
279
  "epoch": 18.930232558139537,
280
+ "eval_accuracy": 0.6730769230769231,
281
+ "eval_loss": 1.1139813661575317,
282
+ "eval_runtime": 0.9356,
283
+ "eval_samples_per_second": 55.582,
284
+ "eval_steps_per_second": 2.138,
285
  "step": 190
286
  },
287
  {
288
  "epoch": 19.186046511627907,
289
+ "grad_norm": 31.023462295532227,
290
  "learning_rate": 3.0158730158730158e-05,
291
+ "loss": 0.61,
292
  "step": 192
293
  },
294
  {
295
  "epoch": 19.930232558139537,
296
+ "eval_accuracy": 0.6730769230769231,
297
+ "eval_loss": 1.0289796590805054,
298
+ "eval_runtime": 0.9508,
299
+ "eval_samples_per_second": 54.693,
300
+ "eval_steps_per_second": 2.104,
301
  "step": 200
302
  },
303
  {
304
  "epoch": 20.372093023255815,
305
+ "grad_norm": 31.7921142578125,
306
  "learning_rate": 2.857142857142857e-05,
307
+ "loss": 0.5278,
308
  "step": 204
309
  },
310
  {
311
  "epoch": 20.930232558139537,
312
+ "eval_accuracy": 0.6923076923076923,
313
+ "eval_loss": 1.1002823114395142,
314
+ "eval_runtime": 0.9325,
315
+ "eval_samples_per_second": 55.763,
316
+ "eval_steps_per_second": 2.145,
317
  "step": 210
318
  },
319
  {
320
  "epoch": 21.558139534883722,
321
+ "grad_norm": 33.69935989379883,
322
  "learning_rate": 2.6984126984126984e-05,
323
+ "loss": 0.4639,
324
  "step": 216
325
  },
326
  {
327
  "epoch": 21.930232558139537,
328
  "eval_accuracy": 0.6538461538461539,
329
+ "eval_loss": 1.2471978664398193,
330
+ "eval_runtime": 0.9316,
331
+ "eval_samples_per_second": 55.818,
332
+ "eval_steps_per_second": 2.147,
333
  "step": 220
334
  },
335
  {
336
  "epoch": 22.74418604651163,
337
+ "grad_norm": 20.980804443359375,
338
  "learning_rate": 2.5396825396825397e-05,
339
+ "loss": 0.4719,
340
  "step": 228
341
  },
342
  {
343
  "epoch": 22.930232558139537,
344
+ "eval_accuracy": 0.6923076923076923,
345
+ "eval_loss": 1.1545660495758057,
346
+ "eval_runtime": 0.9225,
347
+ "eval_samples_per_second": 56.37,
348
+ "eval_steps_per_second": 2.168,
349
  "step": 230
350
  },
351
  {
352
  "epoch": 23.930232558139537,
353
+ "grad_norm": 26.727859497070312,
354
  "learning_rate": 2.380952380952381e-05,
355
+ "loss": 0.4212,
356
  "step": 240
357
  },
358
  {
359
  "epoch": 23.930232558139537,
360
+ "eval_accuracy": 0.7307692307692307,
361
+ "eval_loss": 1.1084016561508179,
362
+ "eval_runtime": 0.9361,
363
+ "eval_samples_per_second": 55.552,
364
+ "eval_steps_per_second": 2.137,
365
  "step": 240
366
  },
367
  {
368
  "epoch": 24.930232558139537,
369
+ "eval_accuracy": 0.6730769230769231,
370
+ "eval_loss": 1.2952685356140137,
371
+ "eval_runtime": 1.0838,
372
+ "eval_samples_per_second": 47.981,
373
+ "eval_steps_per_second": 1.845,
374
  "step": 250
375
  },
376
  {
377
  "epoch": 25.186046511627907,
378
+ "grad_norm": 16.363554000854492,
379
  "learning_rate": 2.2222222222222223e-05,
380
+ "loss": 0.4109,
381
  "step": 252
382
  },
383
  {
384
  "epoch": 25.930232558139537,
385
+ "eval_accuracy": 0.7307692307692307,
386
+ "eval_loss": 1.1867846250534058,
387
+ "eval_runtime": 0.9096,
388
+ "eval_samples_per_second": 57.167,
389
+ "eval_steps_per_second": 2.199,
390
  "step": 260
391
  },
392
  {
393
  "epoch": 26.372093023255815,
394
+ "grad_norm": 42.207515716552734,
395
  "learning_rate": 2.0634920634920636e-05,
396
+ "loss": 0.4236,
397
  "step": 264
398
  },
399
  {
400
  "epoch": 26.930232558139537,
401
+ "eval_accuracy": 0.6346153846153846,
402
+ "eval_loss": 1.2559605836868286,
403
+ "eval_runtime": 0.9101,
404
+ "eval_samples_per_second": 57.135,
405
+ "eval_steps_per_second": 2.197,
406
  "step": 270
407
  },
408
  {
409
  "epoch": 27.558139534883722,
410
+ "grad_norm": 15.23690414428711,
411
  "learning_rate": 1.9047619047619046e-05,
412
+ "loss": 0.3638,
413
  "step": 276
414
  },
415
  {
416
  "epoch": 27.930232558139537,
417
+ "eval_accuracy": 0.7115384615384616,
418
+ "eval_loss": 1.216145634651184,
419
+ "eval_runtime": 1.2163,
420
+ "eval_samples_per_second": 42.753,
421
+ "eval_steps_per_second": 1.644,
422
  "step": 280
423
  },
424
  {
425
  "epoch": 28.74418604651163,
426
+ "grad_norm": 33.352203369140625,
427
  "learning_rate": 1.746031746031746e-05,
428
+ "loss": 0.3944,
429
  "step": 288
430
  },
431
  {
432
  "epoch": 28.930232558139537,
433
+ "eval_accuracy": 0.7307692307692307,
434
+ "eval_loss": 1.1581984758377075,
435
+ "eval_runtime": 0.9294,
436
+ "eval_samples_per_second": 55.948,
437
+ "eval_steps_per_second": 2.152,
438
  "step": 290
439
  },
440
  {
441
  "epoch": 29.930232558139537,
442
+ "grad_norm": 19.420169830322266,
443
  "learning_rate": 1.5873015873015872e-05,
444
+ "loss": 0.3621,
445
  "step": 300
446
  },
447
  {
448
  "epoch": 29.930232558139537,
449
+ "eval_accuracy": 0.6923076923076923,
450
+ "eval_loss": 1.2993353605270386,
451
+ "eval_runtime": 0.912,
452
+ "eval_samples_per_second": 57.019,
453
+ "eval_steps_per_second": 2.193,
454
  "step": 300
455
  },
456
  {
457
  "epoch": 30.930232558139537,
458
+ "eval_accuracy": 0.7115384615384616,
459
+ "eval_loss": 1.1401245594024658,
460
+ "eval_runtime": 1.2431,
461
+ "eval_samples_per_second": 41.83,
462
+ "eval_steps_per_second": 1.609,
463
  "step": 310
464
  },
465
  {
466
  "epoch": 31.186046511627907,
467
+ "grad_norm": 24.769433975219727,
468
  "learning_rate": 1.4285714285714285e-05,
469
+ "loss": 0.3203,
470
  "step": 312
471
  },
472
  {
473
  "epoch": 31.930232558139537,
474
+ "eval_accuracy": 0.7115384615384616,
475
+ "eval_loss": 1.3228098154067993,
476
+ "eval_runtime": 0.9358,
477
+ "eval_samples_per_second": 55.568,
478
+ "eval_steps_per_second": 2.137,
479
  "step": 320
480
  },
481
  {
482
  "epoch": 32.372093023255815,
483
+ "grad_norm": 25.592016220092773,
484
  "learning_rate": 1.2698412698412699e-05,
485
+ "loss": 0.3014,
486
  "step": 324
487
  },
488
  {
489
  "epoch": 32.93023255813954,
490
+ "eval_accuracy": 0.6923076923076923,
491
+ "eval_loss": 1.2812803983688354,
492
+ "eval_runtime": 1.1414,
493
+ "eval_samples_per_second": 45.557,
494
+ "eval_steps_per_second": 1.752,
495
  "step": 330
496
  },
497
  {
498
  "epoch": 33.55813953488372,
499
+ "grad_norm": 17.85179328918457,
500
  "learning_rate": 1.1111111111111112e-05,
501
+ "loss": 0.3464,
502
  "step": 336
503
  },
504
  {
505
  "epoch": 33.93023255813954,
506
  "eval_accuracy": 0.6538461538461539,
507
+ "eval_loss": 1.4768296480178833,
508
+ "eval_runtime": 0.9065,
509
+ "eval_samples_per_second": 57.364,
510
+ "eval_steps_per_second": 2.206,
511
  "step": 340
512
  },
513
  {
514
  "epoch": 34.74418604651163,
515
+ "grad_norm": 15.770562171936035,
516
  "learning_rate": 9.523809523809523e-06,
517
+ "loss": 0.2891,
518
  "step": 348
519
  },
520
  {
521
  "epoch": 34.93023255813954,
522
+ "eval_accuracy": 0.7307692307692307,
523
+ "eval_loss": 1.2304089069366455,
524
+ "eval_runtime": 0.9275,
525
+ "eval_samples_per_second": 56.067,
526
+ "eval_steps_per_second": 2.156,
527
  "step": 350
528
  },
529
  {
530
  "epoch": 35.93023255813954,
531
+ "grad_norm": 22.76992416381836,
532
  "learning_rate": 7.936507936507936e-06,
533
+ "loss": 0.3153,
534
  "step": 360
535
  },
536
  {
537
  "epoch": 35.93023255813954,
538
+ "eval_accuracy": 0.6923076923076923,
539
+ "eval_loss": 1.3096110820770264,
540
+ "eval_runtime": 1.2083,
541
+ "eval_samples_per_second": 43.037,
542
+ "eval_steps_per_second": 1.655,
543
  "step": 360
544
  },
545
  {
546
  "epoch": 36.93023255813954,
547
+ "eval_accuracy": 0.7115384615384616,
548
+ "eval_loss": 1.3564749956130981,
549
+ "eval_runtime": 0.9106,
550
+ "eval_samples_per_second": 57.103,
551
+ "eval_steps_per_second": 2.196,
552
  "step": 370
553
  },
554
  {
555
  "epoch": 37.18604651162791,
556
+ "grad_norm": 32.75940704345703,
557
  "learning_rate": 6.349206349206349e-06,
558
+ "loss": 0.2762,
559
  "step": 372
560
  },
561
  {
562
  "epoch": 37.93023255813954,
563
+ "eval_accuracy": 0.6923076923076923,
564
+ "eval_loss": 1.2931231260299683,
565
+ "eval_runtime": 0.9165,
566
+ "eval_samples_per_second": 56.736,
567
+ "eval_steps_per_second": 2.182,
568
  "step": 380
569
  },
570
  {
571
  "epoch": 38.372093023255815,
572
+ "grad_norm": 28.327919006347656,
573
  "learning_rate": 4.7619047619047615e-06,
574
+ "loss": 0.3191,
575
  "step": 384
576
  },
577
  {
578
  "epoch": 38.93023255813954,
579
+ "eval_accuracy": 0.7307692307692307,
580
+ "eval_loss": 1.2440568208694458,
581
+ "eval_runtime": 1.2601,
582
+ "eval_samples_per_second": 41.266,
583
+ "eval_steps_per_second": 1.587,
584
  "step": 390
585
  },
586
  {
587
  "epoch": 39.55813953488372,
588
+ "grad_norm": 17.981231689453125,
589
  "learning_rate": 3.1746031746031746e-06,
590
+ "loss": 0.3009,
591
  "step": 396
592
  },
593
  {
594
  "epoch": 39.93023255813954,
595
+ "eval_accuracy": 0.7307692307692307,
596
+ "eval_loss": 1.2109795808792114,
597
+ "eval_runtime": 0.9344,
598
+ "eval_samples_per_second": 55.649,
599
+ "eval_steps_per_second": 2.14,
600
  "step": 400
601
  },
602
  {
603
  "epoch": 40.74418604651163,
604
+ "grad_norm": 14.792089462280273,
605
  "learning_rate": 1.5873015873015873e-06,
606
+ "loss": 0.2645,
607
  "step": 408
608
  },
609
  {
610
  "epoch": 40.93023255813954,
611
+ "eval_accuracy": 0.7115384615384616,
612
+ "eval_loss": 1.2433098554611206,
613
+ "eval_runtime": 0.9244,
614
+ "eval_samples_per_second": 56.254,
615
+ "eval_steps_per_second": 2.164,
616
  "step": 410
617
  },
618
  {
619
  "epoch": 41.93023255813954,
620
+ "grad_norm": 13.357348442077637,
621
  "learning_rate": 0.0,
622
+ "loss": 0.2497,
623
  "step": 420
624
  },
625
  {
626
  "epoch": 41.93023255813954,
627
+ "eval_accuracy": 0.6923076923076923,
628
+ "eval_loss": 1.2460753917694092,
629
+ "eval_runtime": 1.8198,
630
+ "eval_samples_per_second": 28.575,
631
+ "eval_steps_per_second": 1.099,
632
  "step": 420
633
  },
634
  {
635
  "epoch": 41.93023255813954,
636
  "step": 420,
637
  "total_flos": 1.8345690247389512e+18,
638
+ "train_loss": 1.1112867690268018,
639
+ "train_runtime": 1368.6695,
640
+ "train_samples_per_second": 41.795,
641
+ "train_steps_per_second": 0.307
642
  }
643
  ],
644
  "logging_steps": 12,