Soulaimen commited on
Commit
3f28661
·
1 Parent(s): 90d2a5d

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.99,
3
- "eval_accuracy": 0.9629629629629629,
4
- "eval_loss": 0.10010068118572235,
5
- "eval_runtime": 7.7425,
6
- "eval_samples_per_second": 80.207,
7
- "eval_steps_per_second": 10.074,
8
  "total_flos": 6.917311166047027e+17,
9
- "train_loss": 0.36484567570960386,
10
- "train_runtime": 2062.5819,
11
- "train_samples_per_second": 13.529,
12
- "train_steps_per_second": 0.422
13
  }
 
1
  {
2
  "epoch": 4.99,
3
+ "eval_accuracy": 0.9259259259259259,
4
+ "eval_loss": 0.20305776596069336,
5
+ "eval_runtime": 8.7135,
6
+ "eval_samples_per_second": 71.269,
7
+ "eval_steps_per_second": 8.952,
8
  "total_flos": 6.917311166047027e+17,
9
+ "train_loss": 0.5214220342964961,
10
+ "train_runtime": 714.1939,
11
+ "train_samples_per_second": 39.072,
12
+ "train_steps_per_second": 1.218
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.99,
3
- "eval_accuracy": 0.9629629629629629,
4
- "eval_loss": 0.10010068118572235,
5
- "eval_runtime": 7.7425,
6
- "eval_samples_per_second": 80.207,
7
- "eval_steps_per_second": 10.074
8
  }
 
1
  {
2
  "epoch": 4.99,
3
+ "eval_accuracy": 0.9259259259259259,
4
+ "eval_loss": 0.20305776596069336,
5
+ "eval_runtime": 8.7135,
6
+ "eval_samples_per_second": 71.269,
7
+ "eval_steps_per_second": 8.952
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e3e95500d6fe0909c42f2819f87f55a3a42ef9d0758b153c62f511834239428
3
  size 110401009
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:288f1e07aeae75d7051a67af7c1db330f9e2364765327bf4a8b76148f243099b
3
  size 110401009
runs/Apr17_12-28-29_5910a653ef0c/events.out.tfevents.1681735350.5910a653ef0c.229.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dfc16ef309d5d02f10eae85e184888b926e71c14e3ccf42fb13c7fc43f9d49a
3
+ size 411
runs/Apr17_12-45-08_5910a653ef0c/1681735511.6216552/events.out.tfevents.1681735511.5910a653ef0c.229.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df61c00545e4ec4eb5146ead4331479b6d1e021246fd1d92ead3d496bfa2c0f4
3
+ size 5970
runs/Apr17_12-45-08_5910a653ef0c/events.out.tfevents.1681735511.5910a653ef0c.229.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82ae2747d417a8e8f960016b020048d19f8a761e277b756443641e37bd9b9716
3
+ size 7496
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.99,
3
  "total_flos": 6.917311166047027e+17,
4
- "train_loss": 0.36484567570960386,
5
- "train_runtime": 2062.5819,
6
- "train_samples_per_second": 13.529,
7
- "train_steps_per_second": 0.422
8
  }
 
1
  {
2
  "epoch": 4.99,
3
  "total_flos": 6.917311166047027e+17,
4
+ "train_loss": 0.5214220342964961,
5
+ "train_runtime": 714.1939,
6
+ "train_samples_per_second": 39.072,
7
+ "train_steps_per_second": 1.218
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9629629629629629,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-bottom_cleaned_data/checkpoint-698",
4
  "epoch": 4.98567335243553,
5
  "global_step": 870,
6
  "is_hyper_param_search": false,
@@ -9,579 +9,579 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.06,
12
- "learning_rate": 5.747126436781609e-06,
13
- "loss": 1.4399,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.11,
18
- "learning_rate": 1.1494252873563218e-05,
19
- "loss": 1.3855,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.17,
24
- "learning_rate": 1.7241379310344828e-05,
25
- "loss": 1.2914,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.23,
30
- "learning_rate": 2.2988505747126437e-05,
31
- "loss": 1.1756,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.29,
36
- "learning_rate": 2.8735632183908045e-05,
37
- "loss": 0.993,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.34,
42
- "learning_rate": 3.4482758620689657e-05,
43
- "loss": 0.8509,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.4,
48
- "learning_rate": 4.0229885057471265e-05,
49
- "loss": 0.7269,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.46,
54
- "learning_rate": 4.597701149425287e-05,
55
- "loss": 0.6418,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.52,
60
- "learning_rate": 4.980842911877395e-05,
61
- "loss": 0.6033,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.57,
66
- "learning_rate": 4.916985951468711e-05,
67
- "loss": 0.6462,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.63,
72
- "learning_rate": 4.853128991060026e-05,
73
- "loss": 0.6602,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.69,
78
- "learning_rate": 4.789272030651341e-05,
79
- "loss": 0.4678,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.74,
84
- "learning_rate": 4.725415070242657e-05,
85
- "loss": 0.4999,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.8,
90
- "learning_rate": 4.661558109833972e-05,
91
- "loss": 0.5158,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.86,
96
- "learning_rate": 4.597701149425287e-05,
97
- "loss": 0.4182,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.92,
102
- "learning_rate": 4.5338441890166025e-05,
103
- "loss": 0.4787,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.97,
108
- "learning_rate": 4.469987228607918e-05,
109
- "loss": 0.4343,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 1.0,
114
- "eval_accuracy": 0.927536231884058,
115
- "eval_loss": 0.2457878440618515,
116
- "eval_runtime": 141.3791,
117
- "eval_samples_per_second": 4.392,
118
- "eval_steps_per_second": 0.552,
119
  "step": 174
120
  },
121
  {
122
  "epoch": 1.03,
123
- "learning_rate": 4.406130268199234e-05,
124
- "loss": 0.502,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.09,
129
- "learning_rate": 4.342273307790549e-05,
130
- "loss": 0.5438,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.15,
135
- "learning_rate": 4.278416347381865e-05,
136
- "loss": 0.4235,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.2,
141
- "learning_rate": 4.21455938697318e-05,
142
- "loss": 0.3872,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.26,
147
- "learning_rate": 4.1507024265644955e-05,
148
- "loss": 0.3741,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.32,
153
- "learning_rate": 4.086845466155811e-05,
154
- "loss": 0.3162,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.38,
159
- "learning_rate": 4.0229885057471265e-05,
160
- "loss": 0.306,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.43,
165
- "learning_rate": 3.959131545338442e-05,
166
- "loss": 0.3393,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.49,
171
- "learning_rate": 3.8952745849297575e-05,
172
- "loss": 0.4019,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.55,
177
- "learning_rate": 3.831417624521073e-05,
178
- "loss": 0.3279,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.6,
183
- "learning_rate": 3.7675606641123885e-05,
184
- "loss": 0.2847,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.66,
189
- "learning_rate": 3.7037037037037037e-05,
190
- "loss": 0.3104,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.72,
195
- "learning_rate": 3.6398467432950195e-05,
196
- "loss": 0.3329,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.78,
201
- "learning_rate": 3.5759897828863347e-05,
202
- "loss": 0.3435,
203
  "step": 310
204
  },
205
  {
206
  "epoch": 1.83,
207
- "learning_rate": 3.51213282247765e-05,
208
- "loss": 0.3614,
209
  "step": 320
210
  },
211
  {
212
  "epoch": 1.89,
213
- "learning_rate": 3.4482758620689657e-05,
214
- "loss": 0.3221,
215
  "step": 330
216
  },
217
  {
218
  "epoch": 1.95,
219
- "learning_rate": 3.3844189016602815e-05,
220
- "loss": 0.3086,
221
  "step": 340
222
  },
223
  {
224
  "epoch": 2.0,
225
- "eval_accuracy": 0.9516908212560387,
226
- "eval_loss": 0.13658131659030914,
227
- "eval_runtime": 8.9796,
228
- "eval_samples_per_second": 69.157,
229
- "eval_steps_per_second": 8.686,
230
  "step": 349
231
  },
232
  {
233
  "epoch": 2.01,
234
- "learning_rate": 3.3205619412515967e-05,
235
- "loss": 0.3944,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.06,
240
- "learning_rate": 3.256704980842912e-05,
241
- "loss": 0.3012,
242
  "step": 360
243
  },
244
  {
245
  "epoch": 2.12,
246
- "learning_rate": 3.192848020434228e-05,
247
- "loss": 0.3217,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.18,
252
- "learning_rate": 3.128991060025543e-05,
253
- "loss": 0.3616,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.23,
258
- "learning_rate": 3.065134099616858e-05,
259
- "loss": 0.2652,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 2.29,
264
- "learning_rate": 3.0012771392081738e-05,
265
- "loss": 0.3129,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.35,
270
- "learning_rate": 2.9374201787994893e-05,
271
- "loss": 0.3538,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.41,
276
- "learning_rate": 2.8735632183908045e-05,
277
- "loss": 0.2775,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.46,
282
- "learning_rate": 2.8097062579821203e-05,
283
- "loss": 0.3021,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.52,
288
- "learning_rate": 2.745849297573436e-05,
289
- "loss": 0.2512,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.58,
294
- "learning_rate": 2.681992337164751e-05,
295
- "loss": 0.2721,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.64,
300
- "learning_rate": 2.618135376756067e-05,
301
- "loss": 0.2555,
302
  "step": 460
303
  },
304
  {
305
  "epoch": 2.69,
306
- "learning_rate": 2.554278416347382e-05,
307
- "loss": 0.2576,
308
  "step": 470
309
  },
310
  {
311
  "epoch": 2.75,
312
- "learning_rate": 2.4904214559386975e-05,
313
- "loss": 0.3178,
314
  "step": 480
315
  },
316
  {
317
  "epoch": 2.81,
318
- "learning_rate": 2.426564495530013e-05,
319
- "loss": 0.2992,
320
  "step": 490
321
  },
322
  {
323
  "epoch": 2.87,
324
- "learning_rate": 2.3627075351213285e-05,
325
- "loss": 0.2333,
326
  "step": 500
327
  },
328
  {
329
  "epoch": 2.92,
330
- "learning_rate": 2.2988505747126437e-05,
331
- "loss": 0.2409,
332
  "step": 510
333
  },
334
  {
335
  "epoch": 2.98,
336
- "learning_rate": 2.234993614303959e-05,
337
- "loss": 0.2377,
338
  "step": 520
339
  },
340
  {
341
  "epoch": 3.0,
342
- "eval_accuracy": 0.9468599033816425,
343
- "eval_loss": 0.1384800225496292,
344
- "eval_runtime": 7.5854,
345
- "eval_samples_per_second": 81.868,
346
- "eval_steps_per_second": 10.283,
347
  "step": 523
348
  },
349
  {
350
  "epoch": 3.04,
351
- "learning_rate": 2.1711366538952747e-05,
352
- "loss": 0.2089,
353
  "step": 530
354
  },
355
  {
356
  "epoch": 3.09,
357
- "learning_rate": 2.10727969348659e-05,
358
- "loss": 0.2406,
359
  "step": 540
360
  },
361
  {
362
  "epoch": 3.15,
363
- "learning_rate": 2.0434227330779057e-05,
364
- "loss": 0.242,
365
  "step": 550
366
  },
367
  {
368
  "epoch": 3.21,
369
- "learning_rate": 1.979565772669221e-05,
370
- "loss": 0.2607,
371
  "step": 560
372
  },
373
  {
374
  "epoch": 3.27,
375
- "learning_rate": 1.9157088122605367e-05,
376
- "loss": 0.2147,
377
  "step": 570
378
  },
379
  {
380
  "epoch": 3.32,
381
- "learning_rate": 1.8518518518518518e-05,
382
- "loss": 0.2211,
383
  "step": 580
384
  },
385
  {
386
  "epoch": 3.38,
387
- "learning_rate": 1.7879948914431673e-05,
388
- "loss": 0.2235,
389
  "step": 590
390
  },
391
  {
392
  "epoch": 3.44,
393
- "learning_rate": 1.7241379310344828e-05,
394
- "loss": 0.2362,
395
  "step": 600
396
  },
397
  {
398
  "epoch": 3.5,
399
- "learning_rate": 1.6602809706257983e-05,
400
- "loss": 0.2407,
401
  "step": 610
402
  },
403
  {
404
  "epoch": 3.55,
405
- "learning_rate": 1.596424010217114e-05,
406
- "loss": 0.2809,
407
  "step": 620
408
  },
409
  {
410
  "epoch": 3.61,
411
- "learning_rate": 1.532567049808429e-05,
412
- "loss": 0.1666,
413
  "step": 630
414
  },
415
  {
416
  "epoch": 3.67,
417
- "learning_rate": 1.4687100893997447e-05,
418
- "loss": 0.2207,
419
  "step": 640
420
  },
421
  {
422
  "epoch": 3.72,
423
- "learning_rate": 1.4048531289910602e-05,
424
- "loss": 0.2079,
425
  "step": 650
426
  },
427
  {
428
  "epoch": 3.78,
429
- "learning_rate": 1.3409961685823755e-05,
430
- "loss": 0.2262,
431
  "step": 660
432
  },
433
  {
434
  "epoch": 3.84,
435
- "learning_rate": 1.277139208173691e-05,
436
- "loss": 0.1975,
437
  "step": 670
438
  },
439
  {
440
  "epoch": 3.9,
441
- "learning_rate": 1.2132822477650065e-05,
442
- "loss": 0.1849,
443
  "step": 680
444
  },
445
  {
446
  "epoch": 3.95,
447
- "learning_rate": 1.1494252873563218e-05,
448
- "loss": 0.2451,
449
  "step": 690
450
  },
451
  {
452
  "epoch": 4.0,
453
- "eval_accuracy": 0.9629629629629629,
454
- "eval_loss": 0.10010068118572235,
455
- "eval_runtime": 9.1061,
456
- "eval_samples_per_second": 68.196,
457
- "eval_steps_per_second": 8.566,
458
  "step": 698
459
  },
460
  {
461
  "epoch": 4.01,
462
- "learning_rate": 1.0855683269476373e-05,
463
- "loss": 0.22,
464
  "step": 700
465
  },
466
  {
467
  "epoch": 4.07,
468
- "learning_rate": 1.0217113665389528e-05,
469
- "loss": 0.2175,
470
  "step": 710
471
  },
472
  {
473
  "epoch": 4.13,
474
- "learning_rate": 9.578544061302683e-06,
475
- "loss": 0.1948,
476
  "step": 720
477
  },
478
  {
479
  "epoch": 4.18,
480
- "learning_rate": 8.939974457215837e-06,
481
- "loss": 0.168,
482
  "step": 730
483
  },
484
  {
485
  "epoch": 4.24,
486
- "learning_rate": 8.301404853128992e-06,
487
- "loss": 0.1665,
488
  "step": 740
489
  },
490
  {
491
  "epoch": 4.3,
492
- "learning_rate": 7.662835249042145e-06,
493
- "loss": 0.183,
494
  "step": 750
495
  },
496
  {
497
  "epoch": 4.36,
498
- "learning_rate": 7.024265644955301e-06,
499
- "loss": 0.1918,
500
  "step": 760
501
  },
502
  {
503
  "epoch": 4.41,
504
- "learning_rate": 6.385696040868455e-06,
505
- "loss": 0.1743,
506
  "step": 770
507
  },
508
  {
509
  "epoch": 4.47,
510
- "learning_rate": 5.747126436781609e-06,
511
- "loss": 0.1328,
512
  "step": 780
513
  },
514
  {
515
  "epoch": 4.53,
516
- "learning_rate": 5.108556832694764e-06,
517
- "loss": 0.1959,
518
  "step": 790
519
  },
520
  {
521
  "epoch": 4.58,
522
- "learning_rate": 4.469987228607918e-06,
523
- "loss": 0.172,
524
  "step": 800
525
  },
526
  {
527
  "epoch": 4.64,
528
- "learning_rate": 3.8314176245210725e-06,
529
- "loss": 0.1872,
530
  "step": 810
531
  },
532
  {
533
  "epoch": 4.7,
534
- "learning_rate": 3.1928480204342275e-06,
535
- "loss": 0.2218,
536
  "step": 820
537
  },
538
  {
539
  "epoch": 4.76,
540
- "learning_rate": 2.554278416347382e-06,
541
- "loss": 0.1797,
542
  "step": 830
543
  },
544
  {
545
  "epoch": 4.81,
546
- "learning_rate": 1.9157088122605362e-06,
547
- "loss": 0.1487,
548
  "step": 840
549
  },
550
  {
551
  "epoch": 4.87,
552
- "learning_rate": 1.277139208173691e-06,
553
- "loss": 0.2479,
554
  "step": 850
555
  },
556
  {
557
  "epoch": 4.93,
558
- "learning_rate": 6.385696040868455e-07,
559
- "loss": 0.1359,
560
  "step": 860
561
  },
562
  {
563
  "epoch": 4.99,
564
  "learning_rate": 0.0,
565
- "loss": 0.1148,
566
  "step": 870
567
  },
568
  {
569
  "epoch": 4.99,
570
- "eval_accuracy": 0.961352657004831,
571
- "eval_loss": 0.09633708000183105,
572
- "eval_runtime": 7.7205,
573
- "eval_samples_per_second": 80.435,
574
- "eval_steps_per_second": 10.103,
575
  "step": 870
576
  },
577
  {
578
  "epoch": 4.99,
579
  "step": 870,
580
  "total_flos": 6.917311166047027e+17,
581
- "train_loss": 0.36484567570960386,
582
- "train_runtime": 2062.5819,
583
- "train_samples_per_second": 13.529,
584
- "train_steps_per_second": 0.422
585
  }
586
  ],
587
  "max_steps": 870,
 
1
  {
2
+ "best_metric": 0.9259259259259259,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-bottom_cleaned_data/checkpoint-870",
4
  "epoch": 4.98567335243553,
5
  "global_step": 870,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.06,
12
+ "learning_rate": 1.1494252873563219e-06,
13
+ "loss": 1.4361,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.11,
18
+ "learning_rate": 2.2988505747126437e-06,
19
+ "loss": 1.3935,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.17,
24
+ "learning_rate": 3.448275862068966e-06,
25
+ "loss": 1.3852,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.23,
30
+ "learning_rate": 4.5977011494252875e-06,
31
+ "loss": 1.3455,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.29,
36
+ "learning_rate": 5.747126436781609e-06,
37
+ "loss": 1.2788,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.34,
42
+ "learning_rate": 6.896551724137932e-06,
43
+ "loss": 1.2361,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.4,
48
+ "learning_rate": 8.045977011494253e-06,
49
+ "loss": 1.1974,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.46,
54
+ "learning_rate": 9.195402298850575e-06,
55
+ "loss": 1.1278,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.52,
60
+ "learning_rate": 9.96168582375479e-06,
61
+ "loss": 1.0761,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.57,
66
+ "learning_rate": 9.833971902937422e-06,
67
+ "loss": 1.0022,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.63,
72
+ "learning_rate": 9.706257982120052e-06,
73
+ "loss": 0.9669,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.69,
78
+ "learning_rate": 9.578544061302683e-06,
79
+ "loss": 0.8852,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.74,
84
+ "learning_rate": 9.450830140485315e-06,
85
+ "loss": 0.8496,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.8,
90
+ "learning_rate": 9.323116219667945e-06,
91
+ "loss": 0.804,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.86,
96
+ "learning_rate": 9.195402298850575e-06,
97
+ "loss": 0.7712,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.92,
102
+ "learning_rate": 9.067688378033207e-06,
103
+ "loss": 0.6497,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.97,
108
+ "learning_rate": 8.939974457215838e-06,
109
+ "loss": 0.6591,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 1.0,
114
+ "eval_accuracy": 0.8067632850241546,
115
+ "eval_loss": 0.5231599807739258,
116
+ "eval_runtime": 9.148,
117
+ "eval_samples_per_second": 67.884,
118
+ "eval_steps_per_second": 8.526,
119
  "step": 174
120
  },
121
  {
122
  "epoch": 1.03,
123
+ "learning_rate": 8.812260536398468e-06,
124
+ "loss": 0.6781,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 1.09,
129
+ "learning_rate": 8.684546615581098e-06,
130
+ "loss": 0.6175,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.15,
135
+ "learning_rate": 8.55683269476373e-06,
136
+ "loss": 0.5585,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.2,
141
+ "learning_rate": 8.429118773946362e-06,
142
+ "loss": 0.5456,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.26,
147
+ "learning_rate": 8.301404853128992e-06,
148
+ "loss": 0.5256,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.32,
153
+ "learning_rate": 8.173690932311623e-06,
154
+ "loss": 0.5278,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.38,
159
+ "learning_rate": 8.045977011494253e-06,
160
+ "loss": 0.4722,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.43,
165
+ "learning_rate": 7.918263090676885e-06,
166
+ "loss": 0.509,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.49,
171
+ "learning_rate": 7.790549169859515e-06,
172
+ "loss": 0.4425,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.55,
177
+ "learning_rate": 7.662835249042147e-06,
178
+ "loss": 0.4515,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.6,
183
+ "learning_rate": 7.535121328224777e-06,
184
+ "loss": 0.477,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.66,
189
+ "learning_rate": 7.4074074074074075e-06,
190
+ "loss": 0.5036,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.72,
195
+ "learning_rate": 7.279693486590039e-06,
196
+ "loss": 0.5159,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.78,
201
+ "learning_rate": 7.15197956577267e-06,
202
+ "loss": 0.5527,
203
  "step": 310
204
  },
205
  {
206
  "epoch": 1.83,
207
+ "learning_rate": 7.0242656449553e-06,
208
+ "loss": 0.4243,
209
  "step": 320
210
  },
211
  {
212
  "epoch": 1.89,
213
+ "learning_rate": 6.896551724137932e-06,
214
+ "loss": 0.4485,
215
  "step": 330
216
  },
217
  {
218
  "epoch": 1.95,
219
+ "learning_rate": 6.7688378033205625e-06,
220
+ "loss": 0.4104,
221
  "step": 340
222
  },
223
  {
224
  "epoch": 2.0,
225
+ "eval_accuracy": 0.8888888888888888,
226
+ "eval_loss": 0.31606417894363403,
227
+ "eval_runtime": 9.5164,
228
+ "eval_samples_per_second": 65.255,
229
+ "eval_steps_per_second": 8.196,
230
  "step": 349
231
  },
232
  {
233
  "epoch": 2.01,
234
+ "learning_rate": 6.641123882503193e-06,
235
+ "loss": 0.3783,
236
  "step": 350
237
  },
238
  {
239
  "epoch": 2.06,
240
+ "learning_rate": 6.513409961685824e-06,
241
+ "loss": 0.372,
242
  "step": 360
243
  },
244
  {
245
  "epoch": 2.12,
246
+ "learning_rate": 6.385696040868455e-06,
247
+ "loss": 0.4664,
248
  "step": 370
249
  },
250
  {
251
  "epoch": 2.18,
252
+ "learning_rate": 6.257982120051086e-06,
253
+ "loss": 0.4034,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.23,
258
+ "learning_rate": 6.130268199233717e-06,
259
+ "loss": 0.3513,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 2.29,
264
+ "learning_rate": 6.002554278416348e-06,
265
+ "loss": 0.3816,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.35,
270
+ "learning_rate": 5.874840357598979e-06,
271
+ "loss": 0.4518,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.41,
276
+ "learning_rate": 5.747126436781609e-06,
277
+ "loss": 0.4686,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.46,
282
+ "learning_rate": 5.619412515964241e-06,
283
+ "loss": 0.3879,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.52,
288
+ "learning_rate": 5.491698595146872e-06,
289
+ "loss": 0.3775,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.58,
294
+ "learning_rate": 5.3639846743295025e-06,
295
+ "loss": 0.457,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.64,
300
+ "learning_rate": 5.236270753512134e-06,
301
+ "loss": 0.3692,
302
  "step": 460
303
  },
304
  {
305
  "epoch": 2.69,
306
+ "learning_rate": 5.108556832694764e-06,
307
+ "loss": 0.4145,
308
  "step": 470
309
  },
310
  {
311
  "epoch": 2.75,
312
+ "learning_rate": 4.980842911877395e-06,
313
+ "loss": 0.3576,
314
  "step": 480
315
  },
316
  {
317
  "epoch": 2.81,
318
+ "learning_rate": 4.853128991060026e-06,
319
+ "loss": 0.3297,
320
  "step": 490
321
  },
322
  {
323
  "epoch": 2.87,
324
+ "learning_rate": 4.7254150702426575e-06,
325
+ "loss": 0.4084,
326
  "step": 500
327
  },
328
  {
329
  "epoch": 2.92,
330
+ "learning_rate": 4.5977011494252875e-06,
331
+ "loss": 0.3237,
332
  "step": 510
333
  },
334
  {
335
  "epoch": 2.98,
336
+ "learning_rate": 4.469987228607919e-06,
337
+ "loss": 0.3559,
338
  "step": 520
339
  },
340
  {
341
  "epoch": 3.0,
342
+ "eval_accuracy": 0.9162640901771336,
343
+ "eval_loss": 0.2237011194229126,
344
+ "eval_runtime": 9.3284,
345
+ "eval_samples_per_second": 66.571,
346
+ "eval_steps_per_second": 8.362,
347
  "step": 523
348
  },
349
  {
350
  "epoch": 3.04,
351
+ "learning_rate": 4.342273307790549e-06,
352
+ "loss": 0.3295,
353
  "step": 530
354
  },
355
  {
356
  "epoch": 3.09,
357
+ "learning_rate": 4.214559386973181e-06,
358
+ "loss": 0.3902,
359
  "step": 540
360
  },
361
  {
362
  "epoch": 3.15,
363
+ "learning_rate": 4.086845466155812e-06,
364
+ "loss": 0.2753,
365
  "step": 550
366
  },
367
  {
368
  "epoch": 3.21,
369
+ "learning_rate": 3.9591315453384425e-06,
370
+ "loss": 0.3705,
371
  "step": 560
372
  },
373
  {
374
  "epoch": 3.27,
375
+ "learning_rate": 3.831417624521073e-06,
376
+ "loss": 0.3797,
377
  "step": 570
378
  },
379
  {
380
  "epoch": 3.32,
381
+ "learning_rate": 3.7037037037037037e-06,
382
+ "loss": 0.3163,
383
  "step": 580
384
  },
385
  {
386
  "epoch": 3.38,
387
+ "learning_rate": 3.575989782886335e-06,
388
+ "loss": 0.3488,
389
  "step": 590
390
  },
391
  {
392
  "epoch": 3.44,
393
+ "learning_rate": 3.448275862068966e-06,
394
+ "loss": 0.3248,
395
  "step": 600
396
  },
397
  {
398
  "epoch": 3.5,
399
+ "learning_rate": 3.3205619412515967e-06,
400
+ "loss": 0.3806,
401
  "step": 610
402
  },
403
  {
404
  "epoch": 3.55,
405
+ "learning_rate": 3.1928480204342275e-06,
406
+ "loss": 0.2763,
407
  "step": 620
408
  },
409
  {
410
  "epoch": 3.61,
411
+ "learning_rate": 3.0651340996168583e-06,
412
+ "loss": 0.3623,
413
  "step": 630
414
  },
415
  {
416
  "epoch": 3.67,
417
+ "learning_rate": 2.9374201787994896e-06,
418
+ "loss": 0.3353,
419
  "step": 640
420
  },
421
  {
422
  "epoch": 3.72,
423
+ "learning_rate": 2.8097062579821204e-06,
424
+ "loss": 0.3229,
425
  "step": 650
426
  },
427
  {
428
  "epoch": 3.78,
429
+ "learning_rate": 2.6819923371647512e-06,
430
+ "loss": 0.3127,
431
  "step": 660
432
  },
433
  {
434
  "epoch": 3.84,
435
+ "learning_rate": 2.554278416347382e-06,
436
+ "loss": 0.28,
437
  "step": 670
438
  },
439
  {
440
  "epoch": 3.9,
441
+ "learning_rate": 2.426564495530013e-06,
442
+ "loss": 0.4105,
443
  "step": 680
444
  },
445
  {
446
  "epoch": 3.95,
447
+ "learning_rate": 2.2988505747126437e-06,
448
+ "loss": 0.3487,
449
  "step": 690
450
  },
451
  {
452
  "epoch": 4.0,
453
+ "eval_accuracy": 0.9194847020933977,
454
+ "eval_loss": 0.19985385239124298,
455
+ "eval_runtime": 8.2263,
456
+ "eval_samples_per_second": 75.49,
457
+ "eval_steps_per_second": 9.482,
458
  "step": 698
459
  },
460
  {
461
  "epoch": 4.01,
462
+ "learning_rate": 2.1711366538952746e-06,
463
+ "loss": 0.333,
464
  "step": 700
465
  },
466
  {
467
  "epoch": 4.07,
468
+ "learning_rate": 2.043422733077906e-06,
469
+ "loss": 0.3239,
470
  "step": 710
471
  },
472
  {
473
  "epoch": 4.13,
474
+ "learning_rate": 1.9157088122605367e-06,
475
+ "loss": 0.256,
476
  "step": 720
477
  },
478
  {
479
  "epoch": 4.18,
480
+ "learning_rate": 1.7879948914431675e-06,
481
+ "loss": 0.3252,
482
  "step": 730
483
  },
484
  {
485
  "epoch": 4.24,
486
+ "learning_rate": 1.6602809706257983e-06,
487
+ "loss": 0.2693,
488
  "step": 740
489
  },
490
  {
491
  "epoch": 4.3,
492
+ "learning_rate": 1.5325670498084292e-06,
493
+ "loss": 0.3099,
494
  "step": 750
495
  },
496
  {
497
  "epoch": 4.36,
498
+ "learning_rate": 1.4048531289910602e-06,
499
+ "loss": 0.3115,
500
  "step": 760
501
  },
502
  {
503
  "epoch": 4.41,
504
+ "learning_rate": 1.277139208173691e-06,
505
+ "loss": 0.3501,
506
  "step": 770
507
  },
508
  {
509
  "epoch": 4.47,
510
+ "learning_rate": 1.1494252873563219e-06,
511
+ "loss": 0.3733,
512
  "step": 780
513
  },
514
  {
515
  "epoch": 4.53,
516
+ "learning_rate": 1.021711366538953e-06,
517
+ "loss": 0.3264,
518
  "step": 790
519
  },
520
  {
521
  "epoch": 4.58,
522
+ "learning_rate": 8.939974457215837e-07,
523
+ "loss": 0.3111,
524
  "step": 800
525
  },
526
  {
527
  "epoch": 4.64,
528
+ "learning_rate": 7.662835249042146e-07,
529
+ "loss": 0.4114,
530
  "step": 810
531
  },
532
  {
533
  "epoch": 4.7,
534
+ "learning_rate": 6.385696040868455e-07,
535
+ "loss": 0.2706,
536
  "step": 820
537
  },
538
  {
539
  "epoch": 4.76,
540
+ "learning_rate": 5.108556832694765e-07,
541
+ "loss": 0.2863,
542
  "step": 830
543
  },
544
  {
545
  "epoch": 4.81,
546
+ "learning_rate": 3.831417624521073e-07,
547
+ "loss": 0.2813,
548
  "step": 840
549
  },
550
  {
551
  "epoch": 4.87,
552
+ "learning_rate": 2.5542784163473823e-07,
553
+ "loss": 0.3687,
554
  "step": 850
555
  },
556
  {
557
  "epoch": 4.93,
558
+ "learning_rate": 1.2771392081736911e-07,
559
+ "loss": 0.3689,
560
  "step": 860
561
  },
562
  {
563
  "epoch": 4.99,
564
  "learning_rate": 0.0,
565
+ "loss": 0.3422,
566
  "step": 870
567
  },
568
  {
569
  "epoch": 4.99,
570
+ "eval_accuracy": 0.9259259259259259,
571
+ "eval_loss": 0.20305776596069336,
572
+ "eval_runtime": 9.5579,
573
+ "eval_samples_per_second": 64.972,
574
+ "eval_steps_per_second": 8.161,
575
  "step": 870
576
  },
577
  {
578
  "epoch": 4.99,
579
  "step": 870,
580
  "total_flos": 6.917311166047027e+17,
581
+ "train_loss": 0.5214220342964961,
582
+ "train_runtime": 714.1939,
583
+ "train_samples_per_second": 39.072,
584
+ "train_steps_per_second": 1.218
585
  }
586
  ],
587
  "max_steps": 870,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3269c8bf9b3171151c206c3d4adc88506d8dd8c6ae4508d917fe180083ccbf30
3
  size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da96b57a9dcbdd5437021deb7d5e8c15643d6e1cbffc15f0d5445db348dffe6
3
  size 3643