MaxP committed on
Commit
6c838e6
·
1 Parent(s): 309ce01

Training in progress, step 100

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "total_flos": 1.1044933279767798e+18,
4
- "train_loss": 0.1437230341363276,
5
- "train_runtime": 451.3314,
6
- "train_samples_per_second": 31.58,
7
- "train_steps_per_second": 1.974
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "total_flos": 4.665792694694584e+17,
4
+ "train_loss": 0.15108026370958047,
5
+ "train_runtime": 222.8357,
6
+ "train_samples_per_second": 27.02,
7
+ "train_steps_per_second": 1.696
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cbdcc63fadf3ac8b367f50f6613113304c72dbd9d6b1f1e0d1c1e6b96bcc1fc
3
  size 343268717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:176c26fcece8fa93ca3638cc5a44ff81006609d9ef30cd57f914e8dba43a457e
3
  size 343268717
runs/Mar10_21-05-02_a4c3c39bee63/1678482309.2431283/events.out.tfevents.1678482309.a4c3c39bee63.128.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ddefbee056805dcbfa2ceef715fdeb8f19116303e5c0aa4bbcecedeab538b0
3
+ size 5695
runs/Mar10_21-05-02_a4c3c39bee63/events.out.tfevents.1678482309.a4c3c39bee63.128.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:675c928040cca04ec1fe14efc37e49d0020bb21865ae0937c33f66f0f7ae281d
3
+ size 5808
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "total_flos": 1.1044933279767798e+18,
4
- "train_loss": 0.1437230341363276,
5
- "train_runtime": 451.3314,
6
- "train_samples_per_second": 31.58,
7
- "train_steps_per_second": 1.974
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "total_flos": 4.665792694694584e+17,
4
+ "train_loss": 0.15108026370958047,
5
+ "train_runtime": 222.8357,
6
+ "train_samples_per_second": 27.02,
7
+ "train_steps_per_second": 1.696
8
  }
trainer_state.json CHANGED
@@ -1,631 +1,274 @@
1
  {
2
- "best_metric": 0.040985796600580215,
3
- "best_model_checkpoint": "./output/vit-base-riego/checkpoint-800",
4
  "epoch": 3.0,
5
- "global_step": 891,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.03,
12
- "learning_rate": 0.00019775533108866444,
13
- "loss": 0.5334,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.07,
18
- "learning_rate": 0.00019551066217732884,
19
- "loss": 0.4114,
20
  "step": 20
21
  },
22
  {
23
- "epoch": 0.1,
24
- "learning_rate": 0.00019326599326599327,
25
- "loss": 0.4417,
26
  "step": 30
27
  },
28
  {
29
- "epoch": 0.13,
30
- "learning_rate": 0.0001910213243546577,
31
- "loss": 0.2842,
32
  "step": 40
33
  },
34
  {
35
- "epoch": 0.17,
36
- "learning_rate": 0.00018877665544332213,
37
- "loss": 0.4106,
38
  "step": 50
39
  },
40
  {
41
- "epoch": 0.2,
42
- "learning_rate": 0.00018653198653198653,
43
- "loss": 0.2933,
44
  "step": 60
45
  },
46
  {
47
- "epoch": 0.24,
48
- "learning_rate": 0.00018428731762065096,
49
- "loss": 0.2447,
50
  "step": 70
51
  },
52
  {
53
- "epoch": 0.27,
54
- "learning_rate": 0.00018204264870931537,
55
- "loss": 0.1987,
56
  "step": 80
57
  },
58
  {
59
- "epoch": 0.3,
60
- "learning_rate": 0.0001797979797979798,
61
- "loss": 0.6804,
62
  "step": 90
63
  },
64
  {
65
- "epoch": 0.34,
66
- "learning_rate": 0.00017755331088664423,
67
- "loss": 0.4811,
68
  "step": 100
69
  },
70
  {
71
- "epoch": 0.34,
72
- "eval_f1": 0.7467948717948718,
73
- "eval_loss": 0.44471248984336853,
74
- "eval_runtime": 6.4953,
75
- "eval_samples_per_second": 129.324,
76
- "eval_steps_per_second": 16.166,
77
  "step": 100
78
  },
79
  {
80
- "epoch": 0.37,
81
- "learning_rate": 0.00017530864197530866,
82
- "loss": 0.3681,
83
  "step": 110
84
  },
85
  {
86
- "epoch": 0.4,
87
- "learning_rate": 0.00017306397306397306,
88
- "loss": 0.3704,
89
  "step": 120
90
  },
91
  {
92
- "epoch": 0.44,
93
- "learning_rate": 0.0001708193041526375,
94
- "loss": 0.1756,
95
  "step": 130
96
  },
97
  {
98
- "epoch": 0.47,
99
- "learning_rate": 0.00016857463524130192,
100
- "loss": 0.2008,
101
  "step": 140
102
  },
103
  {
104
- "epoch": 0.51,
105
- "learning_rate": 0.00016632996632996635,
106
- "loss": 0.2869,
107
  "step": 150
108
  },
109
  {
110
- "epoch": 0.54,
111
- "learning_rate": 0.00016408529741863078,
112
- "loss": 0.1804,
113
  "step": 160
114
  },
115
  {
116
- "epoch": 0.57,
117
- "learning_rate": 0.00016184062850729518,
118
- "loss": 0.1362,
119
  "step": 170
120
  },
121
  {
122
- "epoch": 0.61,
123
- "learning_rate": 0.0001595959595959596,
124
- "loss": 0.1901,
125
  "step": 180
126
  },
127
  {
128
- "epoch": 0.64,
129
- "learning_rate": 0.00015757575757575757,
130
- "loss": 0.3886,
131
  "step": 190
132
  },
133
  {
134
- "epoch": 0.67,
135
- "learning_rate": 0.000155331088664422,
136
- "loss": 0.2985,
137
  "step": 200
138
  },
139
  {
140
- "epoch": 0.67,
141
- "eval_f1": 0.8792569659442724,
142
- "eval_loss": 0.2198340892791748,
143
- "eval_runtime": 11.0106,
144
- "eval_samples_per_second": 76.29,
145
- "eval_steps_per_second": 9.536,
146
  "step": 200
147
  },
148
  {
149
- "epoch": 0.71,
150
- "learning_rate": 0.0001530864197530864,
151
- "loss": 0.2172,
152
  "step": 210
153
  },
154
  {
155
- "epoch": 0.74,
156
- "learning_rate": 0.00015084175084175086,
157
- "loss": 0.1929,
158
  "step": 220
159
  },
160
  {
161
- "epoch": 0.77,
162
- "learning_rate": 0.00014859708193041527,
163
- "loss": 0.2861,
164
  "step": 230
165
  },
166
  {
167
- "epoch": 0.81,
168
- "learning_rate": 0.00014657687991021326,
169
- "loss": 0.1706,
170
  "step": 240
171
  },
172
  {
173
- "epoch": 0.84,
174
- "learning_rate": 0.00014433221099887769,
175
- "loss": 0.1842,
176
  "step": 250
177
  },
178
  {
179
- "epoch": 0.88,
180
- "learning_rate": 0.0001420875420875421,
181
- "loss": 0.1779,
182
  "step": 260
183
  },
184
  {
185
- "epoch": 0.91,
186
- "learning_rate": 0.00013984287317620652,
187
- "loss": 0.1383,
188
  "step": 270
189
  },
190
  {
191
- "epoch": 0.94,
192
- "learning_rate": 0.00013759820426487092,
193
- "loss": 0.2297,
194
  "step": 280
195
  },
196
  {
197
- "epoch": 0.98,
198
- "learning_rate": 0.00013535353535353538,
199
- "loss": 0.2028,
200
  "step": 290
201
  },
202
  {
203
- "epoch": 1.01,
204
- "learning_rate": 0.00013310886644219978,
205
- "loss": 0.1199,
206
  "step": 300
207
  },
208
  {
209
- "epoch": 1.01,
210
- "eval_f1": 0.9709302325581396,
211
- "eval_loss": 0.0845857635140419,
212
- "eval_runtime": 6.2789,
213
- "eval_samples_per_second": 133.781,
214
- "eval_steps_per_second": 16.723,
215
  "step": 300
216
  },
217
  {
218
- "epoch": 1.04,
219
- "learning_rate": 0.0001308641975308642,
220
- "loss": 0.1548,
221
  "step": 310
222
  },
223
  {
224
- "epoch": 1.08,
225
- "learning_rate": 0.0001286195286195286,
226
- "loss": 0.1304,
227
  "step": 320
228
  },
229
  {
230
- "epoch": 1.11,
231
- "learning_rate": 0.00012637485970819304,
232
- "loss": 0.1097,
233
  "step": 330
234
  },
235
  {
236
- "epoch": 1.14,
237
- "learning_rate": 0.00012413019079685747,
238
- "loss": 0.0966,
239
  "step": 340
240
  },
241
  {
242
- "epoch": 1.18,
243
- "learning_rate": 0.00012188552188552189,
244
- "loss": 0.2074,
245
  "step": 350
246
  },
247
  {
248
- "epoch": 1.21,
249
- "learning_rate": 0.00011964085297418632,
250
- "loss": 0.0986,
251
  "step": 360
252
  },
253
  {
254
- "epoch": 1.25,
255
- "learning_rate": 0.00011739618406285073,
256
- "loss": 0.1224,
257
  "step": 370
258
  },
259
- {
260
- "epoch": 1.28,
261
- "learning_rate": 0.00011515151515151516,
262
- "loss": 0.0973,
263
- "step": 380
264
- },
265
- {
266
- "epoch": 1.31,
267
- "learning_rate": 0.00011290684624017958,
268
- "loss": 0.1067,
269
- "step": 390
270
- },
271
- {
272
- "epoch": 1.35,
273
- "learning_rate": 0.00011066217732884401,
274
- "loss": 0.128,
275
- "step": 400
276
- },
277
- {
278
- "epoch": 1.35,
279
- "eval_f1": 0.9573863636363636,
280
- "eval_loss": 0.11194377392530441,
281
- "eval_runtime": 8.4731,
282
- "eval_samples_per_second": 99.137,
283
- "eval_steps_per_second": 12.392,
284
- "step": 400
285
- },
286
- {
287
- "epoch": 1.38,
288
- "learning_rate": 0.00010841750841750841,
289
- "loss": 0.1299,
290
- "step": 410
291
- },
292
- {
293
- "epoch": 1.41,
294
- "learning_rate": 0.00010617283950617284,
295
- "loss": 0.1451,
296
- "step": 420
297
- },
298
- {
299
- "epoch": 1.45,
300
- "learning_rate": 0.00010392817059483726,
301
- "loss": 0.0638,
302
- "step": 430
303
- },
304
- {
305
- "epoch": 1.48,
306
- "learning_rate": 0.00010168350168350169,
307
- "loss": 0.0818,
308
- "step": 440
309
- },
310
- {
311
- "epoch": 1.52,
312
- "learning_rate": 9.943883277216612e-05,
313
- "loss": 0.3606,
314
- "step": 450
315
- },
316
- {
317
- "epoch": 1.55,
318
- "learning_rate": 9.719416386083054e-05,
319
- "loss": 0.092,
320
- "step": 460
321
- },
322
- {
323
- "epoch": 1.58,
324
- "learning_rate": 9.494949494949495e-05,
325
- "loss": 0.1516,
326
- "step": 470
327
- },
328
- {
329
- "epoch": 1.62,
330
- "learning_rate": 9.270482603815938e-05,
331
- "loss": 0.0527,
332
- "step": 480
333
- },
334
- {
335
- "epoch": 1.65,
336
- "learning_rate": 9.04601571268238e-05,
337
- "loss": 0.1143,
338
- "step": 490
339
- },
340
- {
341
- "epoch": 1.68,
342
- "learning_rate": 8.821548821548821e-05,
343
- "loss": 0.1387,
344
- "step": 500
345
- },
346
- {
347
- "epoch": 1.68,
348
- "eval_f1": 0.9495798319327731,
349
- "eval_loss": 0.12221043556928635,
350
- "eval_runtime": 5.8532,
351
- "eval_samples_per_second": 143.512,
352
- "eval_steps_per_second": 17.939,
353
- "step": 500
354
- },
355
- {
356
- "epoch": 1.72,
357
- "learning_rate": 8.597081930415264e-05,
358
- "loss": 0.1286,
359
- "step": 510
360
- },
361
- {
362
- "epoch": 1.75,
363
- "learning_rate": 8.372615039281706e-05,
364
- "loss": 0.0926,
365
- "step": 520
366
- },
367
- {
368
- "epoch": 1.78,
369
- "learning_rate": 8.148148148148148e-05,
370
- "loss": 0.0371,
371
- "step": 530
372
- },
373
- {
374
- "epoch": 1.82,
375
- "learning_rate": 7.92368125701459e-05,
376
- "loss": 0.0766,
377
- "step": 540
378
- },
379
- {
380
- "epoch": 1.85,
381
- "learning_rate": 7.699214365881034e-05,
382
- "loss": 0.0888,
383
- "step": 550
384
- },
385
- {
386
- "epoch": 1.89,
387
- "learning_rate": 7.474747474747475e-05,
388
- "loss": 0.0791,
389
- "step": 560
390
- },
391
- {
392
- "epoch": 1.92,
393
- "learning_rate": 7.250280583613918e-05,
394
- "loss": 0.0833,
395
- "step": 570
396
- },
397
- {
398
- "epoch": 1.95,
399
- "learning_rate": 7.02581369248036e-05,
400
- "loss": 0.0651,
401
- "step": 580
402
- },
403
- {
404
- "epoch": 1.99,
405
- "learning_rate": 6.801346801346801e-05,
406
- "loss": 0.0134,
407
- "step": 590
408
- },
409
- {
410
- "epoch": 2.02,
411
- "learning_rate": 6.576879910213244e-05,
412
- "loss": 0.0899,
413
- "step": 600
414
- },
415
- {
416
- "epoch": 2.02,
417
- "eval_f1": 0.9765395894428153,
418
- "eval_loss": 0.0800161063671112,
419
- "eval_runtime": 6.9218,
420
- "eval_samples_per_second": 121.356,
421
- "eval_steps_per_second": 15.17,
422
- "step": 600
423
- },
424
- {
425
- "epoch": 2.05,
426
- "learning_rate": 6.352413019079686e-05,
427
- "loss": 0.042,
428
- "step": 610
429
- },
430
- {
431
- "epoch": 2.09,
432
- "learning_rate": 6.150392817059484e-05,
433
- "loss": 0.0251,
434
- "step": 620
435
- },
436
- {
437
- "epoch": 2.12,
438
- "learning_rate": 5.925925925925926e-05,
439
- "loss": 0.0061,
440
- "step": 630
441
- },
442
- {
443
- "epoch": 2.15,
444
- "learning_rate": 5.701459034792368e-05,
445
- "loss": 0.0495,
446
- "step": 640
447
- },
448
- {
449
- "epoch": 2.19,
450
- "learning_rate": 5.47699214365881e-05,
451
- "loss": 0.0586,
452
- "step": 650
453
- },
454
- {
455
- "epoch": 2.22,
456
- "learning_rate": 5.2525252525252536e-05,
457
- "loss": 0.0062,
458
- "step": 660
459
- },
460
- {
461
- "epoch": 2.26,
462
- "learning_rate": 5.028058361391695e-05,
463
- "loss": 0.027,
464
- "step": 670
465
- },
466
- {
467
- "epoch": 2.29,
468
- "learning_rate": 4.803591470258137e-05,
469
- "loss": 0.0356,
470
- "step": 680
471
- },
472
- {
473
- "epoch": 2.32,
474
- "learning_rate": 4.57912457912458e-05,
475
- "loss": 0.0372,
476
- "step": 690
477
- },
478
- {
479
- "epoch": 2.36,
480
- "learning_rate": 4.3546576879910214e-05,
481
- "loss": 0.0048,
482
- "step": 700
483
- },
484
- {
485
- "epoch": 2.36,
486
- "eval_f1": 0.9769452449567723,
487
- "eval_loss": 0.07305397093296051,
488
- "eval_runtime": 5.7864,
489
- "eval_samples_per_second": 145.168,
490
- "eval_steps_per_second": 18.146,
491
- "step": 700
492
- },
493
- {
494
- "epoch": 2.39,
495
- "learning_rate": 4.130190796857464e-05,
496
- "loss": 0.0318,
497
- "step": 710
498
- },
499
- {
500
- "epoch": 2.42,
501
- "learning_rate": 3.905723905723906e-05,
502
- "loss": 0.0858,
503
- "step": 720
504
- },
505
- {
506
- "epoch": 2.46,
507
- "learning_rate": 3.681257014590348e-05,
508
- "loss": 0.065,
509
- "step": 730
510
- },
511
- {
512
- "epoch": 2.49,
513
- "learning_rate": 3.45679012345679e-05,
514
- "loss": 0.1117,
515
- "step": 740
516
- },
517
- {
518
- "epoch": 2.53,
519
- "learning_rate": 3.232323232323233e-05,
520
- "loss": 0.0432,
521
- "step": 750
522
- },
523
- {
524
- "epoch": 2.56,
525
- "learning_rate": 3.007856341189675e-05,
526
- "loss": 0.0209,
527
- "step": 760
528
- },
529
- {
530
- "epoch": 2.59,
531
- "learning_rate": 2.783389450056117e-05,
532
- "loss": 0.0591,
533
- "step": 770
534
- },
535
- {
536
- "epoch": 2.63,
537
- "learning_rate": 2.5589225589225592e-05,
538
- "loss": 0.0276,
539
- "step": 780
540
- },
541
- {
542
- "epoch": 2.66,
543
- "learning_rate": 2.3344556677890012e-05,
544
- "loss": 0.0058,
545
- "step": 790
546
- },
547
- {
548
- "epoch": 2.69,
549
- "learning_rate": 2.1099887766554435e-05,
550
- "loss": 0.0195,
551
- "step": 800
552
- },
553
- {
554
- "epoch": 2.69,
555
- "eval_f1": 0.9885057471264367,
556
- "eval_loss": 0.040985796600580215,
557
- "eval_runtime": 6.6774,
558
- "eval_samples_per_second": 125.798,
559
- "eval_steps_per_second": 15.725,
560
- "step": 800
561
- },
562
- {
563
- "epoch": 2.73,
564
- "learning_rate": 1.8855218855218858e-05,
565
- "loss": 0.0055,
566
- "step": 810
567
- },
568
- {
569
- "epoch": 2.76,
570
- "learning_rate": 1.6610549943883278e-05,
571
- "loss": 0.0351,
572
- "step": 820
573
- },
574
- {
575
- "epoch": 2.79,
576
- "learning_rate": 1.43658810325477e-05,
577
- "loss": 0.0045,
578
- "step": 830
579
- },
580
- {
581
- "epoch": 2.83,
582
- "learning_rate": 1.2121212121212122e-05,
583
- "loss": 0.0173,
584
- "step": 840
585
- },
586
- {
587
- "epoch": 2.86,
588
- "learning_rate": 9.876543209876543e-06,
589
- "loss": 0.0042,
590
- "step": 850
591
- },
592
- {
593
- "epoch": 2.9,
594
- "learning_rate": 7.631874298540966e-06,
595
- "loss": 0.008,
596
- "step": 860
597
- },
598
- {
599
- "epoch": 2.93,
600
- "learning_rate": 5.387205387205387e-06,
601
- "loss": 0.0648,
602
- "step": 870
603
- },
604
- {
605
- "epoch": 2.96,
606
- "learning_rate": 3.1425364758698095e-06,
607
- "loss": 0.0389,
608
- "step": 880
609
- },
610
- {
611
- "epoch": 3.0,
612
- "learning_rate": 8.978675645342313e-07,
613
- "loss": 0.0332,
614
- "step": 890
615
- },
616
  {
617
  "epoch": 3.0,
618
- "step": 891,
619
- "total_flos": 1.1044933279767798e+18,
620
- "train_loss": 0.1437230341363276,
621
- "train_runtime": 451.3314,
622
- "train_samples_per_second": 31.58,
623
- "train_steps_per_second": 1.974
624
  }
625
  ],
626
- "max_steps": 891,
627
  "num_train_epochs": 3,
628
- "total_flos": 1.1044933279767798e+18,
629
  "trial_name": null,
630
  "trial_params": null
631
  }
 
1
  {
2
+ "best_metric": 0.4047260880470276,
3
+ "best_model_checkpoint": "./output/vit-base-riego/checkpoint-300",
4
  "epoch": 3.0,
5
+ "global_step": 378,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.08,
12
+ "learning_rate": 0.0001947089947089947,
13
+ "loss": 0.5579,
14
  "step": 10
15
  },
16
  {
17
+ "epoch": 0.16,
18
+ "learning_rate": 0.00018941798941798943,
19
+ "loss": 0.3376,
20
  "step": 20
21
  },
22
  {
23
+ "epoch": 0.24,
24
+ "learning_rate": 0.00018412698412698412,
25
+ "loss": 0.2934,
26
  "step": 30
27
  },
28
  {
29
+ "epoch": 0.32,
30
+ "learning_rate": 0.00017883597883597884,
31
+ "loss": 0.3096,
32
  "step": 40
33
  },
34
  {
35
+ "epoch": 0.4,
36
+ "learning_rate": 0.00017354497354497354,
37
+ "loss": 0.2475,
38
  "step": 50
39
  },
40
  {
41
+ "epoch": 0.48,
42
+ "learning_rate": 0.00016825396825396826,
43
+ "loss": 0.173,
44
  "step": 60
45
  },
46
  {
47
+ "epoch": 0.56,
48
+ "learning_rate": 0.00016296296296296295,
49
+ "loss": 0.3087,
50
  "step": 70
51
  },
52
  {
53
+ "epoch": 0.63,
54
+ "learning_rate": 0.00015820105820105822,
55
+ "loss": 0.3236,
56
  "step": 80
57
  },
58
  {
59
+ "epoch": 0.71,
60
+ "learning_rate": 0.0001529100529100529,
61
+ "loss": 0.2005,
62
  "step": 90
63
  },
64
  {
65
+ "epoch": 0.79,
66
+ "learning_rate": 0.00014761904761904763,
67
+ "loss": 0.249,
68
  "step": 100
69
  },
70
  {
71
+ "epoch": 0.79,
72
+ "eval_f1": 0.31901840490797545,
73
+ "eval_loss": 0.6620292067527771,
74
+ "eval_runtime": 7.2847,
75
+ "eval_samples_per_second": 132.882,
76
+ "eval_steps_per_second": 16.61,
77
  "step": 100
78
  },
79
  {
80
+ "epoch": 0.87,
81
+ "learning_rate": 0.00014232804232804233,
82
+ "loss": 0.2803,
83
  "step": 110
84
  },
85
  {
86
+ "epoch": 0.95,
87
+ "learning_rate": 0.00013703703703703705,
88
+ "loss": 0.163,
89
  "step": 120
90
  },
91
  {
92
+ "epoch": 1.03,
93
+ "learning_rate": 0.00013174603174603174,
94
+ "loss": 0.2306,
95
  "step": 130
96
  },
97
  {
98
+ "epoch": 1.11,
99
+ "learning_rate": 0.00012645502645502646,
100
+ "loss": 0.0607,
101
  "step": 140
102
  },
103
  {
104
+ "epoch": 1.19,
105
+ "learning_rate": 0.00012116402116402117,
106
+ "loss": 0.1109,
107
  "step": 150
108
  },
109
  {
110
+ "epoch": 1.27,
111
+ "learning_rate": 0.0001158730158730159,
112
+ "loss": 0.2241,
113
  "step": 160
114
  },
115
  {
116
+ "epoch": 1.35,
117
+ "learning_rate": 0.00011058201058201059,
118
+ "loss": 0.1394,
119
  "step": 170
120
  },
121
  {
122
+ "epoch": 1.43,
123
+ "learning_rate": 0.00010529100529100531,
124
+ "loss": 0.153,
125
  "step": 180
126
  },
127
  {
128
+ "epoch": 1.51,
129
+ "learning_rate": 0.0001,
130
+ "loss": 0.1361,
131
  "step": 190
132
  },
133
  {
134
+ "epoch": 1.59,
135
+ "learning_rate": 9.470899470899471e-05,
136
+ "loss": 0.0348,
137
  "step": 200
138
  },
139
  {
140
+ "epoch": 1.59,
141
+ "eval_f1": 0.427536231884058,
142
+ "eval_loss": 0.514543354511261,
143
+ "eval_runtime": 9.2968,
144
+ "eval_samples_per_second": 104.122,
145
+ "eval_steps_per_second": 13.015,
146
  "step": 200
147
  },
148
  {
149
+ "epoch": 1.67,
150
+ "learning_rate": 8.941798941798942e-05,
151
+ "loss": 0.1362,
152
  "step": 210
153
  },
154
  {
155
+ "epoch": 1.75,
156
+ "learning_rate": 8.412698412698413e-05,
157
+ "loss": 0.1059,
158
  "step": 220
159
  },
160
  {
161
+ "epoch": 1.83,
162
+ "learning_rate": 7.883597883597884e-05,
163
+ "loss": 0.1051,
164
  "step": 230
165
  },
166
  {
167
+ "epoch": 1.9,
168
+ "learning_rate": 7.354497354497355e-05,
169
+ "loss": 0.2335,
170
  "step": 240
171
  },
172
  {
173
+ "epoch": 1.98,
174
+ "learning_rate": 6.825396825396825e-05,
175
+ "loss": 0.0877,
176
  "step": 250
177
  },
178
  {
179
+ "epoch": 2.06,
180
+ "learning_rate": 6.296296296296296e-05,
181
+ "loss": 0.0765,
182
  "step": 260
183
  },
184
  {
185
+ "epoch": 2.14,
186
+ "learning_rate": 5.7671957671957676e-05,
187
+ "loss": 0.0409,
188
  "step": 270
189
  },
190
  {
191
+ "epoch": 2.22,
192
+ "learning_rate": 5.2380952380952384e-05,
193
+ "loss": 0.0978,
194
  "step": 280
195
  },
196
  {
197
+ "epoch": 2.3,
198
+ "learning_rate": 4.708994708994709e-05,
199
+ "loss": 0.048,
200
  "step": 290
201
  },
202
  {
203
+ "epoch": 2.38,
204
+ "learning_rate": 4.17989417989418e-05,
205
+ "loss": 0.0135,
206
  "step": 300
207
  },
208
  {
209
+ "epoch": 2.38,
210
+ "eval_f1": 0.580441640378549,
211
+ "eval_loss": 0.4047260880470276,
212
+ "eval_runtime": 9.0047,
213
+ "eval_samples_per_second": 107.499,
214
+ "eval_steps_per_second": 13.437,
215
  "step": 300
216
  },
217
  {
218
+ "epoch": 2.46,
219
+ "learning_rate": 3.650793650793651e-05,
220
+ "loss": 0.0093,
221
  "step": 310
222
  },
223
  {
224
+ "epoch": 2.54,
225
+ "learning_rate": 3.121693121693122e-05,
226
+ "loss": 0.0163,
227
  "step": 320
228
  },
229
  {
230
+ "epoch": 2.62,
231
+ "learning_rate": 2.5925925925925925e-05,
232
+ "loss": 0.0783,
233
  "step": 330
234
  },
235
  {
236
+ "epoch": 2.7,
237
+ "learning_rate": 2.0634920634920636e-05,
238
+ "loss": 0.0288,
239
  "step": 340
240
  },
241
  {
242
+ "epoch": 2.78,
243
+ "learning_rate": 1.5343915343915344e-05,
244
+ "loss": 0.033,
245
  "step": 350
246
  },
247
  {
248
+ "epoch": 2.86,
249
+ "learning_rate": 1.0052910052910053e-05,
250
+ "loss": 0.0308,
251
  "step": 360
252
  },
253
  {
254
+ "epoch": 2.94,
255
+ "learning_rate": 4.7619047619047615e-06,
256
+ "loss": 0.0293,
257
  "step": 370
258
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  {
260
  "epoch": 3.0,
261
+ "step": 378,
262
+ "total_flos": 4.665792694694584e+17,
263
+ "train_loss": 0.15108026370958047,
264
+ "train_runtime": 222.8357,
265
+ "train_samples_per_second": 27.02,
266
+ "train_steps_per_second": 1.696
267
  }
268
  ],
269
+ "max_steps": 378,
270
  "num_train_epochs": 3,
271
+ "total_flos": 4.665792694694584e+17,
272
  "trial_name": null,
273
  "trial_params": null
274
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f037a2d700160eee370bbefe6a19ec9e9dd3a4adefcc6d81620479ef744a13aa
3
  size 3515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34043900b560431430b7737d36e5976c630010556c272b1438037be0984bfc67
3
  size 3515