jflotz commited on
Commit
16c4f23
·
1 Parent(s): f16df8c

Training in progress, step 880000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de9e7a475185d5a4f5233f2eaa1e382e39bf61b0b26422c710d12b1c428f053
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f011531683b29e6010c43a1101dacea9ed4a18363936f0bda8424b0b64fd61d5
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cc5f078f248ee95608611561bbd9aea19a49de30cb9f351661140392a113dcb
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:761b97519f5fabc1226f543fdfda1e57b01e50e22126717fe93929f009e6d948
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc1e6c47e318d84077f6a967794dd3042b980af9f2017841afbd1886d7db9904
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:935da3b045c8c8b8ce754d2c39cf0981b2085a82929dd47cd40a448687388e6a
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4623ab6fbb26e814bfa3dd96c774e4b841513bb02553b1e19ff1a3408dd2f2ba
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbdeea7578a634cee6754d432445c1d3c22f4d3700bcbfbce695f5cbf096739a
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b84195dd425f6794c5806a9bed14c1dd56437e46af995a7b88fe65b75813426e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6c5ff5b48c10910314f1846b5d56765a69ff07e0a1c2179950e7e708be45c72
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a330644903d9cebc0a0cef1ede19ebdfcc46633ad2affcbf69e9f23412d356cb
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538db9627b3319da412d82dd2d788799c67c9fd4db76e45825c3a463b4ec891a
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d23d54ddb4a3e42a55be419de46e8b2afb37f587b9bc4ebb416244cb1aa1460d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cefdc80aebec1bb7388820de673e9b9482e6730079e6308c4991cb6f1f6a1673
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50c90440bc56cdec79071a507653e9cc50024a2dbba647fc3d934431503f32da
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:078a7ec7b0b49e632a586c20c54dd7869580bfe51e6466659e4a1c049cec1d5c
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f41c8eaa88d8a5dc08e3c2e3def98b7a8b86cad84a79d3f83ed3a1a7c681beb0
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ee34e421a4579fb938367d875bd6d0e9cb1e7cc940564954160dbbd10ab58c
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1faf83b262a97abbbb6024851cdc5f7c1c92261d8aa4e9260f21590a330cc4e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7f6c3ac5b3d9ff9275bcc6f2b68cf34b2b33a4d26ba0d17a6dea955ffb4848b
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:843b249180999e01b18cb497ce97e55f21bbe186ae977c32256fbedc40db0e39
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6c3b9739e151e083ac7876c028ada3eeb37ab066447f172eee69fac580c5323
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2790156328473148,
5
- "global_step": 870000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10446,11 +10446,131 @@
10446
  "learning_rate": 1.6369542546558626e-05,
10447
  "loss": 0.2864,
10448
  "step": 870000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10449
  }
10450
  ],
10451
  "max_steps": 1000000,
10452
  "num_train_epochs": 2,
10453
- "total_flos": 5.881811674297794e+22,
10454
  "trial_name": null,
10455
  "trial_params": null
10456
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2989453209078372,
5
+ "global_step": 880000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10446
  "learning_rate": 1.6369542546558626e-05,
10447
  "loss": 0.2864,
10448
  "step": 870000
10449
+ },
10450
+ {
10451
+ "epoch": 1.28,
10452
+ "learning_rate": 1.6321390014277996e-05,
10453
+ "loss": 0.2868,
10454
+ "step": 870500
10455
+ },
10456
+ {
10457
+ "epoch": 1.28,
10458
+ "learning_rate": 1.6273411576885517e-05,
10459
+ "loss": 0.2869,
10460
+ "step": 871000
10461
+ },
10462
+ {
10463
+ "epoch": 1.28,
10464
+ "learning_rate": 1.6225607365552378e-05,
10465
+ "loss": 0.2863,
10466
+ "step": 871500
10467
+ },
10468
+ {
10469
+ "epoch": 1.28,
10470
+ "learning_rate": 1.617797751097349e-05,
10471
+ "loss": 0.2866,
10472
+ "step": 872000
10473
+ },
10474
+ {
10475
+ "epoch": 1.28,
10476
+ "learning_rate": 1.6130522143367032e-05,
10477
+ "loss": 0.2868,
10478
+ "step": 872500
10479
+ },
10480
+ {
10481
+ "epoch": 1.28,
10482
+ "learning_rate": 1.608324139247421e-05,
10483
+ "loss": 0.2864,
10484
+ "step": 873000
10485
+ },
10486
+ {
10487
+ "epoch": 1.29,
10488
+ "learning_rate": 1.6036135387558756e-05,
10489
+ "loss": 0.2862,
10490
+ "step": 873500
10491
+ },
10492
+ {
10493
+ "epoch": 1.29,
10494
+ "learning_rate": 1.5989204257406693e-05,
10495
+ "loss": 0.286,
10496
+ "step": 874000
10497
+ },
10498
+ {
10499
+ "epoch": 1.29,
10500
+ "learning_rate": 1.594244813032595e-05,
10501
+ "loss": 0.286,
10502
+ "step": 874500
10503
+ },
10504
+ {
10505
+ "epoch": 1.29,
10506
+ "learning_rate": 1.5895867134145974e-05,
10507
+ "loss": 0.2861,
10508
+ "step": 875000
10509
+ },
10510
+ {
10511
+ "epoch": 1.29,
10512
+ "learning_rate": 1.5849461396217467e-05,
10513
+ "loss": 0.2855,
10514
+ "step": 875500
10515
+ },
10516
+ {
10517
+ "epoch": 1.29,
10518
+ "learning_rate": 1.5803231043411912e-05,
10519
+ "loss": 0.286,
10520
+ "step": 876000
10521
+ },
10522
+ {
10523
+ "epoch": 1.29,
10524
+ "learning_rate": 1.575717620212132e-05,
10525
+ "loss": 0.2867,
10526
+ "step": 876500
10527
+ },
10528
+ {
10529
+ "epoch": 1.29,
10530
+ "learning_rate": 1.5711296998257902e-05,
10531
+ "loss": 0.2867,
10532
+ "step": 877000
10533
+ },
10534
+ {
10535
+ "epoch": 1.29,
10536
+ "learning_rate": 1.5665593557253623e-05,
10537
+ "loss": 0.2865,
10538
+ "step": 877500
10539
+ },
10540
+ {
10541
+ "epoch": 1.29,
10542
+ "learning_rate": 1.562006600405996e-05,
10543
+ "loss": 0.286,
10544
+ "step": 878000
10545
+ },
10546
+ {
10547
+ "epoch": 1.3,
10548
+ "learning_rate": 1.5574714463147512e-05,
10549
+ "loss": 0.2857,
10550
+ "step": 878500
10551
+ },
10552
+ {
10553
+ "epoch": 1.3,
10554
+ "learning_rate": 1.5529539058505624e-05,
10555
+ "loss": 0.286,
10556
+ "step": 879000
10557
+ },
10558
+ {
10559
+ "epoch": 1.3,
10560
+ "learning_rate": 1.5484539913642175e-05,
10561
+ "loss": 0.2862,
10562
+ "step": 879500
10563
+ },
10564
+ {
10565
+ "epoch": 1.3,
10566
+ "learning_rate": 1.543971715158307e-05,
10567
+ "loss": 0.2863,
10568
+ "step": 880000
10569
  }
10570
  ],
10571
  "max_steps": 1000000,
10572
  "num_train_epochs": 2,
10573
+ "total_flos": 5.949418752030229e+22,
10574
  "trial_name": null,
10575
  "trial_params": null
10576
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cc5f078f248ee95608611561bbd9aea19a49de30cb9f351661140392a113dcb
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:761b97519f5fabc1226f543fdfda1e57b01e50e22126717fe93929f009e6d948
3
  size 449450757