jflotz commited on
Commit
bf381ec
·
1 Parent(s): fd6b070

Training in progress, step 810000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a28142d72c1a639c002962982eef85320960aec72ba5875c70ba183bc4428b1
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4d687ddbdb4e89fc2dcf4a1194021793a9bf6bf7cb019db9f4960ca46caec57
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce05dc3d2487d7ae9aa6b0c59ea7f4616a5a7c54ca64071fd6c540348ab08786
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ec87ec32fe6f1afb99642886552d48e3abf86b7380d88757c48489a6974eadf
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81c65759179a0409080d1617c50ff7701ce92dbb64fc3e317b9b62050537c3c7
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e09a2f5687d865e3c781f2165eefcb1856cc3c45b89b03d8a7d88cfa59107bfb
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44a3c67c522ab27350032a90eb3d95a3dabd1324000cbd2835b62067512227df
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acb039c0d2f72216fce3d8d73be28207294bda5cfc4474547820110b11abd2a8
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d08d861261452452fd94fae84216cedf28f7702bcc30b7c349d5fe92376b24f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5adc671b102c788828dd37ee1d0f0b0a03c77bb3d279f123bf2cbe3d6d5cd23e
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68acbc45c81b60f69d2a54d496b38309d0cdcd82a52bd9be2a013dfa91b5b790
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8116bccf5ca568ceb54edc4c2f036f266c94ac035ff894ed751446d6238c146f
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7bd01ddf08fc5e563abc90c1bcd231f2103e9ae4cae190ae9888d8dbb7258d1
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4decc89420866c9caffa620eb043d2f83ba7fe11eee6e3e9db617a680a5e3419
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:294721238c7a8c697c0dc55ae3f4c4580f7fc2de42c41858980ea55e897cb68b
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3ecec9a0f4bcccdee7615b55d893bdd755c9846b4db2c967eb5630e87ff3741
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a269e37b31e512e59995e2d8559babba1725032fdbd7ed2e8cd2d1c9cdf42315
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7819877f3abe602d4d64d7ac8cdc0b37ac08d27db902f3ca861703ead38253c
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ada5eca471a55afc26d1f38512d1163ebd3e62514e19a602711641b70fa1cc7
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c95826cc93c68dd743486cd9578bd00eeec47504d8a825d434c0d8b522697126
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc1b9455cfc1cc0d89898dcc5885e6586d120f989c1d84c4e74dce29a1aeae31
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:311a816d2396f8ee752cab7a1a3a8667609453373ab7e8b0474b724f8acc447d
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1395078164236574,
5
- "global_step": 800000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9606,11 +9606,131 @@
9606
  "learning_rate": 2.476016434225246e-05,
9607
  "loss": 0.2901,
9608
  "step": 800000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9609
  }
9610
  ],
9611
  "max_steps": 1000000,
9612
  "num_train_epochs": 2,
9613
- "total_flos": 5.4085624444111735e+22,
9614
  "trial_name": null,
9615
  "trial_params": null
9616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1594375044841798,
5
+ "global_step": 810000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9606
  "learning_rate": 2.476016434225246e-05,
9607
  "loss": 0.2901,
9608
  "step": 800000
9609
+ },
9610
+ {
9611
+ "epoch": 1.14,
9612
+ "learning_rate": 2.4689149133701672e-05,
9613
+ "loss": 0.2905,
9614
+ "step": 800500
9615
+ },
9616
+ {
9617
+ "epoch": 1.14,
9618
+ "learning_rate": 2.461828514290513e-05,
9619
+ "loss": 0.2898,
9620
+ "step": 801000
9621
+ },
9622
+ {
9623
+ "epoch": 1.14,
9624
+ "learning_rate": 2.4547572563602267e-05,
9625
+ "loss": 0.2903,
9626
+ "step": 801500
9627
+ },
9628
+ {
9629
+ "epoch": 1.14,
9630
+ "learning_rate": 2.447701158911855e-05,
9631
+ "loss": 0.2898,
9632
+ "step": 802000
9633
+ },
9634
+ {
9635
+ "epoch": 1.14,
9636
+ "learning_rate": 2.4406602412365027e-05,
9637
+ "loss": 0.2898,
9638
+ "step": 802500
9639
+ },
9640
+ {
9641
+ "epoch": 1.15,
9642
+ "learning_rate": 2.4336345225837658e-05,
9643
+ "loss": 0.2902,
9644
+ "step": 803000
9645
+ },
9646
+ {
9647
+ "epoch": 1.15,
9648
+ "learning_rate": 2.4266240221616956e-05,
9649
+ "loss": 0.2905,
9650
+ "step": 803500
9651
+ },
9652
+ {
9653
+ "epoch": 1.15,
9654
+ "learning_rate": 2.4196287591367296e-05,
9655
+ "loss": 0.2897,
9656
+ "step": 804000
9657
+ },
9658
+ {
9659
+ "epoch": 1.15,
9660
+ "learning_rate": 2.412648752633649e-05,
9661
+ "loss": 0.2898,
9662
+ "step": 804500
9663
+ },
9664
+ {
9665
+ "epoch": 1.15,
9666
+ "learning_rate": 2.405684021735527e-05,
9667
+ "loss": 0.2898,
9668
+ "step": 805000
9669
+ },
9670
+ {
9671
+ "epoch": 1.15,
9672
+ "learning_rate": 2.39873458548367e-05,
9673
+ "loss": 0.2895,
9674
+ "step": 805500
9675
+ },
9676
+ {
9677
+ "epoch": 1.15,
9678
+ "learning_rate": 2.3918004628775736e-05,
9679
+ "loss": 0.2897,
9680
+ "step": 806000
9681
+ },
9682
+ {
9683
+ "epoch": 1.15,
9684
+ "learning_rate": 2.3848816728748643e-05,
9685
+ "loss": 0.2897,
9686
+ "step": 806500
9687
+ },
9688
+ {
9689
+ "epoch": 1.15,
9690
+ "learning_rate": 2.3779782343912463e-05,
9691
+ "loss": 0.2888,
9692
+ "step": 807000
9693
+ },
9694
+ {
9695
+ "epoch": 1.15,
9696
+ "learning_rate": 2.3710901663004604e-05,
9697
+ "loss": 0.29,
9698
+ "step": 807500
9699
+ },
9700
+ {
9701
+ "epoch": 1.16,
9702
+ "learning_rate": 2.364217487434221e-05,
9703
+ "loss": 0.2895,
9704
+ "step": 808000
9705
+ },
9706
+ {
9707
+ "epoch": 1.16,
9708
+ "learning_rate": 2.3573602165821668e-05,
9709
+ "loss": 0.2899,
9710
+ "step": 808500
9711
+ },
9712
+ {
9713
+ "epoch": 1.16,
9714
+ "learning_rate": 2.3505183724918196e-05,
9715
+ "loss": 0.2897,
9716
+ "step": 809000
9717
+ },
9718
+ {
9719
+ "epoch": 1.16,
9720
+ "learning_rate": 2.3436919738685132e-05,
9721
+ "loss": 0.29,
9722
+ "step": 809500
9723
+ },
9724
+ {
9725
+ "epoch": 1.16,
9726
+ "learning_rate": 2.3368810393753687e-05,
9727
+ "loss": 0.2895,
9728
+ "step": 810000
9729
  }
9730
  ],
9731
  "max_steps": 1000000,
9732
  "num_train_epochs": 2,
9733
+ "total_flos": 5.476171643101538e+22,
9734
  "trial_name": null,
9735
  "trial_params": null
9736
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce05dc3d2487d7ae9aa6b0c59ea7f4616a5a7c54ca64071fd6c540348ab08786
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ec87ec32fe6f1afb99642886552d48e3abf86b7380d88757c48489a6974eadf
3
  size 449450757