jflotz commited on
Commit
4d4591b
·
1 Parent(s): bf381ec

Training in progress, step 820000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4d687ddbdb4e89fc2dcf4a1194021793a9bf6bf7cb019db9f4960ca46caec57
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb7fd5efb2ff87d094795926832b73ee7aa1e6614b8bfcc85208c1fac61c542d
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ec87ec32fe6f1afb99642886552d48e3abf86b7380d88757c48489a6974eadf
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dd2e6a7da537ff760743918bcaefe9c986fc2c76a2e5c7636f4801ac5b5cd7b
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e09a2f5687d865e3c781f2165eefcb1856cc3c45b89b03d8a7d88cfa59107bfb
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0609e713a668aa906084c6d7021e86eb2ccb60af4342d1b4397b833b19fbbc89
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acb039c0d2f72216fce3d8d73be28207294bda5cfc4474547820110b11abd2a8
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3592882666e2655598af900b2cb26f9f1ba0f86a99376f61e0440aabceaa6a07
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5adc671b102c788828dd37ee1d0f0b0a03c77bb3d279f123bf2cbe3d6d5cd23e
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ca8c6c74101f99bae3f401fe7213e04d3ffd2cde1efe24238143ecf20918b53
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8116bccf5ca568ceb54edc4c2f036f266c94ac035ff894ed751446d6238c146f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a67149e6f2c803c5510419511ac904f9537eb4fbff5066e666ed08b44f9664f
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4decc89420866c9caffa620eb043d2f83ba7fe11eee6e3e9db617a680a5e3419
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e59f03ce343bc2279a4caef20772b97445f7cad5f56f03c6fe562ef9297aba5
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3ecec9a0f4bcccdee7615b55d893bdd755c9846b4db2c967eb5630e87ff3741
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b75ef2c8e55abd504706b58ef3e0a8e29500c910d22cedece78b4bbdd0c43a
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7819877f3abe602d4d64d7ac8cdc0b37ac08d27db902f3ca861703ead38253c
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a3e48ab19d013994e76ce74871eebcf792a7f0de0b1bc5e31a34b55c3911660
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c95826cc93c68dd743486cd9578bd00eeec47504d8a825d434c0d8b522697126
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83f313ad3307a61df4799cc55bebfd321f79a2d67e43e4af68c3ea5f2e8a2721
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:311a816d2396f8ee752cab7a1a3a8667609453373ab7e8b0474b724f8acc447d
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:735207f6708877a90cb07d7ba5bda4e331561a0f2cd24f0dff22c21fa9a1d464
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1594375044841798,
5
- "global_step": 810000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9726,11 +9726,131 @@
9726
  "learning_rate": 2.3368810393753687e-05,
9727
  "loss": 0.2895,
9728
  "step": 810000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9729
  }
9730
  ],
9731
  "max_steps": 1000000,
9732
  "num_train_epochs": 2,
9733
- "total_flos": 5.476171643101538e+22,
9734
  "trial_name": null,
9735
  "trial_params": null
9736
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1793671925447022,
5
+ "global_step": 820000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9726
  "learning_rate": 2.3368810393753687e-05,
9727
  "loss": 0.2895,
9728
  "step": 810000
9729
+ },
9730
+ {
9731
+ "epoch": 1.16,
9732
+ "learning_rate": 2.3300855876332162e-05,
9733
+ "loss": 0.2894,
9734
+ "step": 810500
9735
+ },
9736
+ {
9737
+ "epoch": 1.16,
9738
+ "learning_rate": 2.32330563722056e-05,
9739
+ "loss": 0.2896,
9740
+ "step": 811000
9741
+ },
9742
+ {
9743
+ "epoch": 1.16,
9744
+ "learning_rate": 2.316541206673529e-05,
9745
+ "loss": 0.2896,
9746
+ "step": 811500
9747
+ },
9748
+ {
9749
+ "epoch": 1.16,
9750
+ "learning_rate": 2.309792314485815e-05,
9751
+ "loss": 0.2895,
9752
+ "step": 812000
9753
+ },
9754
+ {
9755
+ "epoch": 1.16,
9756
+ "learning_rate": 2.3030589791086353e-05,
9757
+ "loss": 0.2893,
9758
+ "step": 812500
9759
+ },
9760
+ {
9761
+ "epoch": 1.17,
9762
+ "learning_rate": 2.2963412189506695e-05,
9763
+ "loss": 0.2893,
9764
+ "step": 813000
9765
+ },
9766
+ {
9767
+ "epoch": 1.17,
9768
+ "learning_rate": 2.2896390523780156e-05,
9769
+ "loss": 0.2894,
9770
+ "step": 813500
9771
+ },
9772
+ {
9773
+ "epoch": 1.17,
9774
+ "learning_rate": 2.282952497714145e-05,
9775
+ "loss": 0.2894,
9776
+ "step": 814000
9777
+ },
9778
+ {
9779
+ "epoch": 1.17,
9780
+ "learning_rate": 2.2762815732398387e-05,
9781
+ "loss": 0.2896,
9782
+ "step": 814500
9783
+ },
9784
+ {
9785
+ "epoch": 1.17,
9786
+ "learning_rate": 2.2696262971931538e-05,
9787
+ "loss": 0.2891,
9788
+ "step": 815000
9789
+ },
9790
+ {
9791
+ "epoch": 1.17,
9792
+ "learning_rate": 2.2629866877693577e-05,
9793
+ "loss": 0.2892,
9794
+ "step": 815500
9795
+ },
9796
+ {
9797
+ "epoch": 1.17,
9798
+ "learning_rate": 2.2563627631208887e-05,
9799
+ "loss": 0.2892,
9800
+ "step": 816000
9801
+ },
9802
+ {
9803
+ "epoch": 1.17,
9804
+ "learning_rate": 2.2497545413573065e-05,
9805
+ "loss": 0.2902,
9806
+ "step": 816500
9807
+ },
9808
+ {
9809
+ "epoch": 1.17,
9810
+ "learning_rate": 2.2431620405452336e-05,
9811
+ "loss": 0.2889,
9812
+ "step": 817000
9813
+ },
9814
+ {
9815
+ "epoch": 1.17,
9816
+ "learning_rate": 2.23658527870832e-05,
9817
+ "loss": 0.2892,
9818
+ "step": 817500
9819
+ },
9820
+ {
9821
+ "epoch": 1.18,
9822
+ "learning_rate": 2.230024273827179e-05,
9823
+ "loss": 0.2885,
9824
+ "step": 818000
9825
+ },
9826
+ {
9827
+ "epoch": 1.18,
9828
+ "learning_rate": 2.223479043839345e-05,
9829
+ "loss": 0.2888,
9830
+ "step": 818500
9831
+ },
9832
+ {
9833
+ "epoch": 1.18,
9834
+ "learning_rate": 2.216949606639231e-05,
9835
+ "loss": 0.2892,
9836
+ "step": 819000
9837
+ },
9838
+ {
9839
+ "epoch": 1.18,
9840
+ "learning_rate": 2.2104359800780665e-05,
9841
+ "loss": 0.2885,
9842
+ "step": 819500
9843
+ },
9844
+ {
9845
+ "epoch": 1.18,
9846
+ "learning_rate": 2.2039381819638596e-05,
9847
+ "loss": 0.2894,
9848
+ "step": 820000
9849
  }
9850
  ],
9851
  "max_steps": 1000000,
9852
  "num_train_epochs": 2,
9853
+ "total_flos": 5.543772678797676e+22,
9854
  "trial_name": null,
9855
  "trial_params": null
9856
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ec87ec32fe6f1afb99642886552d48e3abf86b7380d88757c48489a6974eadf
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dd2e6a7da537ff760743918bcaefe9c986fc2c76a2e5c7636f4801ac5b5cd7b
3
  size 449450757