jflotz commited on
Commit
3d20115
·
1 Parent(s): 4d4591b

Training in progress, step 830000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb7fd5efb2ff87d094795926832b73ee7aa1e6614b8bfcc85208c1fac61c542d
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ef1b487cabfa82f2f81ded5cef2d529a41a97f8137d750d66e05d443478e91a
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dd2e6a7da537ff760743918bcaefe9c986fc2c76a2e5c7636f4801ac5b5cd7b
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b63e12b39341c1c8d25b10dc8ca07cde5e82303726c5b23ac1fdd24345f08c2a
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0609e713a668aa906084c6d7021e86eb2ccb60af4342d1b4397b833b19fbbc89
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34241c46169d6140a8feca7f2e1db99ea6d7326e000406064ac61feecf3f17b2
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3592882666e2655598af900b2cb26f9f1ba0f86a99376f61e0440aabceaa6a07
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fb4767c32ae0f5cf34a94816a959b9568b2ea2174c179de2a3df992017a5f77
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ca8c6c74101f99bae3f401fe7213e04d3ffd2cde1efe24238143ecf20918b53
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efc0c6d968c43ae24dfc91f94bcca80995b7ed16b92115235f58d28e636291a4
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a67149e6f2c803c5510419511ac904f9537eb4fbff5066e666ed08b44f9664f
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df72b23925eb5f82752cd028feed8cf7d2bbe1dbdf011029efc7554632b26ae2
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e59f03ce343bc2279a4caef20772b97445f7cad5f56f03c6fe562ef9297aba5
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd1259a368e737ba331fab7b54a98c7cfc3dd87ed455bcbc28503bda235c71d3
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49b75ef2c8e55abd504706b58ef3e0a8e29500c910d22cedece78b4bbdd0c43a
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f2eac6df5cd35dc068eb0bab0d362a924c2221b17b5c9bfe0e42f4ba3c05c9c
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a3e48ab19d013994e76ce74871eebcf792a7f0de0b1bc5e31a34b55c3911660
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e7f8260a2f4cc917f2e6b69683488cb603bdd620504afe136b5dc796247fcec
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83f313ad3307a61df4799cc55bebfd321f79a2d67e43e4af68c3ea5f2e8a2721
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41a93612851ee52955512369b8723183837a90f2506b353970cd64de58829b3d
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:735207f6708877a90cb07d7ba5bda4e331561a0f2cd24f0dff22c21fa9a1d464
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bde979dd08e67682f748b136bdd4f7c962a7cebec495023e183c816b6c75933
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1793671925447022,
5
- "global_step": 820000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9846,11 +9846,131 @@
9846
  "learning_rate": 2.2039381819638596e-05,
9847
  "loss": 0.2894,
9848
  "step": 820000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9849
  }
9850
  ],
9851
  "max_steps": 1000000,
9852
  "num_train_epochs": 2,
9853
- "total_flos": 5.543772678797676e+22,
9854
  "trial_name": null,
9855
  "trial_params": null
9856
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1992968806052247,
5
+ "global_step": 830000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9846
  "learning_rate": 2.2039381819638596e-05,
9847
  "loss": 0.2894,
9848
  "step": 820000
9849
+ },
9850
+ {
9851
+ "epoch": 1.18,
9852
+ "learning_rate": 2.1974562300613417e-05,
9853
+ "loss": 0.2893,
9854
+ "step": 820500
9855
+ },
9856
+ {
9857
+ "epoch": 1.18,
9858
+ "learning_rate": 2.1909901420919184e-05,
9859
+ "loss": 0.2891,
9860
+ "step": 821000
9861
+ },
9862
+ {
9863
+ "epoch": 1.18,
9864
+ "learning_rate": 2.1845399357336326e-05,
9865
+ "loss": 0.2891,
9866
+ "step": 821500
9867
+ },
9868
+ {
9869
+ "epoch": 1.18,
9870
+ "learning_rate": 2.1781056286210997e-05,
9871
+ "loss": 0.2891,
9872
+ "step": 822000
9873
+ },
9874
+ {
9875
+ "epoch": 1.18,
9876
+ "learning_rate": 2.1716872383454674e-05,
9877
+ "loss": 0.2892,
9878
+ "step": 822500
9879
+ },
9880
+ {
9881
+ "epoch": 1.19,
9882
+ "learning_rate": 2.1652847824543744e-05,
9883
+ "loss": 0.2884,
9884
+ "step": 823000
9885
+ },
9886
+ {
9887
+ "epoch": 1.19,
9888
+ "learning_rate": 2.1588982784518853e-05,
9889
+ "loss": 0.288,
9890
+ "step": 823500
9891
+ },
9892
+ {
9893
+ "epoch": 1.19,
9894
+ "learning_rate": 2.1525277437984636e-05,
9895
+ "loss": 0.2889,
9896
+ "step": 824000
9897
+ },
9898
+ {
9899
+ "epoch": 1.19,
9900
+ "learning_rate": 2.1461731959109053e-05,
9901
+ "loss": 0.2893,
9902
+ "step": 824500
9903
+ },
9904
+ {
9905
+ "epoch": 1.19,
9906
+ "learning_rate": 2.1398346521623e-05,
9907
+ "loss": 0.2884,
9908
+ "step": 825000
9909
+ },
9910
+ {
9911
+ "epoch": 1.19,
9912
+ "learning_rate": 2.1335121298819867e-05,
9913
+ "loss": 0.2891,
9914
+ "step": 825500
9915
+ },
9916
+ {
9917
+ "epoch": 1.19,
9918
+ "learning_rate": 2.1272056463554978e-05,
9919
+ "loss": 0.2887,
9920
+ "step": 826000
9921
+ },
9922
+ {
9923
+ "epoch": 1.19,
9924
+ "learning_rate": 2.1209152188245214e-05,
9925
+ "loss": 0.2887,
9926
+ "step": 826500
9927
+ },
9928
+ {
9929
+ "epoch": 1.19,
9930
+ "learning_rate": 2.114640864486845e-05,
9931
+ "loss": 0.2892,
9932
+ "step": 827000
9933
+ },
9934
+ {
9935
+ "epoch": 1.19,
9936
+ "learning_rate": 2.1083826004963102e-05,
9937
+ "loss": 0.2888,
9938
+ "step": 827500
9939
+ },
9940
+ {
9941
+ "epoch": 1.2,
9942
+ "learning_rate": 2.1021404439627775e-05,
9943
+ "loss": 0.2889,
9944
+ "step": 828000
9945
+ },
9946
+ {
9947
+ "epoch": 1.2,
9948
+ "learning_rate": 2.09591441195206e-05,
9949
+ "loss": 0.2878,
9950
+ "step": 828500
9951
+ },
9952
+ {
9953
+ "epoch": 1.2,
9954
+ "learning_rate": 2.089704521485896e-05,
9955
+ "loss": 0.2888,
9956
+ "step": 829000
9957
+ },
9958
+ {
9959
+ "epoch": 1.2,
9960
+ "learning_rate": 2.083510789541883e-05,
9961
+ "loss": 0.2879,
9962
+ "step": 829500
9963
+ },
9964
+ {
9965
+ "epoch": 1.2,
9966
+ "learning_rate": 2.0773332330534513e-05,
9967
+ "loss": 0.2887,
9968
+ "step": 830000
9969
  }
9970
  ],
9971
  "max_steps": 1000000,
9972
  "num_train_epochs": 2,
9973
+ "total_flos": 5.6113757283593115e+22,
9974
  "trial_name": null,
9975
  "trial_params": null
9976
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dd2e6a7da537ff760743918bcaefe9c986fc2c76a2e5c7636f4801ac5b5cd7b
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b63e12b39341c1c8d25b10dc8ca07cde5e82303726c5b23ac1fdd24345f08c2a
3
  size 449450757