jflotz commited on
Commit
b6d602e
·
1 Parent(s): 1d53b9f

Training in progress, step 660000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0094043a45ee455c34cdbf7e5ed868b844e2cc109c62c31adc8eabe0945cd55
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:facea498a3305bb3a4cf04ba493e9b7a282bf34c7699144b12c3401905a21a21
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afca04d573f1bb3162caabe1bb7b946edefb5cb8fa7beeabdf4a9618ee0ba3ea
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b0253822de0f7fe54e007e60affd0913e4b68439fe1550215e7a076507078bb
3
  size 449450757
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8afdb75bc9c4b3b8d3f36f77e21f0d34f0633a3fe673f092dd264b1121465456
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46211629a4cc11950e226f7dbdda304cda1420c43879285d3e04ebb8508dc043
3
  size 14583
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:334f507bebbd8e2eb32a3a52e1460054ef235aff9b388a6044a2cf6124700604
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c11aa747eb56c76be6b0a8185d8eb345ab22e3a85121576df152b2d5604743
3
  size 14583
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1fe6196ed311cd8ddb4f7739bbce785a7482bd7a8a89fc83aadbb7b199e0b80
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87f323fe99852eb9065ab82d8c0f478aeb01397ae4967c2beb7e4de9ca3f02ec
3
  size 14583
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c16cf0e46ab235f9e251974c64ca93772ae50300b4f1505ff50d8f4e2246708
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b8df123d1834d25753f8b002915374feeeb89dc9c6992036e0b849ae27e9320
3
  size 14583
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dea98aa1b40d4dde89de24ce301ffc44f0dee70fb25e51e28dfe6b65e5e6240d
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90443240959e9f40acd615af8df90f5d9b01b4b49736141665a99e8168dc3c6a
3
  size 14583
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:476caa126ce91db0ad93d8541266aa7e5c1a71c0473ab678864fc300fdd08e70
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bd0fd3633719fca8fe6bbb82b9e0f5384f9458c9e7cc0938ff32b60be639fdb
3
  size 14583
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3637d3be8f9c8d6ff9f1958a71fe2d848eaecb87ddf0683d13eaae5352425491
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8ca6715dedc0846ae860b91a71263b7a826d6a54596ee7919105a93c87a6496
3
  size 14583
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b62727907f78fc16c3f0e4b91fbdcc94bc537750512333e674d8d2c4dcd12411
3
  size 14583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:555ec27b704b2cb1160b682f3e0caec8c125b854ba2add3f3b500f4ed76a0e8e
3
  size 14583
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb24dd41ced00dc957e38e97b930833d2e52e5141588b4ec2f84d6e2ee23293d
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e8e59bcc271d9e83e953974cfcbd52d5f5cf63d456de8e0f805b45487976195
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2993814944086615,
5
- "global_step": 650000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7806,11 +7806,131 @@
7806
  "learning_rate": 5.188132027429215e-05,
7807
  "loss": 0.2991,
7808
  "step": 650000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7809
  }
7810
  ],
7811
  "max_steps": 1000000,
7812
  "num_train_epochs": 2,
7813
- "total_flos": 4.394457756168279e+22,
7814
  "trial_name": null,
7815
  "trial_params": null
7816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3193719789380256,
5
+ "global_step": 660000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7806
  "learning_rate": 5.188132027429215e-05,
7807
  "loss": 0.2991,
7808
  "step": 650000
7809
+ },
7810
+ {
7811
+ "epoch": 1.3,
7812
+ "learning_rate": 5.177536451140569e-05,
7813
+ "loss": 0.3005,
7814
+ "step": 650500
7815
+ },
7816
+ {
7817
+ "epoch": 1.3,
7818
+ "learning_rate": 5.166948591359489e-05,
7819
+ "loss": 0.3002,
7820
+ "step": 651000
7821
+ },
7822
+ {
7823
+ "epoch": 1.3,
7824
+ "learning_rate": 5.1563684770327804e-05,
7825
+ "loss": 0.3003,
7826
+ "step": 651500
7827
+ },
7828
+ {
7829
+ "epoch": 1.3,
7830
+ "learning_rate": 5.145796137086076e-05,
7831
+ "loss": 0.3,
7832
+ "step": 652000
7833
+ },
7834
+ {
7835
+ "epoch": 1.3,
7836
+ "learning_rate": 5.135231600423742e-05,
7837
+ "loss": 0.2997,
7838
+ "step": 652500
7839
+ },
7840
+ {
7841
+ "epoch": 1.31,
7842
+ "learning_rate": 5.124674895928823e-05,
7843
+ "loss": 0.2998,
7844
+ "step": 653000
7845
+ },
7846
+ {
7847
+ "epoch": 1.31,
7848
+ "learning_rate": 5.114126052462943e-05,
7849
+ "loss": 0.2998,
7850
+ "step": 653500
7851
+ },
7852
+ {
7853
+ "epoch": 1.31,
7854
+ "learning_rate": 5.103585098866237e-05,
7855
+ "loss": 0.2995,
7856
+ "step": 654000
7857
+ },
7858
+ {
7859
+ "epoch": 1.31,
7860
+ "learning_rate": 5.093052063957276e-05,
7861
+ "loss": 0.2996,
7862
+ "step": 654500
7863
+ },
7864
+ {
7865
+ "epoch": 1.31,
7866
+ "learning_rate": 5.082526976532968e-05,
7867
+ "loss": 0.2994,
7868
+ "step": 655000
7869
+ },
7870
+ {
7871
+ "epoch": 1.31,
7872
+ "learning_rate": 5.072009865368501e-05,
7873
+ "loss": 0.2996,
7874
+ "step": 655500
7875
+ },
7876
+ {
7877
+ "epoch": 1.31,
7878
+ "learning_rate": 5.061500759217261e-05,
7879
+ "loss": 0.2999,
7880
+ "step": 656000
7881
+ },
7882
+ {
7883
+ "epoch": 1.31,
7884
+ "learning_rate": 5.050999686810735e-05,
7885
+ "loss": 0.2997,
7886
+ "step": 656500
7887
+ },
7888
+ {
7889
+ "epoch": 1.31,
7890
+ "learning_rate": 5.04050667685846e-05,
7891
+ "loss": 0.2995,
7892
+ "step": 657000
7893
+ },
7894
+ {
7895
+ "epoch": 1.31,
7896
+ "learning_rate": 5.0300217580479244e-05,
7897
+ "loss": 0.2997,
7898
+ "step": 657500
7899
+ },
7900
+ {
7901
+ "epoch": 1.32,
7902
+ "learning_rate": 5.01954495904449e-05,
7903
+ "loss": 0.2993,
7904
+ "step": 658000
7905
+ },
7906
+ {
7907
+ "epoch": 1.32,
7908
+ "learning_rate": 5.0090763084913336e-05,
7909
+ "loss": 0.2991,
7910
+ "step": 658500
7911
+ },
7912
+ {
7913
+ "epoch": 1.32,
7914
+ "learning_rate": 4.998615835009339e-05,
7915
+ "loss": 0.2995,
7916
+ "step": 659000
7917
+ },
7918
+ {
7919
+ "epoch": 1.32,
7920
+ "learning_rate": 4.988163567197043e-05,
7921
+ "loss": 0.2993,
7922
+ "step": 659500
7923
+ },
7924
+ {
7925
+ "epoch": 1.32,
7926
+ "learning_rate": 4.97771953363055e-05,
7927
+ "loss": 0.299,
7928
+ "step": 660000
7929
  }
7930
  ],
7931
  "max_steps": 1000000,
7932
  "num_train_epochs": 2,
7933
+ "total_flos": 4.462065045446751e+22,
7934
  "trial_name": null,
7935
  "trial_params": null
7936
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afca04d573f1bb3162caabe1bb7b946edefb5cb8fa7beeabdf4a9618ee0ba3ea
3
  size 449450757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b0253822de0f7fe54e007e60affd0913e4b68439fe1550215e7a076507078bb
3
  size 449450757