jflotz commited on
Commit
27b8fd7
·
1 Parent(s): 115020b

Training in progress, step 900000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:683c63bc197cf3bb64f6c2ce95a62fc4f0bf6028b19e6d2e5831707a2f06c758
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15d8bbdd8e9ea2eabe38aad0708eba010797097fc97de2bc108dfae7de70eae1
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40f6901c05f87feaf8e889e20cabff1c85d845893260d2343fe525c0b122a6e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:083d2e25ea0ac8e88690f7365c08a3344ba965c5f4549f033918008711bc1651
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20800e78ca6d34f9d5b11fa5e4b130d78895eafe993ba74e5838487393b5953b
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2afa6aa14483adb7c817c2439178a198c4680dbfe427eab82def33bea1566914
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa624c96f92eaad028a188cf78acc34c38cdc88db165dfecd04176965e65555e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.568919175133555,
5
- "global_step": 890000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -17806,11 +17806,211 @@
17806
  "eval_samples_per_second": 855.002,
17807
  "eval_steps_per_second": 13.4,
17808
  "step": 890000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17809
  }
17810
  ],
17811
  "max_steps": 1000000,
17812
  "num_train_epochs": 12,
17813
- "total_flos": 6.238873943673255e+22,
17814
  "trial_name": null,
17815
  "trial_params": null
17816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.68044789935648,
5
+ "global_step": 900000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
17806
  "eval_samples_per_second": 855.002,
17807
  "eval_steps_per_second": 13.4,
17808
  "step": 890000
17809
+ },
17810
+ {
17811
+ "epoch": 9.57,
17812
+ "learning_rate": 1.4539396316922552e-05,
17813
+ "loss": 0.1822,
17814
+ "step": 890500
17815
+ },
17816
+ {
17817
+ "epoch": 9.58,
17818
+ "learning_rate": 1.4498484122598232e-05,
17819
+ "loss": 0.1824,
17820
+ "step": 891000
17821
+ },
17822
+ {
17823
+ "epoch": 9.58,
17824
+ "eval_loss": 0.17072877287864685,
17825
+ "eval_runtime": 2.7539,
17826
+ "eval_samples_per_second": 834.091,
17827
+ "eval_steps_per_second": 13.072,
17828
+ "step": 891000
17829
+ },
17830
+ {
17831
+ "epoch": 9.59,
17832
+ "learning_rate": 1.4457751006917137e-05,
17833
+ "loss": 0.1822,
17834
+ "step": 891500
17835
+ },
17836
+ {
17837
+ "epoch": 9.59,
17838
+ "learning_rate": 1.4417197081242083e-05,
17839
+ "loss": 0.1829,
17840
+ "step": 892000
17841
+ },
17842
+ {
17843
+ "epoch": 9.59,
17844
+ "eval_loss": 0.17135965824127197,
17845
+ "eval_runtime": 2.7156,
17846
+ "eval_samples_per_second": 845.864,
17847
+ "eval_steps_per_second": 13.257,
17848
+ "step": 892000
17849
+ },
17850
+ {
17851
+ "epoch": 9.6,
17852
+ "learning_rate": 1.4376822456445926e-05,
17853
+ "loss": 0.1825,
17854
+ "step": 892500
17855
+ },
17856
+ {
17857
+ "epoch": 9.6,
17858
+ "learning_rate": 1.433662724291136e-05,
17859
+ "loss": 0.1828,
17860
+ "step": 893000
17861
+ },
17862
+ {
17863
+ "epoch": 9.6,
17864
+ "eval_loss": 0.17201776802539825,
17865
+ "eval_runtime": 2.6531,
17866
+ "eval_samples_per_second": 865.779,
17867
+ "eval_steps_per_second": 13.569,
17868
+ "step": 893000
17869
+ },
17870
+ {
17871
+ "epoch": 9.61,
17872
+ "learning_rate": 1.4296611550530563e-05,
17873
+ "loss": 0.1823,
17874
+ "step": 893500
17875
+ },
17876
+ {
17877
+ "epoch": 9.61,
17878
+ "learning_rate": 1.4256775488704904e-05,
17879
+ "loss": 0.1825,
17880
+ "step": 894000
17881
+ },
17882
+ {
17883
+ "epoch": 9.61,
17884
+ "eval_loss": 0.17262525856494904,
17885
+ "eval_runtime": 2.6435,
17886
+ "eval_samples_per_second": 868.939,
17887
+ "eval_steps_per_second": 13.619,
17888
+ "step": 894000
17889
+ },
17890
+ {
17891
+ "epoch": 9.62,
17892
+ "learning_rate": 1.4217119166344665e-05,
17893
+ "loss": 0.1822,
17894
+ "step": 894500
17895
+ },
17896
+ {
17897
+ "epoch": 9.62,
17898
+ "learning_rate": 1.4177642691868717e-05,
17899
+ "loss": 0.1819,
17900
+ "step": 895000
17901
+ },
17902
+ {
17903
+ "epoch": 9.62,
17904
+ "eval_loss": 0.1716921329498291,
17905
+ "eval_runtime": 2.6607,
17906
+ "eval_samples_per_second": 863.304,
17907
+ "eval_steps_per_second": 13.53,
17908
+ "step": 895000
17909
+ },
17910
+ {
17911
+ "epoch": 9.63,
17912
+ "learning_rate": 1.4138346173204218e-05,
17913
+ "loss": 0.1823,
17914
+ "step": 895500
17915
+ },
17916
+ {
17917
+ "epoch": 9.64,
17918
+ "learning_rate": 1.4099229717786368e-05,
17919
+ "loss": 0.1825,
17920
+ "step": 896000
17921
+ },
17922
+ {
17923
+ "epoch": 9.64,
17924
+ "eval_loss": 0.1738402545452118,
17925
+ "eval_runtime": 2.7328,
17926
+ "eval_samples_per_second": 840.538,
17927
+ "eval_steps_per_second": 13.173,
17928
+ "step": 896000
17929
+ },
17930
+ {
17931
+ "epoch": 9.64,
17932
+ "learning_rate": 1.406029343255806e-05,
17933
+ "loss": 0.1823,
17934
+ "step": 896500
17935
+ },
17936
+ {
17937
+ "epoch": 9.65,
17938
+ "learning_rate": 1.4021537423969588e-05,
17939
+ "loss": 0.1823,
17940
+ "step": 897000
17941
+ },
17942
+ {
17943
+ "epoch": 9.65,
17944
+ "eval_loss": 0.1751101166009903,
17945
+ "eval_runtime": 2.7169,
17946
+ "eval_samples_per_second": 845.459,
17947
+ "eval_steps_per_second": 13.251,
17948
+ "step": 897000
17949
+ },
17950
+ {
17951
+ "epoch": 9.65,
17952
+ "learning_rate": 1.3982961797978431e-05,
17953
+ "loss": 0.1819,
17954
+ "step": 897500
17955
+ },
17956
+ {
17957
+ "epoch": 9.66,
17958
+ "learning_rate": 1.3944566660048863e-05,
17959
+ "loss": 0.1823,
17960
+ "step": 898000
17961
+ },
17962
+ {
17963
+ "epoch": 9.66,
17964
+ "eval_loss": 0.1745595484972,
17965
+ "eval_runtime": 2.78,
17966
+ "eval_samples_per_second": 826.25,
17967
+ "eval_steps_per_second": 12.95,
17968
+ "step": 898000
17969
+ },
17970
+ {
17971
+ "epoch": 9.66,
17972
+ "learning_rate": 1.3906352115151725e-05,
17973
+ "loss": 0.1821,
17974
+ "step": 898500
17975
+ },
17976
+ {
17977
+ "epoch": 9.67,
17978
+ "learning_rate": 1.3868318267764128e-05,
17979
+ "loss": 0.1821,
17980
+ "step": 899000
17981
+ },
17982
+ {
17983
+ "epoch": 9.67,
17984
+ "eval_loss": 0.17514048516750336,
17985
+ "eval_runtime": 2.6917,
17986
+ "eval_samples_per_second": 853.365,
17987
+ "eval_steps_per_second": 13.374,
17988
+ "step": 899000
17989
+ },
17990
+ {
17991
+ "epoch": 9.67,
17992
+ "learning_rate": 1.3830465221869146e-05,
17993
+ "loss": 0.1821,
17994
+ "step": 899500
17995
+ },
17996
+ {
17997
+ "epoch": 9.68,
17998
+ "learning_rate": 1.3792793080955574e-05,
17999
+ "loss": 0.1819,
18000
+ "step": 900000
18001
+ },
18002
+ {
18003
+ "epoch": 9.68,
18004
+ "eval_loss": 0.17348013818264008,
18005
+ "eval_runtime": 2.6872,
18006
+ "eval_samples_per_second": 854.803,
18007
+ "eval_steps_per_second": 13.397,
18008
+ "step": 900000
18009
  }
18010
  ],
18011
  "max_steps": 1000000,
18012
  "num_train_epochs": 12,
18013
+ "total_flos": 6.3089742047036024e+22,
18014
  "trial_name": null,
18015
  "trial_params": null
18016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40f6901c05f87feaf8e889e20cabff1c85d845893260d2343fe525c0b122a6e9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:083d2e25ea0ac8e88690f7365c08a3344ba965c5f4549f033918008711bc1651
3
  size 449471589