jflotz committed on
Commit
c1407b6
·
1 Parent(s): d0638a2

Training in progress, step 850000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24859c623c6a5769d23a445d9e652805ef93ef8232d0532f3fafc5dad772c85e
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e51d88aff5257549a283e52c14c7816f109ed1c8f1cd4c0209be1013bd750037
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b27758f4c4f1be46ca953f785452acae2687180a06e7c14c3b975c46e8947612
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c2b61d52c517c0fe4a2eaad25ea86eca5fed6ebf386f54b8df5e3364654d10f
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b75da63b821a4c72c4b37f39fc301b88ce6e4d7dc37edf4f078b7f5706f736e3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fbbdae9c95471a40e6c6d019353b081fa6055bc839ed4f2163c0c1b80837934
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.011275554018937,
5
- "global_step": 840000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -16806,11 +16806,211 @@
16806
  "eval_samples_per_second": 870.984,
16807
  "eval_steps_per_second": 13.651,
16808
  "step": 840000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16809
  }
16810
  ],
16811
  "max_steps": 1000000,
16812
  "num_train_epochs": 12,
16813
- "total_flos": 5.8883726385215196e+22,
16814
  "trial_name": null,
16815
  "trial_params": null
16816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.122804278241862,
5
+ "global_step": 850000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
16806
  "eval_samples_per_second": 870.984,
16807
  "eval_steps_per_second": 13.651,
16808
  "step": 840000
16809
+ },
16810
+ {
16811
+ "epoch": 9.02,
16812
+ "learning_rate": 1.95137059427344e-05,
16813
+ "loss": 0.1851,
16814
+ "step": 840500
16815
+ },
16816
+ {
16817
+ "epoch": 9.02,
16818
+ "learning_rate": 1.945553091971727e-05,
16819
+ "loss": 0.1852,
16820
+ "step": 841000
16821
+ },
16822
+ {
16823
+ "epoch": 9.02,
16824
+ "eval_loss": 0.17605267465114594,
16825
+ "eval_runtime": 2.6415,
16826
+ "eval_samples_per_second": 869.593,
16827
+ "eval_steps_per_second": 13.629,
16828
+ "step": 841000
16829
+ },
16830
+ {
16831
+ "epoch": 9.03,
16832
+ "learning_rate": 1.93975214229667e-05,
16833
+ "loss": 0.185,
16834
+ "step": 841500
16835
+ },
16836
+ {
16837
+ "epoch": 9.03,
16838
+ "learning_rate": 1.933967761107847e-05,
16839
+ "loss": 0.1846,
16840
+ "step": 842000
16841
+ },
16842
+ {
16843
+ "epoch": 9.03,
16844
+ "eval_loss": 0.17334681749343872,
16845
+ "eval_runtime": 2.679,
16846
+ "eval_samples_per_second": 857.418,
16847
+ "eval_steps_per_second": 13.438,
16848
+ "step": 842000
16849
+ },
16850
+ {
16851
+ "epoch": 9.04,
16852
+ "learning_rate": 1.928199964219533e-05,
16853
+ "loss": 0.1851,
16854
+ "step": 842500
16855
+ },
16856
+ {
16857
+ "epoch": 9.04,
16858
+ "learning_rate": 1.9224487674006694e-05,
16859
+ "loss": 0.1848,
16860
+ "step": 843000
16861
+ },
16862
+ {
16863
+ "epoch": 9.04,
16864
+ "eval_loss": 0.17702366411685944,
16865
+ "eval_runtime": 2.7205,
16866
+ "eval_samples_per_second": 844.336,
16867
+ "eval_steps_per_second": 13.233,
16868
+ "step": 843000
16869
+ },
16870
+ {
16871
+ "epoch": 9.05,
16872
+ "learning_rate": 1.9167141863748015e-05,
16873
+ "loss": 0.1848,
16874
+ "step": 843500
16875
+ },
16876
+ {
16877
+ "epoch": 9.06,
16878
+ "learning_rate": 1.9109962368200602e-05,
16879
+ "loss": 0.1846,
16880
+ "step": 844000
16881
+ },
16882
+ {
16883
+ "epoch": 9.06,
16884
+ "eval_loss": 0.17539054155349731,
16885
+ "eval_runtime": 2.6244,
16886
+ "eval_samples_per_second": 875.248,
16887
+ "eval_steps_per_second": 13.717,
16888
+ "step": 844000
16889
+ },
16890
+ {
16891
+ "epoch": 9.06,
16892
+ "learning_rate": 1.9052949343690977e-05,
16893
+ "loss": 0.1848,
16894
+ "step": 844500
16895
+ },
16896
+ {
16897
+ "epoch": 9.07,
16898
+ "learning_rate": 1.8996102946090586e-05,
16899
+ "loss": 0.1846,
16900
+ "step": 845000
16901
+ },
16902
+ {
16903
+ "epoch": 9.07,
16904
+ "eval_loss": 0.17318959534168243,
16905
+ "eval_runtime": 2.7201,
16906
+ "eval_samples_per_second": 844.445,
16907
+ "eval_steps_per_second": 13.235,
16908
+ "step": 845000
16909
+ },
16910
+ {
16911
+ "epoch": 9.07,
16912
+ "learning_rate": 1.8939423330815345e-05,
16913
+ "loss": 0.1847,
16914
+ "step": 845500
16915
+ },
16916
+ {
16917
+ "epoch": 9.08,
16918
+ "learning_rate": 1.888291065282509e-05,
16919
+ "loss": 0.1847,
16920
+ "step": 846000
16921
+ },
16922
+ {
16923
+ "epoch": 9.08,
16924
+ "eval_loss": 0.17560191452503204,
16925
+ "eval_runtime": 2.7101,
16926
+ "eval_samples_per_second": 847.565,
16927
+ "eval_steps_per_second": 13.284,
16928
+ "step": 846000
16929
+ },
16930
+ {
16931
+ "epoch": 9.08,
16932
+ "learning_rate": 1.882656506662338e-05,
16933
+ "loss": 0.1846,
16934
+ "step": 846500
16935
+ },
16936
+ {
16937
+ "epoch": 9.09,
16938
+ "learning_rate": 1.8770386726256865e-05,
16939
+ "loss": 0.1844,
16940
+ "step": 847000
16941
+ },
16942
+ {
16943
+ "epoch": 9.09,
16944
+ "eval_loss": 0.17197825014591217,
16945
+ "eval_runtime": 2.7201,
16946
+ "eval_samples_per_second": 844.459,
16947
+ "eval_steps_per_second": 13.235,
16948
+ "step": 847000
16949
+ },
16950
+ {
16951
+ "epoch": 9.09,
16952
+ "learning_rate": 1.8714375785315006e-05,
16953
+ "loss": 0.1845,
16954
+ "step": 847500
16955
+ },
16956
+ {
16957
+ "epoch": 9.1,
16958
+ "learning_rate": 1.8658532396929565e-05,
16959
+ "loss": 0.184,
16960
+ "step": 848000
16961
+ },
16962
+ {
16963
+ "epoch": 9.1,
16964
+ "eval_loss": 0.17653484642505646,
16965
+ "eval_runtime": 2.6616,
16966
+ "eval_samples_per_second": 863.031,
16967
+ "eval_steps_per_second": 13.526,
16968
+ "step": 848000
16969
+ },
16970
+ {
16971
+ "epoch": 9.11,
16972
+ "learning_rate": 1.8602856713774208e-05,
16973
+ "loss": 0.1843,
16974
+ "step": 848500
16975
+ },
16976
+ {
16977
+ "epoch": 9.11,
16978
+ "learning_rate": 1.8547348888064178e-05,
16979
+ "loss": 0.1848,
16980
+ "step": 849000
16981
+ },
16982
+ {
16983
+ "epoch": 9.11,
16984
+ "eval_loss": 0.1734277456998825,
16985
+ "eval_runtime": 2.6737,
16986
+ "eval_samples_per_second": 859.112,
16987
+ "eval_steps_per_second": 13.465,
16988
+ "step": 849000
16989
+ },
16990
+ {
16991
+ "epoch": 9.12,
16992
+ "learning_rate": 1.8492009071555703e-05,
16993
+ "loss": 0.1846,
16994
+ "step": 849500
16995
+ },
16996
+ {
16997
+ "epoch": 9.12,
16998
+ "learning_rate": 1.8436837415545772e-05,
16999
+ "loss": 0.1848,
17000
+ "step": 850000
17001
+ },
17002
+ {
17003
+ "epoch": 9.12,
17004
+ "eval_loss": 0.17425018548965454,
17005
+ "eval_runtime": 2.6779,
17006
+ "eval_samples_per_second": 857.76,
17007
+ "eval_steps_per_second": 13.443,
17008
+ "step": 850000
17009
  }
17010
  ],
17011
  "max_steps": 1000000,
17012
  "num_train_epochs": 12,
17013
+ "total_flos": 5.958472899551867e+22,
17014
  "trial_name": null,
17015
  "trial_params": null
17016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b27758f4c4f1be46ca953f785452acae2687180a06e7c14c3b975c46e8947612
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c2b61d52c517c0fe4a2eaad25ea86eca5fed6ebf386f54b8df5e3364654d10f
3
  size 449471589