jflotz commited on
Commit
3fd55f4
·
1 Parent(s): ae93723

Training in progress, step 800000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee6d0b8731469184859b6e2af2323dc331e9f7e709ceb8418eca6fab2f75e9cb
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05cce4c1769cad62058c494e3844f2366f5491f558c117a5601783f8f4aca538
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3c99a6d8856f7a728dbbbf96bf0c858122cdeb2ae96a80fcc6876c29d8e2666
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f6d411bd0a38f2cff0679d6b8d24e5bbf393f3e64bddfcb6100439be517e6e9
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c08292d027d8ba65de9023aba96bca0e44920026e543fcb86c6e89c28847c9ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4556b030f4b10d8ec54832aab87f365d5f534e5c8f2aba138109a6119b60109f
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83d1297302d20060e31d476195b98906c23904815e65152eb2d3ffb7dd074183
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57cc3662a36c24e948440a9c5383a944373362c15a98a0fb5317ec1e024dd4c4
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.453631932904319,
5
- "global_step": 790000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -15806,11 +15806,211 @@
15806
  "eval_samples_per_second": 887.972,
15807
  "eval_steps_per_second": 13.917,
15808
  "step": 790000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15809
  }
15810
  ],
15811
  "max_steps": 1000000,
15812
  "num_train_epochs": 12,
15813
- "total_flos": 5.5378768098172995e+22,
15814
  "trial_name": null,
15815
  "trial_params": null
15816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.565160657127244,
5
+ "global_step": 800000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
15806
  "eval_samples_per_second": 887.972,
15807
  "eval_steps_per_second": 13.917,
15808
  "step": 790000
15809
+ },
15810
+ {
15811
+ "epoch": 8.46,
15812
+ "learning_rate": 2.6137923994427768e-05,
15813
+ "loss": 0.1881,
15814
+ "step": 790500
15815
+ },
15816
+ {
15817
+ "epoch": 8.46,
15818
+ "learning_rate": 2.6064073005694758e-05,
15819
+ "loss": 0.1876,
15820
+ "step": 791000
15821
+ },
15822
+ {
15823
+ "epoch": 8.46,
15824
+ "eval_loss": 0.17775288224220276,
15825
+ "eval_runtime": 2.6373,
15826
+ "eval_samples_per_second": 870.95,
15827
+ "eval_steps_per_second": 13.65,
15828
+ "step": 791000
15829
+ },
15830
+ {
15831
+ "epoch": 8.47,
15832
+ "learning_rate": 2.5990369475726598e-05,
15833
+ "loss": 0.1878,
15834
+ "step": 791500
15835
+ },
15836
+ {
15837
+ "epoch": 8.48,
15838
+ "learning_rate": 2.591681360602595e-05,
15839
+ "loss": 0.1876,
15840
+ "step": 792000
15841
+ },
15842
+ {
15843
+ "epoch": 8.48,
15844
+ "eval_loss": 0.1769527643918991,
15845
+ "eval_runtime": 2.7236,
15846
+ "eval_samples_per_second": 843.383,
15847
+ "eval_steps_per_second": 13.218,
15848
+ "step": 792000
15849
+ },
15850
+ {
15851
+ "epoch": 8.48,
15852
+ "learning_rate": 2.5843405597691748e-05,
15853
+ "loss": 0.188,
15854
+ "step": 792500
15855
+ },
15856
+ {
15857
+ "epoch": 8.49,
15858
+ "learning_rate": 2.577014565141866e-05,
15859
+ "loss": 0.1872,
15860
+ "step": 793000
15861
+ },
15862
+ {
15863
+ "epoch": 8.49,
15864
+ "eval_loss": 0.1795121282339096,
15865
+ "eval_runtime": 2.6103,
15866
+ "eval_samples_per_second": 879.963,
15867
+ "eval_steps_per_second": 13.791,
15868
+ "step": 793000
15869
+ },
15870
+ {
15871
+ "epoch": 8.49,
15872
+ "learning_rate": 2.569703396749661e-05,
15873
+ "loss": 0.1875,
15874
+ "step": 793500
15875
+ },
15876
+ {
15877
+ "epoch": 8.5,
15878
+ "learning_rate": 2.562407074581014e-05,
15879
+ "loss": 0.188,
15880
+ "step": 794000
15881
+ },
15882
+ {
15883
+ "epoch": 8.5,
15884
+ "eval_loss": 0.17704832553863525,
15885
+ "eval_runtime": 2.6547,
15886
+ "eval_samples_per_second": 865.258,
15887
+ "eval_steps_per_second": 13.561,
15888
+ "step": 794000
15889
+ },
15890
+ {
15891
+ "epoch": 8.5,
15892
+ "learning_rate": 2.5551256185837897e-05,
15893
+ "loss": 0.1878,
15894
+ "step": 794500
15895
+ },
15896
+ {
15897
+ "epoch": 8.51,
15898
+ "learning_rate": 2.5478590486652137e-05,
15899
+ "loss": 0.1879,
15900
+ "step": 795000
15901
+ },
15902
+ {
15903
+ "epoch": 8.51,
15904
+ "eval_loss": 0.1775512397289276,
15905
+ "eval_runtime": 2.704,
15906
+ "eval_samples_per_second": 849.487,
15907
+ "eval_steps_per_second": 13.314,
15908
+ "step": 795000
15909
+ },
15910
+ {
15911
+ "epoch": 8.51,
15912
+ "learning_rate": 2.5406073846918076e-05,
15913
+ "loss": 0.1873,
15914
+ "step": 795500
15915
+ },
15916
+ {
15917
+ "epoch": 8.52,
15918
+ "learning_rate": 2.533370646489347e-05,
15919
+ "loss": 0.1872,
15920
+ "step": 796000
15921
+ },
15922
+ {
15923
+ "epoch": 8.52,
15924
+ "eval_loss": 0.17661112546920776,
15925
+ "eval_runtime": 2.6375,
15926
+ "eval_samples_per_second": 870.909,
15927
+ "eval_steps_per_second": 13.649,
15928
+ "step": 796000
15929
+ },
15930
+ {
15931
+ "epoch": 8.53,
15932
+ "learning_rate": 2.526148853842796e-05,
15933
+ "loss": 0.1874,
15934
+ "step": 796500
15935
+ },
15936
+ {
15937
+ "epoch": 8.53,
15938
+ "learning_rate": 2.5189420264962586e-05,
15939
+ "loss": 0.1875,
15940
+ "step": 797000
15941
+ },
15942
+ {
15943
+ "epoch": 8.53,
15944
+ "eval_loss": 0.17768479883670807,
15945
+ "eval_runtime": 2.6644,
15946
+ "eval_samples_per_second": 862.092,
15947
+ "eval_steps_per_second": 13.511,
15948
+ "step": 797000
15949
+ },
15950
+ {
15951
+ "epoch": 8.54,
15952
+ "learning_rate": 2.5117501841529297e-05,
15953
+ "loss": 0.1871,
15954
+ "step": 797500
15955
+ },
15956
+ {
15957
+ "epoch": 8.54,
15958
+ "learning_rate": 2.504573346475026e-05,
15959
+ "loss": 0.1874,
15960
+ "step": 798000
15961
+ },
15962
+ {
15963
+ "epoch": 8.54,
15964
+ "eval_loss": 0.17816244065761566,
15965
+ "eval_runtime": 2.6304,
15966
+ "eval_samples_per_second": 873.254,
15967
+ "eval_steps_per_second": 13.686,
15968
+ "step": 798000
15969
+ },
15970
+ {
15971
+ "epoch": 8.55,
15972
+ "learning_rate": 2.497411533083753e-05,
15973
+ "loss": 0.1874,
15974
+ "step": 798500
15975
+ },
15976
+ {
15977
+ "epoch": 8.55,
15978
+ "learning_rate": 2.4902647635592324e-05,
15979
+ "loss": 0.187,
15980
+ "step": 799000
15981
+ },
15982
+ {
15983
+ "epoch": 8.55,
15984
+ "eval_loss": 0.17790637910366058,
15985
+ "eval_runtime": 2.6765,
15986
+ "eval_samples_per_second": 858.201,
15987
+ "eval_steps_per_second": 13.45,
15988
+ "step": 799000
15989
+ },
15990
+ {
15991
+ "epoch": 8.56,
15992
+ "learning_rate": 2.483133057440458e-05,
15993
+ "loss": 0.1871,
15994
+ "step": 799500
15995
+ },
15996
+ {
15997
+ "epoch": 8.57,
15998
+ "learning_rate": 2.476016434225246e-05,
15999
+ "loss": 0.1872,
16000
+ "step": 800000
16001
+ },
16002
+ {
16003
+ "epoch": 8.57,
16004
+ "eval_loss": 0.17626047134399414,
16005
+ "eval_runtime": 2.6916,
16006
+ "eval_samples_per_second": 853.408,
16007
+ "eval_steps_per_second": 13.375,
16008
+ "step": 800000
16009
  }
16010
  ],
16011
  "max_steps": 1000000,
16012
  "num_train_epochs": 12,
16013
+ "total_flos": 5.607977070847647e+22,
16014
  "trial_name": null,
16015
  "trial_params": null
16016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3c99a6d8856f7a728dbbbf96bf0c858122cdeb2ae96a80fcc6876c29d8e2666
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f6d411bd0a38f2cff0679d6b8d24e5bbf393f3e64bddfcb6100439be517e6e9
3
  size 449471589