jflotz commited on
Commit
72c9bd7
·
1 Parent(s): c7e9fcb

Training in progress, step 950000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0c72fdd0154d524cda12334954eb1e4f193d30dc2134990578a195ba70ede7f
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:555f1620da14174bf24cf8a2c50966c673718f13caf9ff9216cb282d58986be2
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0068dc2eb70214cc7f3762f96ca920a8342113b2f223dfdea92c9f41e6012f4
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c3d51ab918ac4532e1d6eeab8c0b6a6ea719ff69a5dc804d4995968ab632fc
3
  size 449471589
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dab84d4b75593cd9349f424c4371ea8ac2493751bc544a294c8ef74a18b08e9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76d771b6aa86b6db5c1d3a18a5ba01d5f7ff8a339c98c29586734738700dc44c
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.126562796248173,
5
- "global_step": 940000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -18806,11 +18806,211 @@
18806
  "eval_samples_per_second": 888.512,
18807
  "eval_steps_per_second": 13.925,
18808
  "step": 940000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18809
  }
18810
  ],
18811
  "max_steps": 1000000,
18812
  "num_train_epochs": 12,
18813
- "total_flos": 6.589369772377475e+22,
18814
  "trial_name": null,
18815
  "trial_params": null
18816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.238091520471098,
5
+ "global_step": 950000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
18806
  "eval_samples_per_second": 888.512,
18807
  "eval_steps_per_second": 13.925,
18808
  "step": 940000
18809
+ },
18810
+ {
18811
+ "epoch": 10.13,
18812
+ "learning_rate": 1.1350681316526965e-05,
18813
+ "loss": 0.1805,
18814
+ "step": 940500
18815
+ },
18816
+ {
18817
+ "epoch": 10.14,
18818
+ "learning_rate": 1.1328147928906494e-05,
18819
+ "loss": 0.1809,
18820
+ "step": 941000
18821
+ },
18822
+ {
18823
+ "epoch": 10.14,
18824
+ "eval_loss": 0.1728110909461975,
18825
+ "eval_runtime": 2.6803,
18826
+ "eval_samples_per_second": 857.002,
18827
+ "eval_steps_per_second": 13.431,
18828
+ "step": 941000
18829
+ },
18830
+ {
18831
+ "epoch": 10.14,
18832
+ "learning_rate": 1.1305802287507358e-05,
18833
+ "loss": 0.1806,
18834
+ "step": 941500
18835
+ },
18836
+ {
18837
+ "epoch": 10.15,
18838
+ "learning_rate": 1.1283644453421678e-05,
18839
+ "loss": 0.1806,
18840
+ "step": 942000
18841
+ },
18842
+ {
18843
+ "epoch": 10.15,
18844
+ "eval_loss": 0.17073097825050354,
18845
+ "eval_runtime": 2.5804,
18846
+ "eval_samples_per_second": 890.183,
18847
+ "eval_steps_per_second": 13.951,
18848
+ "step": 942000
18849
+ },
18850
+ {
18851
+ "epoch": 10.15,
18852
+ "learning_rate": 1.1261674487228149e-05,
18853
+ "loss": 0.1805,
18854
+ "step": 942500
18855
+ },
18856
+ {
18857
+ "epoch": 10.16,
18858
+ "learning_rate": 1.1239892448991798e-05,
18859
+ "loss": 0.1806,
18860
+ "step": 943000
18861
+ },
18862
+ {
18863
+ "epoch": 10.16,
18864
+ "eval_loss": 0.17256046831607819,
18865
+ "eval_runtime": 2.657,
18866
+ "eval_samples_per_second": 864.515,
18867
+ "eval_steps_per_second": 13.549,
18868
+ "step": 943000
18869
+ },
18870
+ {
18871
+ "epoch": 10.17,
18872
+ "learning_rate": 1.1218298398263894e-05,
18873
+ "loss": 0.1808,
18874
+ "step": 943500
18875
+ },
18876
+ {
18877
+ "epoch": 10.17,
18878
+ "learning_rate": 1.1196892394081743e-05,
18879
+ "loss": 0.1803,
18880
+ "step": 944000
18881
+ },
18882
+ {
18883
+ "epoch": 10.17,
18884
+ "eval_loss": 0.1697072833776474,
18885
+ "eval_runtime": 2.5585,
18886
+ "eval_samples_per_second": 897.801,
18887
+ "eval_steps_per_second": 14.071,
18888
+ "step": 944000
18889
+ },
18890
+ {
18891
+ "epoch": 10.18,
18892
+ "learning_rate": 1.1175674494968552e-05,
18893
+ "loss": 0.1803,
18894
+ "step": 944500
18895
+ },
18896
+ {
18897
+ "epoch": 10.18,
18898
+ "learning_rate": 1.1154644758933235e-05,
18899
+ "loss": 0.1807,
18900
+ "step": 945000
18901
+ },
18902
+ {
18903
+ "epoch": 10.18,
18904
+ "eval_loss": 0.17261387407779694,
18905
+ "eval_runtime": 2.6868,
18906
+ "eval_samples_per_second": 854.925,
18907
+ "eval_steps_per_second": 13.399,
18908
+ "step": 945000
18909
+ },
18910
+ {
18911
+ "epoch": 10.19,
18912
+ "learning_rate": 1.11338032434703e-05,
18913
+ "loss": 0.1804,
18914
+ "step": 945500
18915
+ },
18916
+ {
18917
+ "epoch": 10.19,
18918
+ "learning_rate": 1.1113150005559644e-05,
18919
+ "loss": 0.1808,
18920
+ "step": 946000
18921
+ },
18922
+ {
18923
+ "epoch": 10.19,
18924
+ "eval_loss": 0.17092828452587128,
18925
+ "eval_runtime": 2.6611,
18926
+ "eval_samples_per_second": 863.19,
18927
+ "eval_steps_per_second": 13.528,
18928
+ "step": 946000
18929
+ },
18930
+ {
18931
+ "epoch": 10.2,
18932
+ "learning_rate": 1.1092685101666438e-05,
18933
+ "loss": 0.1806,
18934
+ "step": 946500
18935
+ },
18936
+ {
18937
+ "epoch": 10.2,
18938
+ "learning_rate": 1.1072408587740942e-05,
18939
+ "loss": 0.1804,
18940
+ "step": 947000
18941
+ },
18942
+ {
18943
+ "epoch": 10.2,
18944
+ "eval_loss": 0.17135068774223328,
18945
+ "eval_runtime": 2.7301,
18946
+ "eval_samples_per_second": 841.37,
18947
+ "eval_steps_per_second": 13.186,
18948
+ "step": 947000
18949
+ },
18950
+ {
18951
+ "epoch": 10.21,
18952
+ "learning_rate": 1.1052320519218383e-05,
18953
+ "loss": 0.1804,
18954
+ "step": 947500
18955
+ },
18956
+ {
18957
+ "epoch": 10.22,
18958
+ "learning_rate": 1.1032420951018755e-05,
18959
+ "loss": 0.1806,
18960
+ "step": 948000
18961
+ },
18962
+ {
18963
+ "epoch": 10.22,
18964
+ "eval_loss": 0.16970402002334595,
18965
+ "eval_runtime": 2.5839,
18966
+ "eval_samples_per_second": 888.964,
18967
+ "eval_steps_per_second": 13.932,
18968
+ "step": 948000
18969
+ },
18970
+ {
18971
+ "epoch": 10.22,
18972
+ "learning_rate": 1.1012709937546722e-05,
18973
+ "loss": 0.1805,
18974
+ "step": 948500
18975
+ },
18976
+ {
18977
+ "epoch": 10.23,
18978
+ "learning_rate": 1.0993187532691458e-05,
18979
+ "loss": 0.1804,
18980
+ "step": 949000
18981
+ },
18982
+ {
18983
+ "epoch": 10.23,
18984
+ "eval_loss": 0.17099051177501678,
18985
+ "eval_runtime": 2.7073,
18986
+ "eval_samples_per_second": 848.444,
18987
+ "eval_steps_per_second": 13.297,
18988
+ "step": 949000
18989
+ },
18990
+ {
18991
+ "epoch": 10.23,
18992
+ "learning_rate": 1.0973853789826454e-05,
18993
+ "loss": 0.1804,
18994
+ "step": 949500
18995
+ },
18996
+ {
18997
+ "epoch": 10.24,
18998
+ "learning_rate": 1.0954708761809438e-05,
18999
+ "loss": 0.1806,
19000
+ "step": 950000
19001
+ },
19002
+ {
19003
+ "epoch": 10.24,
19004
+ "eval_loss": 0.1725110560655594,
19005
+ "eval_runtime": 2.6133,
19006
+ "eval_samples_per_second": 878.965,
19007
+ "eval_steps_per_second": 13.776,
19008
+ "step": 950000
19009
  }
19010
  ],
19011
  "max_steps": 1000000,
19012
  "num_train_epochs": 12,
19013
+ "total_flos": 6.6594700334078225e+22,
19014
  "trial_name": null,
19015
  "trial_params": null
19016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0068dc2eb70214cc7f3762f96ca920a8342113b2f223dfdea92c9f41e6012f4
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c3d51ab918ac4532e1d6eeab8c0b6a6ea719ff69a5dc804d4995968ab632fc
3
  size 449471589