Training in progress, step 950000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:555f1620da14174bf24cf8a2c50966c673718f13caf9ff9216cb282d58986be2
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92c3d51ab918ac4532e1d6eeab8c0b6a6ea719ff69a5dc804d4995968ab632fc
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f56e4ab7d580b4bb483b5af1e3c9da40dee5bbb1c90ae3bbc38d37f787d0e30
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76d771b6aa86b6db5c1d3a18a5ba01d5f7ff8a339c98c29586734738700dc44c
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -18806,11 +18806,211 @@
|
|
| 18806 |
"eval_samples_per_second": 888.512,
|
| 18807 |
"eval_steps_per_second": 13.925,
|
| 18808 |
"step": 940000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18809 |
}
|
| 18810 |
],
|
| 18811 |
"max_steps": 1000000,
|
| 18812 |
"num_train_epochs": 12,
|
| 18813 |
-
"total_flos": 6.
|
| 18814 |
"trial_name": null,
|
| 18815 |
"trial_params": null
|
| 18816 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.238091520471098,
|
| 5 |
+
"global_step": 950000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 18806 |
"eval_samples_per_second": 888.512,
|
| 18807 |
"eval_steps_per_second": 13.925,
|
| 18808 |
"step": 940000
|
| 18809 |
+
},
|
| 18810 |
+
{
|
| 18811 |
+
"epoch": 10.13,
|
| 18812 |
+
"learning_rate": 1.1350681316526965e-05,
|
| 18813 |
+
"loss": 0.1805,
|
| 18814 |
+
"step": 940500
|
| 18815 |
+
},
|
| 18816 |
+
{
|
| 18817 |
+
"epoch": 10.14,
|
| 18818 |
+
"learning_rate": 1.1328147928906494e-05,
|
| 18819 |
+
"loss": 0.1809,
|
| 18820 |
+
"step": 941000
|
| 18821 |
+
},
|
| 18822 |
+
{
|
| 18823 |
+
"epoch": 10.14,
|
| 18824 |
+
"eval_loss": 0.1728110909461975,
|
| 18825 |
+
"eval_runtime": 2.6803,
|
| 18826 |
+
"eval_samples_per_second": 857.002,
|
| 18827 |
+
"eval_steps_per_second": 13.431,
|
| 18828 |
+
"step": 941000
|
| 18829 |
+
},
|
| 18830 |
+
{
|
| 18831 |
+
"epoch": 10.14,
|
| 18832 |
+
"learning_rate": 1.1305802287507358e-05,
|
| 18833 |
+
"loss": 0.1806,
|
| 18834 |
+
"step": 941500
|
| 18835 |
+
},
|
| 18836 |
+
{
|
| 18837 |
+
"epoch": 10.15,
|
| 18838 |
+
"learning_rate": 1.1283644453421678e-05,
|
| 18839 |
+
"loss": 0.1806,
|
| 18840 |
+
"step": 942000
|
| 18841 |
+
},
|
| 18842 |
+
{
|
| 18843 |
+
"epoch": 10.15,
|
| 18844 |
+
"eval_loss": 0.17073097825050354,
|
| 18845 |
+
"eval_runtime": 2.5804,
|
| 18846 |
+
"eval_samples_per_second": 890.183,
|
| 18847 |
+
"eval_steps_per_second": 13.951,
|
| 18848 |
+
"step": 942000
|
| 18849 |
+
},
|
| 18850 |
+
{
|
| 18851 |
+
"epoch": 10.15,
|
| 18852 |
+
"learning_rate": 1.1261674487228149e-05,
|
| 18853 |
+
"loss": 0.1805,
|
| 18854 |
+
"step": 942500
|
| 18855 |
+
},
|
| 18856 |
+
{
|
| 18857 |
+
"epoch": 10.16,
|
| 18858 |
+
"learning_rate": 1.1239892448991798e-05,
|
| 18859 |
+
"loss": 0.1806,
|
| 18860 |
+
"step": 943000
|
| 18861 |
+
},
|
| 18862 |
+
{
|
| 18863 |
+
"epoch": 10.16,
|
| 18864 |
+
"eval_loss": 0.17256046831607819,
|
| 18865 |
+
"eval_runtime": 2.657,
|
| 18866 |
+
"eval_samples_per_second": 864.515,
|
| 18867 |
+
"eval_steps_per_second": 13.549,
|
| 18868 |
+
"step": 943000
|
| 18869 |
+
},
|
| 18870 |
+
{
|
| 18871 |
+
"epoch": 10.17,
|
| 18872 |
+
"learning_rate": 1.1218298398263894e-05,
|
| 18873 |
+
"loss": 0.1808,
|
| 18874 |
+
"step": 943500
|
| 18875 |
+
},
|
| 18876 |
+
{
|
| 18877 |
+
"epoch": 10.17,
|
| 18878 |
+
"learning_rate": 1.1196892394081743e-05,
|
| 18879 |
+
"loss": 0.1803,
|
| 18880 |
+
"step": 944000
|
| 18881 |
+
},
|
| 18882 |
+
{
|
| 18883 |
+
"epoch": 10.17,
|
| 18884 |
+
"eval_loss": 0.1697072833776474,
|
| 18885 |
+
"eval_runtime": 2.5585,
|
| 18886 |
+
"eval_samples_per_second": 897.801,
|
| 18887 |
+
"eval_steps_per_second": 14.071,
|
| 18888 |
+
"step": 944000
|
| 18889 |
+
},
|
| 18890 |
+
{
|
| 18891 |
+
"epoch": 10.18,
|
| 18892 |
+
"learning_rate": 1.1175674494968552e-05,
|
| 18893 |
+
"loss": 0.1803,
|
| 18894 |
+
"step": 944500
|
| 18895 |
+
},
|
| 18896 |
+
{
|
| 18897 |
+
"epoch": 10.18,
|
| 18898 |
+
"learning_rate": 1.1154644758933235e-05,
|
| 18899 |
+
"loss": 0.1807,
|
| 18900 |
+
"step": 945000
|
| 18901 |
+
},
|
| 18902 |
+
{
|
| 18903 |
+
"epoch": 10.18,
|
| 18904 |
+
"eval_loss": 0.17261387407779694,
|
| 18905 |
+
"eval_runtime": 2.6868,
|
| 18906 |
+
"eval_samples_per_second": 854.925,
|
| 18907 |
+
"eval_steps_per_second": 13.399,
|
| 18908 |
+
"step": 945000
|
| 18909 |
+
},
|
| 18910 |
+
{
|
| 18911 |
+
"epoch": 10.19,
|
| 18912 |
+
"learning_rate": 1.11338032434703e-05,
|
| 18913 |
+
"loss": 0.1804,
|
| 18914 |
+
"step": 945500
|
| 18915 |
+
},
|
| 18916 |
+
{
|
| 18917 |
+
"epoch": 10.19,
|
| 18918 |
+
"learning_rate": 1.1113150005559644e-05,
|
| 18919 |
+
"loss": 0.1808,
|
| 18920 |
+
"step": 946000
|
| 18921 |
+
},
|
| 18922 |
+
{
|
| 18923 |
+
"epoch": 10.19,
|
| 18924 |
+
"eval_loss": 0.17092828452587128,
|
| 18925 |
+
"eval_runtime": 2.6611,
|
| 18926 |
+
"eval_samples_per_second": 863.19,
|
| 18927 |
+
"eval_steps_per_second": 13.528,
|
| 18928 |
+
"step": 946000
|
| 18929 |
+
},
|
| 18930 |
+
{
|
| 18931 |
+
"epoch": 10.2,
|
| 18932 |
+
"learning_rate": 1.1092685101666438e-05,
|
| 18933 |
+
"loss": 0.1806,
|
| 18934 |
+
"step": 946500
|
| 18935 |
+
},
|
| 18936 |
+
{
|
| 18937 |
+
"epoch": 10.2,
|
| 18938 |
+
"learning_rate": 1.1072408587740942e-05,
|
| 18939 |
+
"loss": 0.1804,
|
| 18940 |
+
"step": 947000
|
| 18941 |
+
},
|
| 18942 |
+
{
|
| 18943 |
+
"epoch": 10.2,
|
| 18944 |
+
"eval_loss": 0.17135068774223328,
|
| 18945 |
+
"eval_runtime": 2.7301,
|
| 18946 |
+
"eval_samples_per_second": 841.37,
|
| 18947 |
+
"eval_steps_per_second": 13.186,
|
| 18948 |
+
"step": 947000
|
| 18949 |
+
},
|
| 18950 |
+
{
|
| 18951 |
+
"epoch": 10.21,
|
| 18952 |
+
"learning_rate": 1.1052320519218383e-05,
|
| 18953 |
+
"loss": 0.1804,
|
| 18954 |
+
"step": 947500
|
| 18955 |
+
},
|
| 18956 |
+
{
|
| 18957 |
+
"epoch": 10.22,
|
| 18958 |
+
"learning_rate": 1.1032420951018755e-05,
|
| 18959 |
+
"loss": 0.1806,
|
| 18960 |
+
"step": 948000
|
| 18961 |
+
},
|
| 18962 |
+
{
|
| 18963 |
+
"epoch": 10.22,
|
| 18964 |
+
"eval_loss": 0.16970402002334595,
|
| 18965 |
+
"eval_runtime": 2.5839,
|
| 18966 |
+
"eval_samples_per_second": 888.964,
|
| 18967 |
+
"eval_steps_per_second": 13.932,
|
| 18968 |
+
"step": 948000
|
| 18969 |
+
},
|
| 18970 |
+
{
|
| 18971 |
+
"epoch": 10.22,
|
| 18972 |
+
"learning_rate": 1.1012709937546722e-05,
|
| 18973 |
+
"loss": 0.1805,
|
| 18974 |
+
"step": 948500
|
| 18975 |
+
},
|
| 18976 |
+
{
|
| 18977 |
+
"epoch": 10.23,
|
| 18978 |
+
"learning_rate": 1.0993187532691458e-05,
|
| 18979 |
+
"loss": 0.1804,
|
| 18980 |
+
"step": 949000
|
| 18981 |
+
},
|
| 18982 |
+
{
|
| 18983 |
+
"epoch": 10.23,
|
| 18984 |
+
"eval_loss": 0.17099051177501678,
|
| 18985 |
+
"eval_runtime": 2.7073,
|
| 18986 |
+
"eval_samples_per_second": 848.444,
|
| 18987 |
+
"eval_steps_per_second": 13.297,
|
| 18988 |
+
"step": 949000
|
| 18989 |
+
},
|
| 18990 |
+
{
|
| 18991 |
+
"epoch": 10.23,
|
| 18992 |
+
"learning_rate": 1.0973853789826454e-05,
|
| 18993 |
+
"loss": 0.1804,
|
| 18994 |
+
"step": 949500
|
| 18995 |
+
},
|
| 18996 |
+
{
|
| 18997 |
+
"epoch": 10.24,
|
| 18998 |
+
"learning_rate": 1.0954708761809438e-05,
|
| 18999 |
+
"loss": 0.1806,
|
| 19000 |
+
"step": 950000
|
| 19001 |
+
},
|
| 19002 |
+
{
|
| 19003 |
+
"epoch": 10.24,
|
| 19004 |
+
"eval_loss": 0.1725110560655594,
|
| 19005 |
+
"eval_runtime": 2.6133,
|
| 19006 |
+
"eval_samples_per_second": 878.965,
|
| 19007 |
+
"eval_steps_per_second": 13.776,
|
| 19008 |
+
"step": 950000
|
| 19009 |
}
|
| 19010 |
],
|
| 19011 |
"max_steps": 1000000,
|
| 19012 |
"num_train_epochs": 12,
|
| 19013 |
+
"total_flos": 6.6594700334078225e+22,
|
| 19014 |
"trial_name": null,
|
| 19015 |
"trial_params": null
|
| 19016 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92c3d51ab918ac4532e1d6eeab8c0b6a6ea719ff69a5dc804d4995968ab632fc
|
| 3 |
size 449471589
|