Training in progress, step 830000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ef1b487cabfa82f2f81ded5cef2d529a41a97f8137d750d66e05d443478e91a
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b63e12b39341c1c8d25b10dc8ca07cde5e82303726c5b23ac1fdd24345f08c2a
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34241c46169d6140a8feca7f2e1db99ea6d7326e000406064ac61feecf3f17b2
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fb4767c32ae0f5cf34a94816a959b9568b2ea2174c179de2a3df992017a5f77
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efc0c6d968c43ae24dfc91f94bcca80995b7ed16b92115235f58d28e636291a4
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df72b23925eb5f82752cd028feed8cf7d2bbe1dbdf011029efc7554632b26ae2
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd1259a368e737ba331fab7b54a98c7cfc3dd87ed455bcbc28503bda235c71d3
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f2eac6df5cd35dc068eb0bab0d362a924c2221b17b5c9bfe0e42f4ba3c05c9c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e7f8260a2f4cc917f2e6b69683488cb603bdd620504afe136b5dc796247fcec
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41a93612851ee52955512369b8723183837a90f2506b353970cd64de58829b3d
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bde979dd08e67682f748b136bdd4f7c962a7cebec495023e183c816b6c75933
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9846,11 +9846,131 @@
|
|
| 9846 |
"learning_rate": 2.2039381819638596e-05,
|
| 9847 |
"loss": 0.2894,
|
| 9848 |
"step": 820000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9849 |
}
|
| 9850 |
],
|
| 9851 |
"max_steps": 1000000,
|
| 9852 |
"num_train_epochs": 2,
|
| 9853 |
-
"total_flos": 5.
|
| 9854 |
"trial_name": null,
|
| 9855 |
"trial_params": null
|
| 9856 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.1992968806052247,
|
| 5 |
+
"global_step": 830000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9846 |
"learning_rate": 2.2039381819638596e-05,
|
| 9847 |
"loss": 0.2894,
|
| 9848 |
"step": 820000
|
| 9849 |
+
},
|
| 9850 |
+
{
|
| 9851 |
+
"epoch": 1.18,
|
| 9852 |
+
"learning_rate": 2.1974562300613417e-05,
|
| 9853 |
+
"loss": 0.2893,
|
| 9854 |
+
"step": 820500
|
| 9855 |
+
},
|
| 9856 |
+
{
|
| 9857 |
+
"epoch": 1.18,
|
| 9858 |
+
"learning_rate": 2.1909901420919184e-05,
|
| 9859 |
+
"loss": 0.2891,
|
| 9860 |
+
"step": 821000
|
| 9861 |
+
},
|
| 9862 |
+
{
|
| 9863 |
+
"epoch": 1.18,
|
| 9864 |
+
"learning_rate": 2.1845399357336326e-05,
|
| 9865 |
+
"loss": 0.2891,
|
| 9866 |
+
"step": 821500
|
| 9867 |
+
},
|
| 9868 |
+
{
|
| 9869 |
+
"epoch": 1.18,
|
| 9870 |
+
"learning_rate": 2.1781056286210997e-05,
|
| 9871 |
+
"loss": 0.2891,
|
| 9872 |
+
"step": 822000
|
| 9873 |
+
},
|
| 9874 |
+
{
|
| 9875 |
+
"epoch": 1.18,
|
| 9876 |
+
"learning_rate": 2.1716872383454674e-05,
|
| 9877 |
+
"loss": 0.2892,
|
| 9878 |
+
"step": 822500
|
| 9879 |
+
},
|
| 9880 |
+
{
|
| 9881 |
+
"epoch": 1.19,
|
| 9882 |
+
"learning_rate": 2.1652847824543744e-05,
|
| 9883 |
+
"loss": 0.2884,
|
| 9884 |
+
"step": 823000
|
| 9885 |
+
},
|
| 9886 |
+
{
|
| 9887 |
+
"epoch": 1.19,
|
| 9888 |
+
"learning_rate": 2.1588982784518853e-05,
|
| 9889 |
+
"loss": 0.288,
|
| 9890 |
+
"step": 823500
|
| 9891 |
+
},
|
| 9892 |
+
{
|
| 9893 |
+
"epoch": 1.19,
|
| 9894 |
+
"learning_rate": 2.1525277437984636e-05,
|
| 9895 |
+
"loss": 0.2889,
|
| 9896 |
+
"step": 824000
|
| 9897 |
+
},
|
| 9898 |
+
{
|
| 9899 |
+
"epoch": 1.19,
|
| 9900 |
+
"learning_rate": 2.1461731959109053e-05,
|
| 9901 |
+
"loss": 0.2893,
|
| 9902 |
+
"step": 824500
|
| 9903 |
+
},
|
| 9904 |
+
{
|
| 9905 |
+
"epoch": 1.19,
|
| 9906 |
+
"learning_rate": 2.1398346521623e-05,
|
| 9907 |
+
"loss": 0.2884,
|
| 9908 |
+
"step": 825000
|
| 9909 |
+
},
|
| 9910 |
+
{
|
| 9911 |
+
"epoch": 1.19,
|
| 9912 |
+
"learning_rate": 2.1335121298819867e-05,
|
| 9913 |
+
"loss": 0.2891,
|
| 9914 |
+
"step": 825500
|
| 9915 |
+
},
|
| 9916 |
+
{
|
| 9917 |
+
"epoch": 1.19,
|
| 9918 |
+
"learning_rate": 2.1272056463554978e-05,
|
| 9919 |
+
"loss": 0.2887,
|
| 9920 |
+
"step": 826000
|
| 9921 |
+
},
|
| 9922 |
+
{
|
| 9923 |
+
"epoch": 1.19,
|
| 9924 |
+
"learning_rate": 2.1209152188245214e-05,
|
| 9925 |
+
"loss": 0.2887,
|
| 9926 |
+
"step": 826500
|
| 9927 |
+
},
|
| 9928 |
+
{
|
| 9929 |
+
"epoch": 1.19,
|
| 9930 |
+
"learning_rate": 2.114640864486845e-05,
|
| 9931 |
+
"loss": 0.2892,
|
| 9932 |
+
"step": 827000
|
| 9933 |
+
},
|
| 9934 |
+
{
|
| 9935 |
+
"epoch": 1.19,
|
| 9936 |
+
"learning_rate": 2.1083826004963102e-05,
|
| 9937 |
+
"loss": 0.2888,
|
| 9938 |
+
"step": 827500
|
| 9939 |
+
},
|
| 9940 |
+
{
|
| 9941 |
+
"epoch": 1.2,
|
| 9942 |
+
"learning_rate": 2.1021404439627775e-05,
|
| 9943 |
+
"loss": 0.2889,
|
| 9944 |
+
"step": 828000
|
| 9945 |
+
},
|
| 9946 |
+
{
|
| 9947 |
+
"epoch": 1.2,
|
| 9948 |
+
"learning_rate": 2.09591441195206e-05,
|
| 9949 |
+
"loss": 0.2878,
|
| 9950 |
+
"step": 828500
|
| 9951 |
+
},
|
| 9952 |
+
{
|
| 9953 |
+
"epoch": 1.2,
|
| 9954 |
+
"learning_rate": 2.089704521485896e-05,
|
| 9955 |
+
"loss": 0.2888,
|
| 9956 |
+
"step": 829000
|
| 9957 |
+
},
|
| 9958 |
+
{
|
| 9959 |
+
"epoch": 1.2,
|
| 9960 |
+
"learning_rate": 2.083510789541883e-05,
|
| 9961 |
+
"loss": 0.2879,
|
| 9962 |
+
"step": 829500
|
| 9963 |
+
},
|
| 9964 |
+
{
|
| 9965 |
+
"epoch": 1.2,
|
| 9966 |
+
"learning_rate": 2.0773332330534513e-05,
|
| 9967 |
+
"loss": 0.2887,
|
| 9968 |
+
"step": 830000
|
| 9969 |
}
|
| 9970 |
],
|
| 9971 |
"max_steps": 1000000,
|
| 9972 |
"num_train_epochs": 2,
|
| 9973 |
+
"total_flos": 5.6113757283593115e+22,
|
| 9974 |
"trial_name": null,
|
| 9975 |
"trial_params": null
|
| 9976 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b63e12b39341c1c8d25b10dc8ca07cde5e82303726c5b23ac1fdd24345f08c2a
|
| 3 |
size 449450757
|