Training in progress, step 500000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dfd6bf6424245454271decd3b53eca84bb0231c75269a1abfef86fc10e306e13
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05568a72287e960fd9739070140c44a287f370c2fcded0e2c9e42000a5ecda50
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ba591c6f74b15a56dee3a71187bae3451fe611d14ab4c6b5094ba97f5b2e1bd
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b3b1d8b9ec088f9488e7777a87dcba6709bc61a5820efc403674b616e4790ed
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38db8a281134056cc67716d0423f4a90c16aea7e8a53b20d38460d854f2fb474
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b3991a2efc9d121e256fe106ba687ac6c896be2424603b102f4091356c237e3
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfe479370cc5e540b724877394cc13eb749f3953b33b916c9a22b3165f5dbb7e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6080f58c6154f51a2b844dd6ced9f6d46e00dd93181694ec96024dbb4b15f736
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4291e6d684d83f32e6a530eca103a0c81e9858aece41197b882687fab12530d
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8638e3fa087e0a193aeb627c3597747b6e37bf1ecc2043b8e409a9b8a85efb9
|
| 3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53a074c65f43dfe683b5f0e988de79e3365c939ebb4b13c8f9ce84b59bdb64a7
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 15.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9806,11 +9806,211 @@
|
|
| 9806 |
"eval_samples_per_second": 2008.198,
|
| 9807 |
"eval_steps_per_second": 32.131,
|
| 9808 |
"step": 490000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9809 |
}
|
| 9810 |
],
|
| 9811 |
"max_steps": 500000,
|
| 9812 |
"num_train_epochs": 16,
|
| 9813 |
-
"total_flos": 1.
|
| 9814 |
"trial_name": null,
|
| 9815 |
"trial_params": null
|
| 9816 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 15.319096786053494,
|
| 5 |
+
"global_step": 500000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9806 |
"eval_samples_per_second": 2008.198,
|
| 9807 |
"eval_steps_per_second": 32.131,
|
| 9808 |
"step": 490000
|
| 9809 |
+
},
|
| 9810 |
+
{
|
| 9811 |
+
"epoch": 15.03,
|
| 9812 |
+
"learning_rate": 1.0286124377900624e-05,
|
| 9813 |
+
"loss": 0.3123,
|
| 9814 |
+
"step": 490500
|
| 9815 |
+
},
|
| 9816 |
+
{
|
| 9817 |
+
"epoch": 15.04,
|
| 9818 |
+
"learning_rate": 1.0256807270282153e-05,
|
| 9819 |
+
"loss": 0.3122,
|
| 9820 |
+
"step": 491000
|
| 9821 |
+
},
|
| 9822 |
+
{
|
| 9823 |
+
"epoch": 15.04,
|
| 9824 |
+
"eval_loss": 0.7738854885101318,
|
| 9825 |
+
"eval_runtime": 0.4992,
|
| 9826 |
+
"eval_samples_per_second": 2003.188,
|
| 9827 |
+
"eval_steps_per_second": 32.051,
|
| 9828 |
+
"step": 491000
|
| 9829 |
+
},
|
| 9830 |
+
{
|
| 9831 |
+
"epoch": 15.06,
|
| 9832 |
+
"learning_rate": 1.0229073051029455e-05,
|
| 9833 |
+
"loss": 0.3124,
|
| 9834 |
+
"step": 491500
|
| 9835 |
+
},
|
| 9836 |
+
{
|
| 9837 |
+
"epoch": 15.07,
|
| 9838 |
+
"learning_rate": 1.020292202343952e-05,
|
| 9839 |
+
"loss": 0.3125,
|
| 9840 |
+
"step": 492000
|
| 9841 |
+
},
|
| 9842 |
+
{
|
| 9843 |
+
"epoch": 15.07,
|
| 9844 |
+
"eval_loss": 0.7788605690002441,
|
| 9845 |
+
"eval_runtime": 0.5071,
|
| 9846 |
+
"eval_samples_per_second": 1971.996,
|
| 9847 |
+
"eval_steps_per_second": 31.552,
|
| 9848 |
+
"step": 492000
|
| 9849 |
+
},
|
| 9850 |
+
{
|
| 9851 |
+
"epoch": 15.09,
|
| 9852 |
+
"learning_rate": 1.0178354473495813e-05,
|
| 9853 |
+
"loss": 0.3126,
|
| 9854 |
+
"step": 492500
|
| 9855 |
+
},
|
| 9856 |
+
{
|
| 9857 |
+
"epoch": 15.1,
|
| 9858 |
+
"learning_rate": 1.0155370669865077e-05,
|
| 9859 |
+
"loss": 0.3121,
|
| 9860 |
+
"step": 493000
|
| 9861 |
+
},
|
| 9862 |
+
{
|
| 9863 |
+
"epoch": 15.1,
|
| 9864 |
+
"eval_loss": 0.7763463854789734,
|
| 9865 |
+
"eval_runtime": 0.4971,
|
| 9866 |
+
"eval_samples_per_second": 2011.654,
|
| 9867 |
+
"eval_steps_per_second": 32.186,
|
| 9868 |
+
"step": 493000
|
| 9869 |
+
},
|
| 9870 |
+
{
|
| 9871 |
+
"epoch": 15.12,
|
| 9872 |
+
"learning_rate": 1.0133970863894557e-05,
|
| 9873 |
+
"loss": 0.3121,
|
| 9874 |
+
"step": 493500
|
| 9875 |
+
},
|
| 9876 |
+
{
|
| 9877 |
+
"epoch": 15.14,
|
| 9878 |
+
"learning_rate": 1.0114155289609061e-05,
|
| 9879 |
+
"loss": 0.3121,
|
| 9880 |
+
"step": 494000
|
| 9881 |
+
},
|
| 9882 |
+
{
|
| 9883 |
+
"epoch": 15.14,
|
| 9884 |
+
"eval_loss": 0.7747126221656799,
|
| 9885 |
+
"eval_runtime": 0.5031,
|
| 9886 |
+
"eval_samples_per_second": 1987.677,
|
| 9887 |
+
"eval_steps_per_second": 31.803,
|
| 9888 |
+
"step": 494000
|
| 9889 |
+
},
|
| 9890 |
+
{
|
| 9891 |
+
"epoch": 15.15,
|
| 9892 |
+
"learning_rate": 1.0095924163708572e-05,
|
| 9893 |
+
"loss": 0.3124,
|
| 9894 |
+
"step": 494500
|
| 9895 |
+
},
|
| 9896 |
+
{
|
| 9897 |
+
"epoch": 15.17,
|
| 9898 |
+
"learning_rate": 1.0079277685565724e-05,
|
| 9899 |
+
"loss": 0.3124,
|
| 9900 |
+
"step": 495000
|
| 9901 |
+
},
|
| 9902 |
+
{
|
| 9903 |
+
"epoch": 15.17,
|
| 9904 |
+
"eval_loss": 0.7728501558303833,
|
| 9905 |
+
"eval_runtime": 0.5097,
|
| 9906 |
+
"eval_samples_per_second": 1962.024,
|
| 9907 |
+
"eval_steps_per_second": 31.392,
|
| 9908 |
+
"step": 495000
|
| 9909 |
+
},
|
| 9910 |
+
{
|
| 9911 |
+
"epoch": 15.18,
|
| 9912 |
+
"learning_rate": 1.0064216037223772e-05,
|
| 9913 |
+
"loss": 0.3126,
|
| 9914 |
+
"step": 495500
|
| 9915 |
+
},
|
| 9916 |
+
{
|
| 9917 |
+
"epoch": 15.2,
|
| 9918 |
+
"learning_rate": 1.0050739383394454e-05,
|
| 9919 |
+
"loss": 0.3124,
|
| 9920 |
+
"step": 496000
|
| 9921 |
+
},
|
| 9922 |
+
{
|
| 9923 |
+
"epoch": 15.2,
|
| 9924 |
+
"eval_loss": 0.7736043930053711,
|
| 9925 |
+
"eval_runtime": 0.504,
|
| 9926 |
+
"eval_samples_per_second": 1984.023,
|
| 9927 |
+
"eval_steps_per_second": 31.744,
|
| 9928 |
+
"step": 496000
|
| 9929 |
+
},
|
| 9930 |
+
{
|
| 9931 |
+
"epoch": 15.21,
|
| 9932 |
+
"learning_rate": 1.003884787145633e-05,
|
| 9933 |
+
"loss": 0.3123,
|
| 9934 |
+
"step": 496500
|
| 9935 |
+
},
|
| 9936 |
+
{
|
| 9937 |
+
"epoch": 15.23,
|
| 9938 |
+
"learning_rate": 1.002854163145305e-05,
|
| 9939 |
+
"loss": 0.3125,
|
| 9940 |
+
"step": 497000
|
| 9941 |
+
},
|
| 9942 |
+
{
|
| 9943 |
+
"epoch": 15.23,
|
| 9944 |
+
"eval_loss": 0.7738491892814636,
|
| 9945 |
+
"eval_runtime": 0.5171,
|
| 9946 |
+
"eval_samples_per_second": 1933.887,
|
| 9947 |
+
"eval_steps_per_second": 30.942,
|
| 9948 |
+
"step": 497000
|
| 9949 |
+
},
|
| 9950 |
+
{
|
| 9951 |
+
"epoch": 15.24,
|
| 9952 |
+
"learning_rate": 1.0019820776091995e-05,
|
| 9953 |
+
"loss": 0.3123,
|
| 9954 |
+
"step": 497500
|
| 9955 |
+
},
|
| 9956 |
+
{
|
| 9957 |
+
"epoch": 15.26,
|
| 9958 |
+
"learning_rate": 1.0012685400743077e-05,
|
| 9959 |
+
"loss": 0.3123,
|
| 9960 |
+
"step": 498000
|
| 9961 |
+
},
|
| 9962 |
+
{
|
| 9963 |
+
"epoch": 15.26,
|
| 9964 |
+
"eval_loss": 0.7730560898780823,
|
| 9965 |
+
"eval_runtime": 0.5124,
|
| 9966 |
+
"eval_samples_per_second": 1951.657,
|
| 9967 |
+
"eval_steps_per_second": 31.227,
|
| 9968 |
+
"step": 498000
|
| 9969 |
+
},
|
| 9970 |
+
{
|
| 9971 |
+
"epoch": 15.27,
|
| 9972 |
+
"learning_rate": 1.0007135583437572e-05,
|
| 9973 |
+
"loss": 0.3126,
|
| 9974 |
+
"step": 498500
|
| 9975 |
+
},
|
| 9976 |
+
{
|
| 9977 |
+
"epoch": 15.29,
|
| 9978 |
+
"learning_rate": 1.0003171384867436e-05,
|
| 9979 |
+
"loss": 0.3123,
|
| 9980 |
+
"step": 499000
|
| 9981 |
+
},
|
| 9982 |
+
{
|
| 9983 |
+
"epoch": 15.29,
|
| 9984 |
+
"eval_loss": 0.7752643823623657,
|
| 9985 |
+
"eval_runtime": 0.5071,
|
| 9986 |
+
"eval_samples_per_second": 1972.007,
|
| 9987 |
+
"eval_steps_per_second": 31.552,
|
| 9988 |
+
"step": 499000
|
| 9989 |
+
},
|
| 9990 |
+
{
|
| 9991 |
+
"epoch": 15.3,
|
| 9992 |
+
"learning_rate": 1.0000792848384467e-05,
|
| 9993 |
+
"loss": 0.3121,
|
| 9994 |
+
"step": 499500
|
| 9995 |
+
},
|
| 9996 |
+
{
|
| 9997 |
+
"epoch": 15.32,
|
| 9998 |
+
"learning_rate": 1e-05,
|
| 9999 |
+
"loss": 0.3122,
|
| 10000 |
+
"step": 500000
|
| 10001 |
+
},
|
| 10002 |
+
{
|
| 10003 |
+
"epoch": 15.32,
|
| 10004 |
+
"eval_loss": 0.7746031284332275,
|
| 10005 |
+
"eval_runtime": 0.5134,
|
| 10006 |
+
"eval_samples_per_second": 1947.857,
|
| 10007 |
+
"eval_steps_per_second": 31.166,
|
| 10008 |
+
"step": 500000
|
| 10009 |
}
|
| 10010 |
],
|
| 10011 |
"max_steps": 500000,
|
| 10012 |
"num_train_epochs": 16,
|
| 10013 |
+
"total_flos": 1.5974237119445362e+22,
|
| 10014 |
"trial_name": null,
|
| 10015 |
"trial_params": null
|
| 10016 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05568a72287e960fd9739070140c44a287f370c2fcded0e2c9e42000a5ecda50
|
| 3 |
size 102501541
|