Training in progress, step 660000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:facea498a3305bb3a4cf04ba493e9b7a282bf34c7699144b12c3401905a21a21
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b0253822de0f7fe54e007e60affd0913e4b68439fe1550215e7a076507078bb
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46211629a4cc11950e226f7dbdda304cda1420c43879285d3e04ebb8508dc043
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05c11aa747eb56c76be6b0a8185d8eb345ab22e3a85121576df152b2d5604743
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87f323fe99852eb9065ab82d8c0f478aeb01397ae4967c2beb7e4de9ca3f02ec
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b8df123d1834d25753f8b002915374feeeb89dc9c6992036e0b849ae27e9320
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90443240959e9f40acd615af8df90f5d9b01b4b49736141665a99e8168dc3c6a
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bd0fd3633719fca8fe6bbb82b9e0f5384f9458c9e7cc0938ff32b60be639fdb
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8ca6715dedc0846ae860b91a71263b7a826d6a54596ee7919105a93c87a6496
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:555ec27b704b2cb1160b682f3e0caec8c125b854ba2add3f3b500f4ed76a0e8e
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e8e59bcc271d9e83e953974cfcbd52d5f5cf63d456de8e0f805b45487976195
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7806,11 +7806,131 @@
|
|
| 7806 |
"learning_rate": 5.188132027429215e-05,
|
| 7807 |
"loss": 0.2991,
|
| 7808 |
"step": 650000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7809 |
}
|
| 7810 |
],
|
| 7811 |
"max_steps": 1000000,
|
| 7812 |
"num_train_epochs": 2,
|
| 7813 |
-
"total_flos": 4.
|
| 7814 |
"trial_name": null,
|
| 7815 |
"trial_params": null
|
| 7816 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.3193719789380256,
|
| 5 |
+
"global_step": 660000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7806 |
"learning_rate": 5.188132027429215e-05,
|
| 7807 |
"loss": 0.2991,
|
| 7808 |
"step": 650000
|
| 7809 |
+
},
|
| 7810 |
+
{
|
| 7811 |
+
"epoch": 1.3,
|
| 7812 |
+
"learning_rate": 5.177536451140569e-05,
|
| 7813 |
+
"loss": 0.3005,
|
| 7814 |
+
"step": 650500
|
| 7815 |
+
},
|
| 7816 |
+
{
|
| 7817 |
+
"epoch": 1.3,
|
| 7818 |
+
"learning_rate": 5.166948591359489e-05,
|
| 7819 |
+
"loss": 0.3002,
|
| 7820 |
+
"step": 651000
|
| 7821 |
+
},
|
| 7822 |
+
{
|
| 7823 |
+
"epoch": 1.3,
|
| 7824 |
+
"learning_rate": 5.1563684770327804e-05,
|
| 7825 |
+
"loss": 0.3003,
|
| 7826 |
+
"step": 651500
|
| 7827 |
+
},
|
| 7828 |
+
{
|
| 7829 |
+
"epoch": 1.3,
|
| 7830 |
+
"learning_rate": 5.145796137086076e-05,
|
| 7831 |
+
"loss": 0.3,
|
| 7832 |
+
"step": 652000
|
| 7833 |
+
},
|
| 7834 |
+
{
|
| 7835 |
+
"epoch": 1.3,
|
| 7836 |
+
"learning_rate": 5.135231600423742e-05,
|
| 7837 |
+
"loss": 0.2997,
|
| 7838 |
+
"step": 652500
|
| 7839 |
+
},
|
| 7840 |
+
{
|
| 7841 |
+
"epoch": 1.31,
|
| 7842 |
+
"learning_rate": 5.124674895928823e-05,
|
| 7843 |
+
"loss": 0.2998,
|
| 7844 |
+
"step": 653000
|
| 7845 |
+
},
|
| 7846 |
+
{
|
| 7847 |
+
"epoch": 1.31,
|
| 7848 |
+
"learning_rate": 5.114126052462943e-05,
|
| 7849 |
+
"loss": 0.2998,
|
| 7850 |
+
"step": 653500
|
| 7851 |
+
},
|
| 7852 |
+
{
|
| 7853 |
+
"epoch": 1.31,
|
| 7854 |
+
"learning_rate": 5.103585098866237e-05,
|
| 7855 |
+
"loss": 0.2995,
|
| 7856 |
+
"step": 654000
|
| 7857 |
+
},
|
| 7858 |
+
{
|
| 7859 |
+
"epoch": 1.31,
|
| 7860 |
+
"learning_rate": 5.093052063957276e-05,
|
| 7861 |
+
"loss": 0.2996,
|
| 7862 |
+
"step": 654500
|
| 7863 |
+
},
|
| 7864 |
+
{
|
| 7865 |
+
"epoch": 1.31,
|
| 7866 |
+
"learning_rate": 5.082526976532968e-05,
|
| 7867 |
+
"loss": 0.2994,
|
| 7868 |
+
"step": 655000
|
| 7869 |
+
},
|
| 7870 |
+
{
|
| 7871 |
+
"epoch": 1.31,
|
| 7872 |
+
"learning_rate": 5.072009865368501e-05,
|
| 7873 |
+
"loss": 0.2996,
|
| 7874 |
+
"step": 655500
|
| 7875 |
+
},
|
| 7876 |
+
{
|
| 7877 |
+
"epoch": 1.31,
|
| 7878 |
+
"learning_rate": 5.061500759217261e-05,
|
| 7879 |
+
"loss": 0.2999,
|
| 7880 |
+
"step": 656000
|
| 7881 |
+
},
|
| 7882 |
+
{
|
| 7883 |
+
"epoch": 1.31,
|
| 7884 |
+
"learning_rate": 5.050999686810735e-05,
|
| 7885 |
+
"loss": 0.2997,
|
| 7886 |
+
"step": 656500
|
| 7887 |
+
},
|
| 7888 |
+
{
|
| 7889 |
+
"epoch": 1.31,
|
| 7890 |
+
"learning_rate": 5.04050667685846e-05,
|
| 7891 |
+
"loss": 0.2995,
|
| 7892 |
+
"step": 657000
|
| 7893 |
+
},
|
| 7894 |
+
{
|
| 7895 |
+
"epoch": 1.31,
|
| 7896 |
+
"learning_rate": 5.0300217580479244e-05,
|
| 7897 |
+
"loss": 0.2997,
|
| 7898 |
+
"step": 657500
|
| 7899 |
+
},
|
| 7900 |
+
{
|
| 7901 |
+
"epoch": 1.32,
|
| 7902 |
+
"learning_rate": 5.01954495904449e-05,
|
| 7903 |
+
"loss": 0.2993,
|
| 7904 |
+
"step": 658000
|
| 7905 |
+
},
|
| 7906 |
+
{
|
| 7907 |
+
"epoch": 1.32,
|
| 7908 |
+
"learning_rate": 5.0090763084913336e-05,
|
| 7909 |
+
"loss": 0.2991,
|
| 7910 |
+
"step": 658500
|
| 7911 |
+
},
|
| 7912 |
+
{
|
| 7913 |
+
"epoch": 1.32,
|
| 7914 |
+
"learning_rate": 4.998615835009339e-05,
|
| 7915 |
+
"loss": 0.2995,
|
| 7916 |
+
"step": 659000
|
| 7917 |
+
},
|
| 7918 |
+
{
|
| 7919 |
+
"epoch": 1.32,
|
| 7920 |
+
"learning_rate": 4.988163567197043e-05,
|
| 7921 |
+
"loss": 0.2993,
|
| 7922 |
+
"step": 659500
|
| 7923 |
+
},
|
| 7924 |
+
{
|
| 7925 |
+
"epoch": 1.32,
|
| 7926 |
+
"learning_rate": 4.97771953363055e-05,
|
| 7927 |
+
"loss": 0.299,
|
| 7928 |
+
"step": 660000
|
| 7929 |
}
|
| 7930 |
],
|
| 7931 |
"max_steps": 1000000,
|
| 7932 |
"num_train_epochs": 2,
|
| 7933 |
+
"total_flos": 4.462065045446751e+22,
|
| 7934 |
"trial_name": null,
|
| 7935 |
"trial_params": null
|
| 7936 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b0253822de0f7fe54e007e60affd0913e4b68439fe1550215e7a076507078bb
|
| 3 |
size 449450757
|