Training in progress, step 530000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00b387d4f9f04e8c9303a92c0ddb4fecebb2d329d1bad50fe16824cce0afa53f
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1625f5e23d8f1ad41b87b90859c51a9a7e8e0c2f203d02de268a294a2c0644e2
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f39691b2c245489425bdb803bff6f96b52586788af71785b57e3d212e063166a
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af7326fef653b7a70f9f155f4acb1e3ee4232444cd6d9204bc168a2d2e45c727
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee1218ce9b7108b8255e87b249fdf7f16f07137f90939e9084af13b071ce6ffe
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9c5bbfb1027111f06815f4497b89bb272c0861b51bd6d39c91af85222b32d16
|
| 3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:286366dc95c1888d75c773db3569adfce838dc105f2552542e8252bf7ab19ebf
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -3854,11 +3854,85 @@
|
|
| 3854 |
"eval_samples_per_second": 952.036,
|
| 3855 |
"eval_steps_per_second": 15.233,
|
| 3856 |
"step": 520000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3857 |
}
|
| 3858 |
],
|
| 3859 |
"max_steps": 1000000,
|
| 3860 |
"num_train_epochs": 16,
|
| 3861 |
-
"total_flos": 3.
|
| 3862 |
"trial_name": null,
|
| 3863 |
"trial_params": null
|
| 3864 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.093209339258173,
|
| 5 |
+
"global_step": 530000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 3854 |
"eval_samples_per_second": 952.036,
|
| 3855 |
"eval_steps_per_second": 15.233,
|
| 3856 |
"step": 520000
|
| 3857 |
+
},
|
| 3858 |
+
{
|
| 3859 |
+
"epoch": 7.96,
|
| 3860 |
+
"learning_rate": 8.092591609553747e-05,
|
| 3861 |
+
"loss": 0.2623,
|
| 3862 |
+
"step": 521000
|
| 3863 |
+
},
|
| 3864 |
+
{
|
| 3865 |
+
"epoch": 7.97,
|
| 3866 |
+
"learning_rate": 8.069444593175975e-05,
|
| 3867 |
+
"loss": 0.2622,
|
| 3868 |
+
"step": 522000
|
| 3869 |
+
},
|
| 3870 |
+
{
|
| 3871 |
+
"epoch": 7.99,
|
| 3872 |
+
"learning_rate": 8.046296817363259e-05,
|
| 3873 |
+
"loss": 0.262,
|
| 3874 |
+
"step": 523000
|
| 3875 |
+
},
|
| 3876 |
+
{
|
| 3877 |
+
"epoch": 8.0,
|
| 3878 |
+
"learning_rate": 8.023148535255965e-05,
|
| 3879 |
+
"loss": 0.2619,
|
| 3880 |
+
"step": 524000
|
| 3881 |
+
},
|
| 3882 |
+
{
|
| 3883 |
+
"epoch": 8.02,
|
| 3884 |
+
"learning_rate": 7.999999999999999e-05,
|
| 3885 |
+
"loss": 0.262,
|
| 3886 |
+
"step": 525000
|
| 3887 |
+
},
|
| 3888 |
+
{
|
| 3889 |
+
"epoch": 8.02,
|
| 3890 |
+
"eval_runtime": 1.1375,
|
| 3891 |
+
"eval_samples_per_second": 879.131,
|
| 3892 |
+
"eval_steps_per_second": 14.066,
|
| 3893 |
+
"step": 525000
|
| 3894 |
+
},
|
| 3895 |
+
{
|
| 3896 |
+
"epoch": 8.03,
|
| 3897 |
+
"learning_rate": 7.976851464744033e-05,
|
| 3898 |
+
"loss": 0.2616,
|
| 3899 |
+
"step": 526000
|
| 3900 |
+
},
|
| 3901 |
+
{
|
| 3902 |
+
"epoch": 8.05,
|
| 3903 |
+
"learning_rate": 7.953703182636741e-05,
|
| 3904 |
+
"loss": 0.2616,
|
| 3905 |
+
"step": 527000
|
| 3906 |
+
},
|
| 3907 |
+
{
|
| 3908 |
+
"epoch": 8.06,
|
| 3909 |
+
"learning_rate": 7.930555406824026e-05,
|
| 3910 |
+
"loss": 0.2617,
|
| 3911 |
+
"step": 528000
|
| 3912 |
+
},
|
| 3913 |
+
{
|
| 3914 |
+
"epoch": 8.08,
|
| 3915 |
+
"learning_rate": 7.907408390446254e-05,
|
| 3916 |
+
"loss": 0.2614,
|
| 3917 |
+
"step": 529000
|
| 3918 |
+
},
|
| 3919 |
+
{
|
| 3920 |
+
"epoch": 8.09,
|
| 3921 |
+
"learning_rate": 7.884262386635489e-05,
|
| 3922 |
+
"loss": 0.2607,
|
| 3923 |
+
"step": 530000
|
| 3924 |
+
},
|
| 3925 |
+
{
|
| 3926 |
+
"epoch": 8.09,
|
| 3927 |
+
"eval_runtime": 1.0134,
|
| 3928 |
+
"eval_samples_per_second": 986.75,
|
| 3929 |
+
"eval_steps_per_second": 15.788,
|
| 3930 |
+
"step": 530000
|
| 3931 |
}
|
| 3932 |
],
|
| 3933 |
"max_steps": 1000000,
|
| 3934 |
"num_train_epochs": 16,
|
| 3935 |
+
"total_flos": 3.7153085780632086e+22,
|
| 3936 |
"trial_name": null,
|
| 3937 |
"trial_params": null
|
| 3938 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1625f5e23d8f1ad41b87b90859c51a9a7e8e0c2f203d02de268a294a2c0644e2
|
| 3 |
size 449471589
|