Training in progress, step 940000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a56a8f0ae8b5b9f2aec995742da47ff25dfe07ebb15ec7ee61db4a763d8289c8
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c00c6557048c11e66b95589de29730bca35a40598e37017f064297cb592c4f93
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e8be78115c1d7a8b7fd3ba012ea9e0890f7c8e7c74970d79909bd336b578ec4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a534ad0e2fa8f314cc3ae9bccc570e59499e6b5c546b12853d32ae75d416e0e6
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2825291049da8c0b63497c412e6d53ce0d529ebee7d30f4f47c2a1d271fc14d
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:087eacd702285507c39ac952397dc9f6dae700c001504f218d7b716e9f249005
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dab84d4b75593cd9349f424c4371ea8ac2493751bc544a294c8ef74a18b08e9
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 14.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6888,11 +6888,85 @@
|
|
| 6888 |
"eval_samples_per_second": 1036.936,
|
| 6889 |
"eval_steps_per_second": 16.591,
|
| 6890 |
"step": 930000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6891 |
}
|
| 6892 |
],
|
| 6893 |
"max_steps": 1000000,
|
| 6894 |
"num_train_epochs": 16,
|
| 6895 |
-
"total_flos": 6.
|
| 6896 |
"trial_name": null,
|
| 6897 |
"trial_params": null
|
| 6898 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 14.353993922457892,
|
| 5 |
+
"global_step": 940000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6888 |
"eval_samples_per_second": 1036.936,
|
| 6889 |
"eval_steps_per_second": 16.591,
|
| 6890 |
"step": 930000
|
| 6891 |
+
},
|
| 6892 |
+
{
|
| 6893 |
+
"epoch": 14.22,
|
| 6894 |
+
"learning_rate": 1.1814402460652382e-05,
|
| 6895 |
+
"loss": 0.2291,
|
| 6896 |
+
"step": 931000
|
| 6897 |
+
},
|
| 6898 |
+
{
|
| 6899 |
+
"epoch": 14.23,
|
| 6900 |
+
"learning_rate": 1.176241239543558e-05,
|
| 6901 |
+
"loss": 0.229,
|
| 6902 |
+
"step": 932000
|
| 6903 |
+
},
|
| 6904 |
+
{
|
| 6905 |
+
"epoch": 14.25,
|
| 6906 |
+
"learning_rate": 1.171116856554418e-05,
|
| 6907 |
+
"loss": 0.2291,
|
| 6908 |
+
"step": 933000
|
| 6909 |
+
},
|
| 6910 |
+
{
|
| 6911 |
+
"epoch": 14.26,
|
| 6912 |
+
"learning_rate": 1.1660671531372517e-05,
|
| 6913 |
+
"loss": 0.2301,
|
| 6914 |
+
"step": 934000
|
| 6915 |
+
},
|
| 6916 |
+
{
|
| 6917 |
+
"epoch": 14.28,
|
| 6918 |
+
"learning_rate": 1.1610921845148052e-05,
|
| 6919 |
+
"loss": 0.2295,
|
| 6920 |
+
"step": 935000
|
| 6921 |
+
},
|
| 6922 |
+
{
|
| 6923 |
+
"epoch": 14.28,
|
| 6924 |
+
"eval_runtime": 0.8534,
|
| 6925 |
+
"eval_samples_per_second": 1171.832,
|
| 6926 |
+
"eval_steps_per_second": 18.749,
|
| 6927 |
+
"step": 935000
|
| 6928 |
+
},
|
| 6929 |
+
{
|
| 6930 |
+
"epoch": 14.29,
|
| 6931 |
+
"learning_rate": 1.156192005092539e-05,
|
| 6932 |
+
"loss": 0.2301,
|
| 6933 |
+
"step": 936000
|
| 6934 |
+
},
|
| 6935 |
+
{
|
| 6936 |
+
"epoch": 14.31,
|
| 6937 |
+
"learning_rate": 1.1513666684580308e-05,
|
| 6938 |
+
"loss": 0.2291,
|
| 6939 |
+
"step": 937000
|
| 6940 |
+
},
|
| 6941 |
+
{
|
| 6942 |
+
"epoch": 14.32,
|
| 6943 |
+
"learning_rate": 1.1466162273803876e-05,
|
| 6944 |
+
"loss": 0.2292,
|
| 6945 |
+
"step": 938000
|
| 6946 |
+
},
|
| 6947 |
+
{
|
| 6948 |
+
"epoch": 14.34,
|
| 6949 |
+
"learning_rate": 1.1419407338096732e-05,
|
| 6950 |
+
"loss": 0.2287,
|
| 6951 |
+
"step": 939000
|
| 6952 |
+
},
|
| 6953 |
+
{
|
| 6954 |
+
"epoch": 14.35,
|
| 6955 |
+
"learning_rate": 1.1373402388763346e-05,
|
| 6956 |
+
"loss": 0.2286,
|
| 6957 |
+
"step": 940000
|
| 6958 |
+
},
|
| 6959 |
+
{
|
| 6960 |
+
"epoch": 14.35,
|
| 6961 |
+
"eval_runtime": 0.7875,
|
| 6962 |
+
"eval_samples_per_second": 1269.803,
|
| 6963 |
+
"eval_steps_per_second": 20.317,
|
| 6964 |
+
"step": 940000
|
| 6965 |
}
|
| 6966 |
],
|
| 6967 |
"max_steps": 1000000,
|
| 6968 |
"num_train_epochs": 16,
|
| 6969 |
+
"total_flos": 6.589415337898549e+22,
|
| 6970 |
"trial_name": null,
|
| 6971 |
"trial_params": null
|
| 6972 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c00c6557048c11e66b95589de29730bca35a40598e37017f064297cb592c4f93
|
| 3 |
size 449471589
|