Training in progress, step 690000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9c0e525e514fe1c5cef0557ef6488da0c1fdebf6272aba47004231ca6976c18
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e75512fbedd8d7adaa6c4c34c4ac863b89edc805a7194b74d72eea23cbb4e11b
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57580e912c8eb48eaacc83064464221889da2f0844a835381b4fa326341cb678
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62f6f091581690af80f436a9b86b0d75c2dad2d63b594e5185c0e63d2bb68d1e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a7b8b83cde1bd3869fe090f1cb9c794e65d5033208eec9d00a90a14b364e27f
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1804d7c7415f17b94f7dec6a3f79e792faa4e7579f021ed604639d6ae0a4f8f2
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5379aefa05a140fc154bf332c86fa067c2c130445bc4590eb14522743a5b9947
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:601105f28ea2021a75065a3124db1406fa8cee4a2f9774cf405bde49613afb37
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:578996460506c4bb1c40a07e7da011e3e8db58802532f0ccbbdc09eaa32c4f08
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a80cf61947b33b1a7acf0fb558355efcd3fbb3669a40d006ee3d143181ad13b1
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccd22c7052fac9ec9f037ee44414440d92721875c54fac648c7b4ca864fbffde
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -8166,11 +8166,131 @@
|
|
| 8166 |
"learning_rate": 4.567035473186444e-05,
|
| 8167 |
"loss": 0.2978,
|
| 8168 |
"step": 680000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8169 |
}
|
| 8170 |
],
|
| 8171 |
"max_steps": 1000000,
|
| 8172 |
"num_train_epochs": 2,
|
| 8173 |
-
"total_flos": 4.
|
| 8174 |
"trial_name": null,
|
| 8175 |
"trial_params": null
|
| 8176 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.3793434325261176,
|
| 5 |
+
"global_step": 690000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 8166 |
"learning_rate": 4.567035473186444e-05,
|
| 8167 |
"loss": 0.2978,
|
| 8168 |
"step": 680000
|
| 8169 |
+
},
|
| 8170 |
+
{
|
| 8171 |
+
"epoch": 1.36,
|
| 8172 |
+
"learning_rate": 4.556953360325625e-05,
|
| 8173 |
+
"loss": 0.2978,
|
| 8174 |
+
"step": 680500
|
| 8175 |
+
},
|
| 8176 |
+
{
|
| 8177 |
+
"epoch": 1.36,
|
| 8178 |
+
"learning_rate": 4.546880660622845e-05,
|
| 8179 |
+
"loss": 0.2975,
|
| 8180 |
+
"step": 681000
|
| 8181 |
+
},
|
| 8182 |
+
{
|
| 8183 |
+
"epoch": 1.36,
|
| 8184 |
+
"learning_rate": 4.5368174016164844e-05,
|
| 8185 |
+
"loss": 0.2976,
|
| 8186 |
+
"step": 681500
|
| 8187 |
+
},
|
| 8188 |
+
{
|
| 8189 |
+
"epoch": 1.36,
|
| 8190 |
+
"learning_rate": 4.5267636108191036e-05,
|
| 8191 |
+
"loss": 0.2983,
|
| 8192 |
+
"step": 682000
|
| 8193 |
+
},
|
| 8194 |
+
{
|
| 8195 |
+
"epoch": 1.36,
|
| 8196 |
+
"learning_rate": 4.5167193157173913e-05,
|
| 8197 |
+
"loss": 0.2978,
|
| 8198 |
+
"step": 682500
|
| 8199 |
+
},
|
| 8200 |
+
{
|
| 8201 |
+
"epoch": 1.37,
|
| 8202 |
+
"learning_rate": 4.5066845437720555e-05,
|
| 8203 |
+
"loss": 0.2977,
|
| 8204 |
+
"step": 683000
|
| 8205 |
+
},
|
| 8206 |
+
{
|
| 8207 |
+
"epoch": 1.37,
|
| 8208 |
+
"learning_rate": 4.4966593224177866e-05,
|
| 8209 |
+
"loss": 0.2976,
|
| 8210 |
+
"step": 683500
|
| 8211 |
+
},
|
| 8212 |
+
{
|
| 8213 |
+
"epoch": 1.37,
|
| 8214 |
+
"learning_rate": 4.4866436790631564e-05,
|
| 8215 |
+
"loss": 0.2971,
|
| 8216 |
+
"step": 684000
|
| 8217 |
+
},
|
| 8218 |
+
{
|
| 8219 |
+
"epoch": 1.37,
|
| 8220 |
+
"learning_rate": 4.476637641090551e-05,
|
| 8221 |
+
"loss": 0.2975,
|
| 8222 |
+
"step": 684500
|
| 8223 |
+
},
|
| 8224 |
+
{
|
| 8225 |
+
"epoch": 1.37,
|
| 8226 |
+
"learning_rate": 4.4666412358560955e-05,
|
| 8227 |
+
"loss": 0.2978,
|
| 8228 |
+
"step": 685000
|
| 8229 |
+
},
|
| 8230 |
+
{
|
| 8231 |
+
"epoch": 1.37,
|
| 8232 |
+
"learning_rate": 4.456654490689578e-05,
|
| 8233 |
+
"loss": 0.2967,
|
| 8234 |
+
"step": 685500
|
| 8235 |
+
},
|
| 8236 |
+
{
|
| 8237 |
+
"epoch": 1.37,
|
| 8238 |
+
"learning_rate": 4.4466774328943796e-05,
|
| 8239 |
+
"loss": 0.2979,
|
| 8240 |
+
"step": 686000
|
| 8241 |
+
},
|
| 8242 |
+
{
|
| 8243 |
+
"epoch": 1.37,
|
| 8244 |
+
"learning_rate": 4.4367100897474e-05,
|
| 8245 |
+
"loss": 0.2975,
|
| 8246 |
+
"step": 686500
|
| 8247 |
+
},
|
| 8248 |
+
{
|
| 8249 |
+
"epoch": 1.37,
|
| 8250 |
+
"learning_rate": 4.426752488498972e-05,
|
| 8251 |
+
"loss": 0.2972,
|
| 8252 |
+
"step": 687000
|
| 8253 |
+
},
|
| 8254 |
+
{
|
| 8255 |
+
"epoch": 1.37,
|
| 8256 |
+
"learning_rate": 4.4168046563727945e-05,
|
| 8257 |
+
"loss": 0.2972,
|
| 8258 |
+
"step": 687500
|
| 8259 |
+
},
|
| 8260 |
+
{
|
| 8261 |
+
"epoch": 1.38,
|
| 8262 |
+
"learning_rate": 4.406866620565862e-05,
|
| 8263 |
+
"loss": 0.2968,
|
| 8264 |
+
"step": 688000
|
| 8265 |
+
},
|
| 8266 |
+
{
|
| 8267 |
+
"epoch": 1.38,
|
| 8268 |
+
"learning_rate": 4.396938408248383e-05,
|
| 8269 |
+
"loss": 0.2973,
|
| 8270 |
+
"step": 688500
|
| 8271 |
+
},
|
| 8272 |
+
{
|
| 8273 |
+
"epoch": 1.38,
|
| 8274 |
+
"learning_rate": 4.3870200465637164e-05,
|
| 8275 |
+
"loss": 0.2972,
|
| 8276 |
+
"step": 689000
|
| 8277 |
+
},
|
| 8278 |
+
{
|
| 8279 |
+
"epoch": 1.38,
|
| 8280 |
+
"learning_rate": 4.377111562628282e-05,
|
| 8281 |
+
"loss": 0.2965,
|
| 8282 |
+
"step": 689500
|
| 8283 |
+
},
|
| 8284 |
+
{
|
| 8285 |
+
"epoch": 1.38,
|
| 8286 |
+
"learning_rate": 4.3672129835314955e-05,
|
| 8287 |
+
"loss": 0.2971,
|
| 8288 |
+
"step": 690000
|
| 8289 |
}
|
| 8290 |
],
|
| 8291 |
"max_steps": 1000000,
|
| 8292 |
"num_train_epochs": 2,
|
| 8293 |
+
"total_flos": 4.664892321779864e+22,
|
| 8294 |
"trial_name": null,
|
| 8295 |
"trial_params": null
|
| 8296 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e75512fbedd8d7adaa6c4c34c4ac863b89edc805a7194b74d72eea23cbb4e11b
|
| 3 |
size 449450757
|