Training in progress, step 960000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82bf941331b6147f0c38426d0407fa25ba10c0bd3b73ef74a1673cd375f5dea3
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e039cd872f61b73cdde9f431db14aa3e4f6ae315b0cdef8e97e75cdb6be6fa4
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73634de375042b3baa7b5c117beb24655dd2f7f5f57009b1eef654c82b3b44b5
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fb4c489f0f7eeedc1b3b1654e89c9a4aafbf4af00e935321e2351196b10ff6c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:060c7b3ac0997105e228b3a17b751784076ba7d3219bd9bc28aad1940ff45553
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:612d68332bbb7f66da9a1c4eee686f9c7adc9fb542398fcfd0c492b56e914c02
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70baf209a4631065e5f1d839e29da7241e0065c3f0cd0e2e1c6f4c4e169d312e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acc2e296643bbf328fc4ab16e724c938d6325131f9cb567ebc676b5d1a649c3a
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bc74f55e3f6e86286f729f34589914d40a1b187feaf939dd73f214761d85e9c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a192d4f815ba365d126dfc7fc40698d69e696351b09b7c12fff827e40276ec96
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6eea93722769fd2cdfccc4deac474dd6ab3e6b96299bf9d74b4a0082fc65937
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11406,11 +11406,131 @@
|
|
| 11406 |
"learning_rate": 1.0954708761809438e-05,
|
| 11407 |
"loss": 0.2843,
|
| 11408 |
"step": 950000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11409 |
}
|
| 11410 |
],
|
| 11411 |
"max_steps": 1000000,
|
| 11412 |
"num_train_epochs": 2,
|
| 11413 |
-
"total_flos": 6.
|
| 11414 |
"trial_name": null,
|
| 11415 |
"trial_params": null
|
| 11416 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.458382825392017,
|
| 5 |
+
"global_step": 960000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11406 |
"learning_rate": 1.0954708761809438e-05,
|
| 11407 |
"loss": 0.2843,
|
| 11408 |
"step": 950000
|
| 11409 |
+
},
|
| 11410 |
+
{
|
| 11411 |
+
"epoch": 1.44,
|
| 11412 |
+
"learning_rate": 1.0935752500982175e-05,
|
| 11413 |
+
"loss": 0.2834,
|
| 11414 |
+
"step": 950500
|
| 11415 |
+
},
|
| 11416 |
+
{
|
| 11417 |
+
"epoch": 1.44,
|
| 11418 |
+
"learning_rate": 1.091698505917036e-05,
|
| 11419 |
+
"loss": 0.2853,
|
| 11420 |
+
"step": 951000
|
| 11421 |
+
},
|
| 11422 |
+
{
|
| 11423 |
+
"epoch": 1.44,
|
| 11424 |
+
"learning_rate": 1.0898406487683472e-05,
|
| 11425 |
+
"loss": 0.284,
|
| 11426 |
+
"step": 951500
|
| 11427 |
+
},
|
| 11428 |
+
{
|
| 11429 |
+
"epoch": 1.44,
|
| 11430 |
+
"learning_rate": 1.0880016837314599e-05,
|
| 11431 |
+
"loss": 0.2833,
|
| 11432 |
+
"step": 952000
|
| 11433 |
+
},
|
| 11434 |
+
{
|
| 11435 |
+
"epoch": 1.44,
|
| 11436 |
+
"learning_rate": 1.0861816158340365e-05,
|
| 11437 |
+
"loss": 0.2835,
|
| 11438 |
+
"step": 952500
|
| 11439 |
+
},
|
| 11440 |
+
{
|
| 11441 |
+
"epoch": 1.44,
|
| 11442 |
+
"learning_rate": 1.084380450052071e-05,
|
| 11443 |
+
"loss": 0.284,
|
| 11444 |
+
"step": 953000
|
| 11445 |
+
},
|
| 11446 |
+
{
|
| 11447 |
+
"epoch": 1.45,
|
| 11448 |
+
"learning_rate": 1.0825981913098828e-05,
|
| 11449 |
+
"loss": 0.2835,
|
| 11450 |
+
"step": 953500
|
| 11451 |
+
},
|
| 11452 |
+
{
|
| 11453 |
+
"epoch": 1.45,
|
| 11454 |
+
"learning_rate": 1.0808348444801e-05,
|
| 11455 |
+
"loss": 0.2836,
|
| 11456 |
+
"step": 954000
|
| 11457 |
+
},
|
| 11458 |
+
{
|
| 11459 |
+
"epoch": 1.45,
|
| 11460 |
+
"learning_rate": 1.0790904143836438e-05,
|
| 11461 |
+
"loss": 0.2834,
|
| 11462 |
+
"step": 954500
|
| 11463 |
+
},
|
| 11464 |
+
{
|
| 11465 |
+
"epoch": 1.45,
|
| 11466 |
+
"learning_rate": 1.0773649057897206e-05,
|
| 11467 |
+
"loss": 0.2833,
|
| 11468 |
+
"step": 955000
|
| 11469 |
+
},
|
| 11470 |
+
{
|
| 11471 |
+
"epoch": 1.45,
|
| 11472 |
+
"learning_rate": 1.0756583234158057e-05,
|
| 11473 |
+
"loss": 0.2839,
|
| 11474 |
+
"step": 955500
|
| 11475 |
+
},
|
| 11476 |
+
{
|
| 11477 |
+
"epoch": 1.45,
|
| 11478 |
+
"learning_rate": 1.073970671927628e-05,
|
| 11479 |
+
"loss": 0.2834,
|
| 11480 |
+
"step": 956000
|
| 11481 |
+
},
|
| 11482 |
+
{
|
| 11483 |
+
"epoch": 1.45,
|
| 11484 |
+
"learning_rate": 1.0723019559391643e-05,
|
| 11485 |
+
"loss": 0.2843,
|
| 11486 |
+
"step": 956500
|
| 11487 |
+
},
|
| 11488 |
+
{
|
| 11489 |
+
"epoch": 1.45,
|
| 11490 |
+
"learning_rate": 1.0706521800126198e-05,
|
| 11491 |
+
"loss": 0.2843,
|
| 11492 |
+
"step": 957000
|
| 11493 |
+
},
|
| 11494 |
+
{
|
| 11495 |
+
"epoch": 1.45,
|
| 11496 |
+
"learning_rate": 1.0690213486584175e-05,
|
| 11497 |
+
"loss": 0.284,
|
| 11498 |
+
"step": 957500
|
| 11499 |
+
},
|
| 11500 |
+
{
|
| 11501 |
+
"epoch": 1.45,
|
| 11502 |
+
"learning_rate": 1.0674094663351906e-05,
|
| 11503 |
+
"loss": 0.2833,
|
| 11504 |
+
"step": 958000
|
| 11505 |
+
},
|
| 11506 |
+
{
|
| 11507 |
+
"epoch": 1.46,
|
| 11508 |
+
"learning_rate": 1.0658165374497611e-05,
|
| 11509 |
+
"loss": 0.2836,
|
| 11510 |
+
"step": 958500
|
| 11511 |
+
},
|
| 11512 |
+
{
|
| 11513 |
+
"epoch": 1.46,
|
| 11514 |
+
"learning_rate": 1.0642425663571383e-05,
|
| 11515 |
+
"loss": 0.2839,
|
| 11516 |
+
"step": 959000
|
| 11517 |
+
},
|
| 11518 |
+
{
|
| 11519 |
+
"epoch": 1.46,
|
| 11520 |
+
"learning_rate": 1.062687557360497e-05,
|
| 11521 |
+
"loss": 0.2834,
|
| 11522 |
+
"step": 959500
|
| 11523 |
+
},
|
| 11524 |
+
{
|
| 11525 |
+
"epoch": 1.46,
|
| 11526 |
+
"learning_rate": 1.0611515147111736e-05,
|
| 11527 |
+
"loss": 0.2832,
|
| 11528 |
+
"step": 960000
|
| 11529 |
}
|
| 11530 |
],
|
| 11531 |
"max_steps": 1000000,
|
| 11532 |
"num_train_epochs": 2,
|
| 11533 |
+
"total_flos": 6.4902759073727495e+22,
|
| 11534 |
"trial_name": null,
|
| 11535 |
"trial_params": null
|
| 11536 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e039cd872f61b73cdde9f431db14aa3e4f6ae315b0cdef8e97e75cdb6be6fa4
|
| 3 |
size 449450757
|