Training in progress, step 800000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a28142d72c1a639c002962982eef85320960aec72ba5875c70ba183bc4428b1
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce05dc3d2487d7ae9aa6b0c59ea7f4616a5a7c54ca64071fd6c540348ab08786
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81c65759179a0409080d1617c50ff7701ce92dbb64fc3e317b9b62050537c3c7
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44a3c67c522ab27350032a90eb3d95a3dabd1324000cbd2835b62067512227df
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d08d861261452452fd94fae84216cedf28f7702bcc30b7c349d5fe92376b24f
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68acbc45c81b60f69d2a54d496b38309d0cdcd82a52bd9be2a013dfa91b5b790
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7bd01ddf08fc5e563abc90c1bcd231f2103e9ae4cae190ae9888d8dbb7258d1
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:294721238c7a8c697c0dc55ae3f4c4580f7fc2de42c41858980ea55e897cb68b
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a269e37b31e512e59995e2d8559babba1725032fdbd7ed2e8cd2d1c9cdf42315
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ada5eca471a55afc26d1f38512d1163ebd3e62514e19a602711641b70fa1cc7
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc1b9455cfc1cc0d89898dcc5885e6586d120f989c1d84c4e74dce29a1aeae31
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9486,11 +9486,131 @@
|
|
| 9486 |
"learning_rate": 2.6211922240019883e-05,
|
| 9487 |
"loss": 0.2908,
|
| 9488 |
"step": 790000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9489 |
}
|
| 9490 |
],
|
| 9491 |
"max_steps": 1000000,
|
| 9492 |
"num_train_epochs": 2,
|
| 9493 |
-
"total_flos": 5.
|
| 9494 |
"trial_name": null,
|
| 9495 |
"trial_params": null
|
| 9496 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.1395078164236574,
|
| 5 |
+
"global_step": 800000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9486 |
"learning_rate": 2.6211922240019883e-05,
|
| 9487 |
"loss": 0.2908,
|
| 9488 |
"step": 790000
|
| 9489 |
+
},
|
| 9490 |
+
{
|
| 9491 |
+
"epoch": 1.12,
|
| 9492 |
+
"learning_rate": 2.6137923994427768e-05,
|
| 9493 |
+
"loss": 0.2911,
|
| 9494 |
+
"step": 790500
|
| 9495 |
+
},
|
| 9496 |
+
{
|
| 9497 |
+
"epoch": 1.12,
|
| 9498 |
+
"learning_rate": 2.6064073005694758e-05,
|
| 9499 |
+
"loss": 0.2909,
|
| 9500 |
+
"step": 791000
|
| 9501 |
+
},
|
| 9502 |
+
{
|
| 9503 |
+
"epoch": 1.12,
|
| 9504 |
+
"learning_rate": 2.5990369475726598e-05,
|
| 9505 |
+
"loss": 0.2898,
|
| 9506 |
+
"step": 791500
|
| 9507 |
+
},
|
| 9508 |
+
{
|
| 9509 |
+
"epoch": 1.12,
|
| 9510 |
+
"learning_rate": 2.591681360602595e-05,
|
| 9511 |
+
"loss": 0.2902,
|
| 9512 |
+
"step": 792000
|
| 9513 |
+
},
|
| 9514 |
+
{
|
| 9515 |
+
"epoch": 1.12,
|
| 9516 |
+
"learning_rate": 2.5843405597691748e-05,
|
| 9517 |
+
"loss": 0.2909,
|
| 9518 |
+
"step": 792500
|
| 9519 |
+
},
|
| 9520 |
+
{
|
| 9521 |
+
"epoch": 1.13,
|
| 9522 |
+
"learning_rate": 2.577014565141866e-05,
|
| 9523 |
+
"loss": 0.2899,
|
| 9524 |
+
"step": 793000
|
| 9525 |
+
},
|
| 9526 |
+
{
|
| 9527 |
+
"epoch": 1.13,
|
| 9528 |
+
"learning_rate": 2.569703396749661e-05,
|
| 9529 |
+
"loss": 0.2905,
|
| 9530 |
+
"step": 793500
|
| 9531 |
+
},
|
| 9532 |
+
{
|
| 9533 |
+
"epoch": 1.13,
|
| 9534 |
+
"learning_rate": 2.562407074581014e-05,
|
| 9535 |
+
"loss": 0.2908,
|
| 9536 |
+
"step": 794000
|
| 9537 |
+
},
|
| 9538 |
+
{
|
| 9539 |
+
"epoch": 1.13,
|
| 9540 |
+
"learning_rate": 2.5551256185837897e-05,
|
| 9541 |
+
"loss": 0.2904,
|
| 9542 |
+
"step": 794500
|
| 9543 |
+
},
|
| 9544 |
+
{
|
| 9545 |
+
"epoch": 1.13,
|
| 9546 |
+
"learning_rate": 2.5478590486652137e-05,
|
| 9547 |
+
"loss": 0.2905,
|
| 9548 |
+
"step": 795000
|
| 9549 |
+
},
|
| 9550 |
+
{
|
| 9551 |
+
"epoch": 1.13,
|
| 9552 |
+
"learning_rate": 2.5406073846918076e-05,
|
| 9553 |
+
"loss": 0.2901,
|
| 9554 |
+
"step": 795500
|
| 9555 |
+
},
|
| 9556 |
+
{
|
| 9557 |
+
"epoch": 1.13,
|
| 9558 |
+
"learning_rate": 2.533370646489347e-05,
|
| 9559 |
+
"loss": 0.2904,
|
| 9560 |
+
"step": 796000
|
| 9561 |
+
},
|
| 9562 |
+
{
|
| 9563 |
+
"epoch": 1.13,
|
| 9564 |
+
"learning_rate": 2.526148853842796e-05,
|
| 9565 |
+
"loss": 0.2903,
|
| 9566 |
+
"step": 796500
|
| 9567 |
+
},
|
| 9568 |
+
{
|
| 9569 |
+
"epoch": 1.13,
|
| 9570 |
+
"learning_rate": 2.5189420264962586e-05,
|
| 9571 |
+
"loss": 0.2898,
|
| 9572 |
+
"step": 797000
|
| 9573 |
+
},
|
| 9574 |
+
{
|
| 9575 |
+
"epoch": 1.13,
|
| 9576 |
+
"learning_rate": 2.5117501841529297e-05,
|
| 9577 |
+
"loss": 0.291,
|
| 9578 |
+
"step": 797500
|
| 9579 |
+
},
|
| 9580 |
+
{
|
| 9581 |
+
"epoch": 1.14,
|
| 9582 |
+
"learning_rate": 2.504573346475026e-05,
|
| 9583 |
+
"loss": 0.2897,
|
| 9584 |
+
"step": 798000
|
| 9585 |
+
},
|
| 9586 |
+
{
|
| 9587 |
+
"epoch": 1.14,
|
| 9588 |
+
"learning_rate": 2.497411533083753e-05,
|
| 9589 |
+
"loss": 0.2901,
|
| 9590 |
+
"step": 798500
|
| 9591 |
+
},
|
| 9592 |
+
{
|
| 9593 |
+
"epoch": 1.14,
|
| 9594 |
+
"learning_rate": 2.4902647635592324e-05,
|
| 9595 |
+
"loss": 0.2909,
|
| 9596 |
+
"step": 799000
|
| 9597 |
+
},
|
| 9598 |
+
{
|
| 9599 |
+
"epoch": 1.14,
|
| 9600 |
+
"learning_rate": 2.483133057440458e-05,
|
| 9601 |
+
"loss": 0.2899,
|
| 9602 |
+
"step": 799500
|
| 9603 |
+
},
|
| 9604 |
+
{
|
| 9605 |
+
"epoch": 1.14,
|
| 9606 |
+
"learning_rate": 2.476016434225246e-05,
|
| 9607 |
+
"loss": 0.2901,
|
| 9608 |
+
"step": 800000
|
| 9609 |
}
|
| 9610 |
],
|
| 9611 |
"max_steps": 1000000,
|
| 9612 |
"num_train_epochs": 2,
|
| 9613 |
+
"total_flos": 5.4085624444111735e+22,
|
| 9614 |
"trial_name": null,
|
| 9615 |
"trial_params": null
|
| 9616 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce05dc3d2487d7ae9aa6b0c59ea7f4616a5a7c54ca64071fd6c540348ab08786
|
| 3 |
size 449450757
|