Training in progress, step 810000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4d687ddbdb4e89fc2dcf4a1194021793a9bf6bf7cb019db9f4960ca46caec57
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ec87ec32fe6f1afb99642886552d48e3abf86b7380d88757c48489a6974eadf
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e09a2f5687d865e3c781f2165eefcb1856cc3c45b89b03d8a7d88cfa59107bfb
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acb039c0d2f72216fce3d8d73be28207294bda5cfc4474547820110b11abd2a8
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5adc671b102c788828dd37ee1d0f0b0a03c77bb3d279f123bf2cbe3d6d5cd23e
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8116bccf5ca568ceb54edc4c2f036f266c94ac035ff894ed751446d6238c146f
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4decc89420866c9caffa620eb043d2f83ba7fe11eee6e3e9db617a680a5e3419
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3ecec9a0f4bcccdee7615b55d893bdd755c9846b4db2c967eb5630e87ff3741
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7819877f3abe602d4d64d7ac8cdc0b37ac08d27db902f3ca861703ead38253c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c95826cc93c68dd743486cd9578bd00eeec47504d8a825d434c0d8b522697126
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:311a816d2396f8ee752cab7a1a3a8667609453373ab7e8b0474b724f8acc447d
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9606,11 +9606,131 @@
|
|
| 9606 |
"learning_rate": 2.476016434225246e-05,
|
| 9607 |
"loss": 0.2901,
|
| 9608 |
"step": 800000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9609 |
}
|
| 9610 |
],
|
| 9611 |
"max_steps": 1000000,
|
| 9612 |
"num_train_epochs": 2,
|
| 9613 |
-
"total_flos": 5.
|
| 9614 |
"trial_name": null,
|
| 9615 |
"trial_params": null
|
| 9616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.1594375044841798,
|
| 5 |
+
"global_step": 810000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9606 |
"learning_rate": 2.476016434225246e-05,
|
| 9607 |
"loss": 0.2901,
|
| 9608 |
"step": 800000
|
| 9609 |
+
},
|
| 9610 |
+
{
|
| 9611 |
+
"epoch": 1.14,
|
| 9612 |
+
"learning_rate": 2.4689149133701672e-05,
|
| 9613 |
+
"loss": 0.2905,
|
| 9614 |
+
"step": 800500
|
| 9615 |
+
},
|
| 9616 |
+
{
|
| 9617 |
+
"epoch": 1.14,
|
| 9618 |
+
"learning_rate": 2.461828514290513e-05,
|
| 9619 |
+
"loss": 0.2898,
|
| 9620 |
+
"step": 801000
|
| 9621 |
+
},
|
| 9622 |
+
{
|
| 9623 |
+
"epoch": 1.14,
|
| 9624 |
+
"learning_rate": 2.4547572563602267e-05,
|
| 9625 |
+
"loss": 0.2903,
|
| 9626 |
+
"step": 801500
|
| 9627 |
+
},
|
| 9628 |
+
{
|
| 9629 |
+
"epoch": 1.14,
|
| 9630 |
+
"learning_rate": 2.447701158911855e-05,
|
| 9631 |
+
"loss": 0.2898,
|
| 9632 |
+
"step": 802000
|
| 9633 |
+
},
|
| 9634 |
+
{
|
| 9635 |
+
"epoch": 1.14,
|
| 9636 |
+
"learning_rate": 2.4406602412365027e-05,
|
| 9637 |
+
"loss": 0.2898,
|
| 9638 |
+
"step": 802500
|
| 9639 |
+
},
|
| 9640 |
+
{
|
| 9641 |
+
"epoch": 1.15,
|
| 9642 |
+
"learning_rate": 2.4336345225837658e-05,
|
| 9643 |
+
"loss": 0.2902,
|
| 9644 |
+
"step": 803000
|
| 9645 |
+
},
|
| 9646 |
+
{
|
| 9647 |
+
"epoch": 1.15,
|
| 9648 |
+
"learning_rate": 2.4266240221616956e-05,
|
| 9649 |
+
"loss": 0.2905,
|
| 9650 |
+
"step": 803500
|
| 9651 |
+
},
|
| 9652 |
+
{
|
| 9653 |
+
"epoch": 1.15,
|
| 9654 |
+
"learning_rate": 2.4196287591367296e-05,
|
| 9655 |
+
"loss": 0.2897,
|
| 9656 |
+
"step": 804000
|
| 9657 |
+
},
|
| 9658 |
+
{
|
| 9659 |
+
"epoch": 1.15,
|
| 9660 |
+
"learning_rate": 2.412648752633649e-05,
|
| 9661 |
+
"loss": 0.2898,
|
| 9662 |
+
"step": 804500
|
| 9663 |
+
},
|
| 9664 |
+
{
|
| 9665 |
+
"epoch": 1.15,
|
| 9666 |
+
"learning_rate": 2.405684021735527e-05,
|
| 9667 |
+
"loss": 0.2898,
|
| 9668 |
+
"step": 805000
|
| 9669 |
+
},
|
| 9670 |
+
{
|
| 9671 |
+
"epoch": 1.15,
|
| 9672 |
+
"learning_rate": 2.39873458548367e-05,
|
| 9673 |
+
"loss": 0.2895,
|
| 9674 |
+
"step": 805500
|
| 9675 |
+
},
|
| 9676 |
+
{
|
| 9677 |
+
"epoch": 1.15,
|
| 9678 |
+
"learning_rate": 2.3918004628775736e-05,
|
| 9679 |
+
"loss": 0.2897,
|
| 9680 |
+
"step": 806000
|
| 9681 |
+
},
|
| 9682 |
+
{
|
| 9683 |
+
"epoch": 1.15,
|
| 9684 |
+
"learning_rate": 2.3848816728748643e-05,
|
| 9685 |
+
"loss": 0.2897,
|
| 9686 |
+
"step": 806500
|
| 9687 |
+
},
|
| 9688 |
+
{
|
| 9689 |
+
"epoch": 1.15,
|
| 9690 |
+
"learning_rate": 2.3779782343912463e-05,
|
| 9691 |
+
"loss": 0.2888,
|
| 9692 |
+
"step": 807000
|
| 9693 |
+
},
|
| 9694 |
+
{
|
| 9695 |
+
"epoch": 1.15,
|
| 9696 |
+
"learning_rate": 2.3710901663004604e-05,
|
| 9697 |
+
"loss": 0.29,
|
| 9698 |
+
"step": 807500
|
| 9699 |
+
},
|
| 9700 |
+
{
|
| 9701 |
+
"epoch": 1.16,
|
| 9702 |
+
"learning_rate": 2.364217487434221e-05,
|
| 9703 |
+
"loss": 0.2895,
|
| 9704 |
+
"step": 808000
|
| 9705 |
+
},
|
| 9706 |
+
{
|
| 9707 |
+
"epoch": 1.16,
|
| 9708 |
+
"learning_rate": 2.3573602165821668e-05,
|
| 9709 |
+
"loss": 0.2899,
|
| 9710 |
+
"step": 808500
|
| 9711 |
+
},
|
| 9712 |
+
{
|
| 9713 |
+
"epoch": 1.16,
|
| 9714 |
+
"learning_rate": 2.3505183724918196e-05,
|
| 9715 |
+
"loss": 0.2897,
|
| 9716 |
+
"step": 809000
|
| 9717 |
+
},
|
| 9718 |
+
{
|
| 9719 |
+
"epoch": 1.16,
|
| 9720 |
+
"learning_rate": 2.3436919738685132e-05,
|
| 9721 |
+
"loss": 0.29,
|
| 9722 |
+
"step": 809500
|
| 9723 |
+
},
|
| 9724 |
+
{
|
| 9725 |
+
"epoch": 1.16,
|
| 9726 |
+
"learning_rate": 2.3368810393753687e-05,
|
| 9727 |
+
"loss": 0.2895,
|
| 9728 |
+
"step": 810000
|
| 9729 |
}
|
| 9730 |
],
|
| 9731 |
"max_steps": 1000000,
|
| 9732 |
"num_train_epochs": 2,
|
| 9733 |
+
"total_flos": 5.476171643101538e+22,
|
| 9734 |
"trial_name": null,
|
| 9735 |
"trial_params": null
|
| 9736 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ec87ec32fe6f1afb99642886552d48e3abf86b7380d88757c48489a6974eadf
|
| 3 |
size 449450757
|