Training in progress, step 980000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a37ff0bb2125ba916e184875051de31c0a53ab6d8764d350d94b8f895cf97825
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29e72a077e735dda0d42d87bba36bdd29e6f03ff47e8d9d7c8531209111d469e
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22c34d3305b209ee825f5aed33939719436301cfaf9de55d7b3b5639a3350e80
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6315416921462fe0ecc7a0c3f368f4b3c932064b761ab22ca7678bb4befc6c5
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a76def7122ba11f5e0b58c3da4b2e90151c2789ca35782d50ddc98428cb6201
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8dd7ce8763729f28d8acde8cd6b3dfea779e9b4dbaa1cc534d994758c7e6d95d
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3093dd66e653e1347d48de0c3738e9baef47fa7023af660daaa6d276c2516c1c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c71863cae64230816d8f7da13edddd177d84ad915ea936c18a2d7e479676590
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1106c2b8026b5aad28464b0bd2b8b204a664cd4c27abf0a87c50c85c6899ce87
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4abf64edbda9080a72948f7486c0ac7635f48e74d5752a1a9ab0d947e838bf23
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f6a86f148673573f563d78ae30ad4429d07d6c9eca28255a514457fc218ec48
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11646,11 +11646,131 @@
|
|
| 11646 |
"learning_rate": 1.0344196821849202e-05,
|
| 11647 |
"loss": 0.2821,
|
| 11648 |
"step": 970000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11649 |
}
|
| 11650 |
],
|
| 11651 |
"max_steps": 1000000,
|
| 11652 |
"num_train_epochs": 2,
|
| 11653 |
-
"total_flos": 6.
|
| 11654 |
"trial_name": null,
|
| 11655 |
"trial_params": null
|
| 11656 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.4982422015130619,
|
| 5 |
+
"global_step": 980000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11646 |
"learning_rate": 1.0344196821849202e-05,
|
| 11647 |
"loss": 0.2821,
|
| 11648 |
"step": 970000
|
| 11649 |
+
},
|
| 11650 |
+
{
|
| 11651 |
+
"epoch": 1.48,
|
| 11652 |
+
"learning_rate": 1.0332828229586692e-05,
|
| 11653 |
+
"loss": 0.2831,
|
| 11654 |
+
"step": 970500
|
| 11655 |
+
},
|
| 11656 |
+
{
|
| 11657 |
+
"epoch": 1.48,
|
| 11658 |
+
"learning_rate": 1.032165010471157e-05,
|
| 11659 |
+
"loss": 0.2835,
|
| 11660 |
+
"step": 971000
|
| 11661 |
+
},
|
| 11662 |
+
{
|
| 11663 |
+
"epoch": 1.48,
|
| 11664 |
+
"learning_rate": 1.0310662477784401e-05,
|
| 11665 |
+
"loss": 0.2824,
|
| 11666 |
+
"step": 971500
|
| 11667 |
+
},
|
| 11668 |
+
{
|
| 11669 |
+
"epoch": 1.48,
|
| 11670 |
+
"learning_rate": 1.0299865378844936e-05,
|
| 11671 |
+
"loss": 0.2828,
|
| 11672 |
+
"step": 972000
|
| 11673 |
+
},
|
| 11674 |
+
{
|
| 11675 |
+
"epoch": 1.48,
|
| 11676 |
+
"learning_rate": 1.028925883741203e-05,
|
| 11677 |
+
"loss": 0.2832,
|
| 11678 |
+
"step": 972500
|
| 11679 |
+
},
|
| 11680 |
+
{
|
| 11681 |
+
"epoch": 1.48,
|
| 11682 |
+
"learning_rate": 1.0278842882483569e-05,
|
| 11683 |
+
"loss": 0.2836,
|
| 11684 |
+
"step": 973000
|
| 11685 |
+
},
|
| 11686 |
+
{
|
| 11687 |
+
"epoch": 1.49,
|
| 11688 |
+
"learning_rate": 1.026861754253637e-05,
|
| 11689 |
+
"loss": 0.2837,
|
| 11690 |
+
"step": 973500
|
| 11691 |
+
},
|
| 11692 |
+
{
|
| 11693 |
+
"epoch": 1.49,
|
| 11694 |
+
"learning_rate": 1.025858284552612e-05,
|
| 11695 |
+
"loss": 0.2829,
|
| 11696 |
+
"step": 974000
|
| 11697 |
+
},
|
| 11698 |
+
{
|
| 11699 |
+
"epoch": 1.49,
|
| 11700 |
+
"learning_rate": 1.0248738818887307e-05,
|
| 11701 |
+
"loss": 0.2829,
|
| 11702 |
+
"step": 974500
|
| 11703 |
+
},
|
| 11704 |
+
{
|
| 11705 |
+
"epoch": 1.49,
|
| 11706 |
+
"learning_rate": 1.023908548953311e-05,
|
| 11707 |
+
"loss": 0.2832,
|
| 11708 |
+
"step": 975000
|
| 11709 |
+
},
|
| 11710 |
+
{
|
| 11711 |
+
"epoch": 1.49,
|
| 11712 |
+
"learning_rate": 1.0229622883855378e-05,
|
| 11713 |
+
"loss": 0.2837,
|
| 11714 |
+
"step": 975500
|
| 11715 |
+
},
|
| 11716 |
+
{
|
| 11717 |
+
"epoch": 1.49,
|
| 11718 |
+
"learning_rate": 1.02203510277245e-05,
|
| 11719 |
+
"loss": 0.2832,
|
| 11720 |
+
"step": 976000
|
| 11721 |
+
},
|
| 11722 |
+
{
|
| 11723 |
+
"epoch": 1.49,
|
| 11724 |
+
"learning_rate": 1.021126994648939e-05,
|
| 11725 |
+
"loss": 0.2828,
|
| 11726 |
+
"step": 976500
|
| 11727 |
+
},
|
| 11728 |
+
{
|
| 11729 |
+
"epoch": 1.49,
|
| 11730 |
+
"learning_rate": 1.0202379664977364e-05,
|
| 11731 |
+
"loss": 0.2838,
|
| 11732 |
+
"step": 977000
|
| 11733 |
+
},
|
| 11734 |
+
{
|
| 11735 |
+
"epoch": 1.49,
|
| 11736 |
+
"learning_rate": 1.019368020749412e-05,
|
| 11737 |
+
"loss": 0.2828,
|
| 11738 |
+
"step": 977500
|
| 11739 |
+
},
|
| 11740 |
+
{
|
| 11741 |
+
"epoch": 1.49,
|
| 11742 |
+
"learning_rate": 1.018517159782365e-05,
|
| 11743 |
+
"loss": 0.2826,
|
| 11744 |
+
"step": 978000
|
| 11745 |
+
},
|
| 11746 |
+
{
|
| 11747 |
+
"epoch": 1.5,
|
| 11748 |
+
"learning_rate": 1.0176853859228149e-05,
|
| 11749 |
+
"loss": 0.2829,
|
| 11750 |
+
"step": 978500
|
| 11751 |
+
},
|
| 11752 |
+
{
|
| 11753 |
+
"epoch": 1.5,
|
| 11754 |
+
"learning_rate": 1.0168727014448004e-05,
|
| 11755 |
+
"loss": 0.2836,
|
| 11756 |
+
"step": 979000
|
| 11757 |
+
},
|
| 11758 |
+
{
|
| 11759 |
+
"epoch": 1.5,
|
| 11760 |
+
"learning_rate": 1.0160791085701714e-05,
|
| 11761 |
+
"loss": 0.2834,
|
| 11762 |
+
"step": 979500
|
| 11763 |
+
},
|
| 11764 |
+
{
|
| 11765 |
+
"epoch": 1.5,
|
| 11766 |
+
"learning_rate": 1.0153046094685783e-05,
|
| 11767 |
+
"loss": 0.2831,
|
| 11768 |
+
"step": 980000
|
| 11769 |
}
|
| 11770 |
],
|
| 11771 |
"max_steps": 1000000,
|
| 11772 |
"num_train_epochs": 2,
|
| 11773 |
+
"total_flos": 6.625495046923828e+22,
|
| 11774 |
"trial_name": null,
|
| 11775 |
"trial_params": null
|
| 11776 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29e72a077e735dda0d42d87bba36bdd29e6f03ff47e8d9d7c8531209111d469e
|
| 3 |
size 449450757
|