Training in progress, step 650000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0094043a45ee455c34cdbf7e5ed868b844e2cc109c62c31adc8eabe0945cd55
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afca04d573f1bb3162caabe1bb7b946edefb5cb8fa7beeabdf4a9618ee0ba3ea
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8afdb75bc9c4b3b8d3f36f77e21f0d34f0633a3fe673f092dd264b1121465456
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:334f507bebbd8e2eb32a3a52e1460054ef235aff9b388a6044a2cf6124700604
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1fe6196ed311cd8ddb4f7739bbce785a7482bd7a8a89fc83aadbb7b199e0b80
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c16cf0e46ab235f9e251974c64ca93772ae50300b4f1505ff50d8f4e2246708
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dea98aa1b40d4dde89de24ce301ffc44f0dee70fb25e51e28dfe6b65e5e6240d
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:476caa126ce91db0ad93d8541266aa7e5c1a71c0473ab678864fc300fdd08e70
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3637d3be8f9c8d6ff9f1958a71fe2d848eaecb87ddf0683d13eaae5352425491
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b62727907f78fc16c3f0e4b91fbdcc94bc537750512333e674d8d2c4dcd12411
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb24dd41ced00dc957e38e97b930833d2e52e5141588b4ec2f84d6e2ee23293d
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7686,11 +7686,131 @@
|
|
| 7686 |
"learning_rate": 5.401619257572453e-05,
|
| 7687 |
"loss": 0.3007,
|
| 7688 |
"step": 640000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7689 |
}
|
| 7690 |
],
|
| 7691 |
"max_steps": 1000000,
|
| 7692 |
"num_train_epochs": 2,
|
| 7693 |
-
"total_flos": 4.
|
| 7694 |
"trial_name": null,
|
| 7695 |
"trial_params": null
|
| 7696 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.2993814944086615,
|
| 5 |
+
"global_step": 650000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7686 |
"learning_rate": 5.401619257572453e-05,
|
| 7687 |
"loss": 0.3007,
|
| 7688 |
"step": 640000
|
| 7689 |
+
},
|
| 7690 |
+
{
|
| 7691 |
+
"epoch": 1.28,
|
| 7692 |
+
"learning_rate": 5.390875461869379e-05,
|
| 7693 |
+
"loss": 0.3011,
|
| 7694 |
+
"step": 640500
|
| 7695 |
+
},
|
| 7696 |
+
{
|
| 7697 |
+
"epoch": 1.28,
|
| 7698 |
+
"learning_rate": 5.3801387994131576e-05,
|
| 7699 |
+
"loss": 0.3012,
|
| 7700 |
+
"step": 641000
|
| 7701 |
+
},
|
| 7702 |
+
{
|
| 7703 |
+
"epoch": 1.28,
|
| 7704 |
+
"learning_rate": 5.36940929955742e-05,
|
| 7705 |
+
"loss": 0.3011,
|
| 7706 |
+
"step": 641500
|
| 7707 |
+
},
|
| 7708 |
+
{
|
| 7709 |
+
"epoch": 1.28,
|
| 7710 |
+
"learning_rate": 5.358686991636209e-05,
|
| 7711 |
+
"loss": 0.3005,
|
| 7712 |
+
"step": 642000
|
| 7713 |
+
},
|
| 7714 |
+
{
|
| 7715 |
+
"epoch": 1.28,
|
| 7716 |
+
"learning_rate": 5.347971904963904e-05,
|
| 7717 |
+
"loss": 0.3004,
|
| 7718 |
+
"step": 642500
|
| 7719 |
+
},
|
| 7720 |
+
{
|
| 7721 |
+
"epoch": 1.29,
|
| 7722 |
+
"learning_rate": 5.3372640688351476e-05,
|
| 7723 |
+
"loss": 0.3002,
|
| 7724 |
+
"step": 643000
|
| 7725 |
+
},
|
| 7726 |
+
{
|
| 7727 |
+
"epoch": 1.29,
|
| 7728 |
+
"learning_rate": 5.326563512524748e-05,
|
| 7729 |
+
"loss": 0.3005,
|
| 7730 |
+
"step": 643500
|
| 7731 |
+
},
|
| 7732 |
+
{
|
| 7733 |
+
"epoch": 1.29,
|
| 7734 |
+
"learning_rate": 5.315870265287618e-05,
|
| 7735 |
+
"loss": 0.2999,
|
| 7736 |
+
"step": 644000
|
| 7737 |
+
},
|
| 7738 |
+
{
|
| 7739 |
+
"epoch": 1.29,
|
| 7740 |
+
"learning_rate": 5.3051843563586914e-05,
|
| 7741 |
+
"loss": 0.3004,
|
| 7742 |
+
"step": 644500
|
| 7743 |
+
},
|
| 7744 |
+
{
|
| 7745 |
+
"epoch": 1.29,
|
| 7746 |
+
"learning_rate": 5.294505814952835e-05,
|
| 7747 |
+
"loss": 0.3003,
|
| 7748 |
+
"step": 645000
|
| 7749 |
+
},
|
| 7750 |
+
{
|
| 7751 |
+
"epoch": 1.29,
|
| 7752 |
+
"learning_rate": 5.28383467026477e-05,
|
| 7753 |
+
"loss": 0.2999,
|
| 7754 |
+
"step": 645500
|
| 7755 |
+
},
|
| 7756 |
+
{
|
| 7757 |
+
"epoch": 1.29,
|
| 7758 |
+
"learning_rate": 5.2731709514689995e-05,
|
| 7759 |
+
"loss": 0.2997,
|
| 7760 |
+
"step": 646000
|
| 7761 |
+
},
|
| 7762 |
+
{
|
| 7763 |
+
"epoch": 1.29,
|
| 7764 |
+
"learning_rate": 5.262514687719722e-05,
|
| 7765 |
+
"loss": 0.2999,
|
| 7766 |
+
"step": 646500
|
| 7767 |
+
},
|
| 7768 |
+
{
|
| 7769 |
+
"epoch": 1.29,
|
| 7770 |
+
"learning_rate": 5.25186590815076e-05,
|
| 7771 |
+
"loss": 0.3007,
|
| 7772 |
+
"step": 647000
|
| 7773 |
+
},
|
| 7774 |
+
{
|
| 7775 |
+
"epoch": 1.29,
|
| 7776 |
+
"learning_rate": 5.24122464187547e-05,
|
| 7777 |
+
"loss": 0.3007,
|
| 7778 |
+
"step": 647500
|
| 7779 |
+
},
|
| 7780 |
+
{
|
| 7781 |
+
"epoch": 1.3,
|
| 7782 |
+
"learning_rate": 5.2305909179866635e-05,
|
| 7783 |
+
"loss": 0.3002,
|
| 7784 |
+
"step": 648000
|
| 7785 |
+
},
|
| 7786 |
+
{
|
| 7787 |
+
"epoch": 1.3,
|
| 7788 |
+
"learning_rate": 5.219964765556536e-05,
|
| 7789 |
+
"loss": 0.3003,
|
| 7790 |
+
"step": 648500
|
| 7791 |
+
},
|
| 7792 |
+
{
|
| 7793 |
+
"epoch": 1.3,
|
| 7794 |
+
"learning_rate": 5.209346213636584e-05,
|
| 7795 |
+
"loss": 0.2997,
|
| 7796 |
+
"step": 649000
|
| 7797 |
+
},
|
| 7798 |
+
{
|
| 7799 |
+
"epoch": 1.3,
|
| 7800 |
+
"learning_rate": 5.1987352912575244e-05,
|
| 7801 |
+
"loss": 0.2995,
|
| 7802 |
+
"step": 649500
|
| 7803 |
+
},
|
| 7804 |
+
{
|
| 7805 |
+
"epoch": 1.3,
|
| 7806 |
+
"learning_rate": 5.188132027429215e-05,
|
| 7807 |
+
"loss": 0.2991,
|
| 7808 |
+
"step": 650000
|
| 7809 |
}
|
| 7810 |
],
|
| 7811 |
"max_steps": 1000000,
|
| 7812 |
"num_train_epochs": 2,
|
| 7813 |
+
"total_flos": 4.394457756168279e+22,
|
| 7814 |
"trial_name": null,
|
| 7815 |
"trial_params": null
|
| 7816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afca04d573f1bb3162caabe1bb7b946edefb5cb8fa7beeabdf4a9618ee0ba3ea
|
| 3 |
size 449450757
|