Training in progress, step 920000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37962abda8557285d298a4265c60f4356df61e7cc10bd44c3063d910102d66d3
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47f1c4302a150ebf621ef207481964f3a7603e3f271a1522dc7c4f830b5f8ea8
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14f7de72f01c7df685d896df6cd18118f7cd962784b86c13c1ec4716f711fe5e
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ce3ab5e3a6adf4b5050188a365eda6e65d45b5f880ed022a630511122a962d8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dac1435372cea6f546c378b27a4b5fc759dcb93ae36e129dcb0cfa5374977cc3
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7314965ea42ee1f19abe451e572b5d6938b7b63a561ae48af7903b9728329e39
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7df508c344c1f04d3b388cef9605593fbfd129cd18e2830701d3110873541479
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -6740,11 +6740,85 @@
|
|
| 6740 |
"eval_samples_per_second": 1377.03,
|
| 6741 |
"eval_steps_per_second": 22.032,
|
| 6742 |
"step": 910000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6743 |
}
|
| 6744 |
],
|
| 6745 |
"max_steps": 1000000,
|
| 6746 |
"num_train_epochs": 16,
|
| 6747 |
-
"total_flos": 6.
|
| 6748 |
"trial_name": null,
|
| 6749 |
"trial_params": null
|
| 6750 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 14.04858979644815,
|
| 5 |
+
"global_step": 920000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 6740 |
"eval_samples_per_second": 1377.03,
|
| 6741 |
"eval_steps_per_second": 22.032,
|
| 6742 |
"step": 910000
|
| 6743 |
+
},
|
| 6744 |
+
{
|
| 6745 |
+
"epoch": 13.91,
|
| 6746 |
+
"learning_rate": 1.3009978131197669e-05,
|
| 6747 |
+
"loss": 0.2314,
|
| 6748 |
+
"step": 911000
|
| 6749 |
+
},
|
| 6750 |
+
{
|
| 6751 |
+
"epoch": 13.93,
|
| 6752 |
+
"learning_rate": 1.2943193561946762e-05,
|
| 6753 |
+
"loss": 0.2304,
|
| 6754 |
+
"step": 912000
|
| 6755 |
+
},
|
| 6756 |
+
{
|
| 6757 |
+
"epoch": 13.94,
|
| 6758 |
+
"learning_rate": 1.2877142315187628e-05,
|
| 6759 |
+
"loss": 0.2299,
|
| 6760 |
+
"step": 913000
|
| 6761 |
+
},
|
| 6762 |
+
{
|
| 6763 |
+
"epoch": 13.96,
|
| 6764 |
+
"learning_rate": 1.28118251132461e-05,
|
| 6765 |
+
"loss": 0.23,
|
| 6766 |
+
"step": 914000
|
| 6767 |
+
},
|
| 6768 |
+
{
|
| 6769 |
+
"epoch": 13.97,
|
| 6770 |
+
"learning_rate": 1.274724267042063e-05,
|
| 6771 |
+
"loss": 0.2299,
|
| 6772 |
+
"step": 915000
|
| 6773 |
+
},
|
| 6774 |
+
{
|
| 6775 |
+
"epoch": 13.97,
|
| 6776 |
+
"eval_runtime": 0.795,
|
| 6777 |
+
"eval_samples_per_second": 1257.794,
|
| 6778 |
+
"eval_steps_per_second": 20.125,
|
| 6779 |
+
"step": 915000
|
| 6780 |
+
},
|
| 6781 |
+
{
|
| 6782 |
+
"epoch": 13.99,
|
| 6783 |
+
"learning_rate": 1.2683395692974472e-05,
|
| 6784 |
+
"loss": 0.23,
|
| 6785 |
+
"step": 916000
|
| 6786 |
+
},
|
| 6787 |
+
{
|
| 6788 |
+
"epoch": 14.0,
|
| 6789 |
+
"learning_rate": 1.2620284879127947e-05,
|
| 6790 |
+
"loss": 0.23,
|
| 6791 |
+
"step": 917000
|
| 6792 |
+
},
|
| 6793 |
+
{
|
| 6794 |
+
"epoch": 14.02,
|
| 6795 |
+
"learning_rate": 1.2557910919050803e-05,
|
| 6796 |
+
"loss": 0.2295,
|
| 6797 |
+
"step": 918000
|
| 6798 |
+
},
|
| 6799 |
+
{
|
| 6800 |
+
"epoch": 14.03,
|
| 6801 |
+
"learning_rate": 1.2496274494854666e-05,
|
| 6802 |
+
"loss": 0.2296,
|
| 6803 |
+
"step": 919000
|
| 6804 |
+
},
|
| 6805 |
+
{
|
| 6806 |
+
"epoch": 14.05,
|
| 6807 |
+
"learning_rate": 1.24353762805856e-05,
|
| 6808 |
+
"loss": 0.2297,
|
| 6809 |
+
"step": 920000
|
| 6810 |
+
},
|
| 6811 |
+
{
|
| 6812 |
+
"epoch": 14.05,
|
| 6813 |
+
"eval_runtime": 0.7692,
|
| 6814 |
+
"eval_samples_per_second": 1300.053,
|
| 6815 |
+
"eval_steps_per_second": 20.801,
|
| 6816 |
+
"step": 920000
|
| 6817 |
}
|
| 6818 |
],
|
| 6819 |
"max_steps": 1000000,
|
| 6820 |
"num_train_epochs": 16,
|
| 6821 |
+
"total_flos": 6.449214815837855e+22,
|
| 6822 |
"trial_name": null,
|
| 6823 |
"trial_params": null
|
| 6824 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47f1c4302a150ebf621ef207481964f3a7603e3f271a1522dc7c4f830b5f8ea8
|
| 3 |
size 449471589
|