Training in progress, step 40000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +103 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893438545
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8364802d21dd9f982e45881cc79c347aa3801886af5229e46080ee91f3907fe6
|
| 3 |
size 893438545
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d09408dd133bba0b615b1fc392982c3e187892b1f9f86f244d616011599238fa
|
| 3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15523
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7452543a8fbf992ab3cce28416697ef7ccf235bef8f9b12b8a45f822598554fe
|
| 3 |
size 15523
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 559
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c140b5a229b9a3368f84f81bd05277429e8fd4356be63302dcf2f4ec2ee074c7
|
| 3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:668b6868588ce6f6b1dad74dfa79e9c675d217e8314657782f2e491c66698c2c
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -706,11 +706,111 @@
|
|
| 706 |
"eval_samples_per_second": 29.603,
|
| 707 |
"eval_steps_per_second": 0.947,
|
| 708 |
"step": 35000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 709 |
}
|
| 710 |
],
|
| 711 |
"max_steps": 1000000,
|
| 712 |
"num_train_epochs": 86,
|
| 713 |
-
"total_flos": 1.
|
| 714 |
"trial_name": null,
|
| 715 |
"trial_params": null
|
| 716 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 3.402517863218782,
|
| 5 |
+
"global_step": 40000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 706 |
"eval_samples_per_second": 29.603,
|
| 707 |
"eval_steps_per_second": 0.947,
|
| 708 |
"step": 35000
|
| 709 |
+
},
|
| 710 |
+
{
|
| 711 |
+
"epoch": 3.02,
|
| 712 |
+
"learning_rate": 9.999999999999999e-06,
|
| 713 |
+
"loss": 0.4195,
|
| 714 |
+
"step": 35500
|
| 715 |
+
},
|
| 716 |
+
{
|
| 717 |
+
"epoch": 3.06,
|
| 718 |
+
"learning_rate": 9.999999999999999e-06,
|
| 719 |
+
"loss": 0.4194,
|
| 720 |
+
"step": 36000
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"epoch": 3.06,
|
| 724 |
+
"eval_loss": 0.38856348395347595,
|
| 725 |
+
"eval_runtime": 16.5028,
|
| 726 |
+
"eval_samples_per_second": 30.298,
|
| 727 |
+
"eval_steps_per_second": 0.97,
|
| 728 |
+
"step": 36000
|
| 729 |
+
},
|
| 730 |
+
{
|
| 731 |
+
"epoch": 3.1,
|
| 732 |
+
"learning_rate": 9.999999999999999e-06,
|
| 733 |
+
"loss": 0.4193,
|
| 734 |
+
"step": 36500
|
| 735 |
+
},
|
| 736 |
+
{
|
| 737 |
+
"epoch": 3.15,
|
| 738 |
+
"learning_rate": 9.999999999999999e-06,
|
| 739 |
+
"loss": 0.4208,
|
| 740 |
+
"step": 37000
|
| 741 |
+
},
|
| 742 |
+
{
|
| 743 |
+
"epoch": 3.15,
|
| 744 |
+
"eval_loss": 0.3889642059803009,
|
| 745 |
+
"eval_runtime": 28.0106,
|
| 746 |
+
"eval_samples_per_second": 17.85,
|
| 747 |
+
"eval_steps_per_second": 0.571,
|
| 748 |
+
"step": 37000
|
| 749 |
+
},
|
| 750 |
+
{
|
| 751 |
+
"epoch": 3.19,
|
| 752 |
+
"learning_rate": 9.999999999999999e-06,
|
| 753 |
+
"loss": 0.4189,
|
| 754 |
+
"step": 37500
|
| 755 |
+
},
|
| 756 |
+
{
|
| 757 |
+
"epoch": 3.23,
|
| 758 |
+
"learning_rate": 9.999999999999999e-06,
|
| 759 |
+
"loss": 0.4187,
|
| 760 |
+
"step": 38000
|
| 761 |
+
},
|
| 762 |
+
{
|
| 763 |
+
"epoch": 3.23,
|
| 764 |
+
"eval_loss": 0.3886989653110504,
|
| 765 |
+
"eval_runtime": 15.6007,
|
| 766 |
+
"eval_samples_per_second": 32.05,
|
| 767 |
+
"eval_steps_per_second": 1.026,
|
| 768 |
+
"step": 38000
|
| 769 |
+
},
|
| 770 |
+
{
|
| 771 |
+
"epoch": 3.27,
|
| 772 |
+
"learning_rate": 9.999999999999999e-06,
|
| 773 |
+
"loss": 0.4181,
|
| 774 |
+
"step": 38500
|
| 775 |
+
},
|
| 776 |
+
{
|
| 777 |
+
"epoch": 3.32,
|
| 778 |
+
"learning_rate": 9.999999999999999e-06,
|
| 779 |
+
"loss": 0.417,
|
| 780 |
+
"step": 39000
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 3.32,
|
| 784 |
+
"eval_loss": 0.3878667950630188,
|
| 785 |
+
"eval_runtime": 14.893,
|
| 786 |
+
"eval_samples_per_second": 33.573,
|
| 787 |
+
"eval_steps_per_second": 1.074,
|
| 788 |
+
"step": 39000
|
| 789 |
+
},
|
| 790 |
+
{
|
| 791 |
+
"epoch": 3.36,
|
| 792 |
+
"learning_rate": 9.999999999999999e-06,
|
| 793 |
+
"loss": 0.4176,
|
| 794 |
+
"step": 39500
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 3.4,
|
| 798 |
+
"learning_rate": 9.999999999999999e-06,
|
| 799 |
+
"loss": 0.4164,
|
| 800 |
+
"step": 40000
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"epoch": 3.4,
|
| 804 |
+
"eval_loss": 0.3855785131454468,
|
| 805 |
+
"eval_runtime": 15.2409,
|
| 806 |
+
"eval_samples_per_second": 32.806,
|
| 807 |
+
"eval_steps_per_second": 1.05,
|
| 808 |
+
"step": 40000
|
| 809 |
}
|
| 810 |
],
|
| 811 |
"max_steps": 1000000,
|
| 812 |
"num_train_epochs": 86,
|
| 813 |
+
"total_flos": 1.84008340746311e+21,
|
| 814 |
"trial_name": null,
|
| 815 |
"trial_params": null
|
| 816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d09408dd133bba0b615b1fc392982c3e187892b1f9f86f244d616011599238fa
|
| 3 |
size 449471589
|