Training in progress, step 940000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0c72fdd0154d524cda12334954eb1e4f193d30dc2134990578a195ba70ede7f
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0068dc2eb70214cc7f3762f96ca920a8342113b2f223dfdea92c9f41e6012f4
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04943bf52360b66fe1cc3a9ed304be65d048fdffb6ef9f94a272755dcb0d94a4
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dab84d4b75593cd9349f424c4371ea8ac2493751bc544a294c8ef74a18b08e9
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -18606,11 +18606,211 @@
|
|
| 18606 |
"eval_samples_per_second": 877.459,
|
| 18607 |
"eval_steps_per_second": 13.752,
|
| 18608 |
"step": 930000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18609 |
}
|
| 18610 |
],
|
| 18611 |
"max_steps": 1000000,
|
| 18612 |
"num_train_epochs": 12,
|
| 18613 |
-
"total_flos": 6.
|
| 18614 |
"trial_name": null,
|
| 18615 |
"trial_params": null
|
| 18616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.126562796248173,
|
| 5 |
+
"global_step": 940000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 18606 |
"eval_samples_per_second": 877.459,
|
| 18607 |
"eval_steps_per_second": 13.752,
|
| 18608 |
"step": 930000
|
| 18609 |
+
},
|
| 18610 |
+
{
|
| 18611 |
+
"epoch": 10.02,
|
| 18612 |
+
"learning_rate": 1.1840677154152987e-05,
|
| 18613 |
+
"loss": 0.181,
|
| 18614 |
+
"step": 930500
|
| 18615 |
+
},
|
| 18616 |
+
{
|
| 18617 |
+
"epoch": 10.03,
|
| 18618 |
+
"learning_rate": 1.1814402460652382e-05,
|
| 18619 |
+
"loss": 0.1811,
|
| 18620 |
+
"step": 931000
|
| 18621 |
+
},
|
| 18622 |
+
{
|
| 18623 |
+
"epoch": 10.03,
|
| 18624 |
+
"eval_loss": 0.17144934833049774,
|
| 18625 |
+
"eval_runtime": 2.7383,
|
| 18626 |
+
"eval_samples_per_second": 838.849,
|
| 18627 |
+
"eval_steps_per_second": 13.147,
|
| 18628 |
+
"step": 931000
|
| 18629 |
+
},
|
| 18630 |
+
{
|
| 18631 |
+
"epoch": 10.03,
|
| 18632 |
+
"learning_rate": 1.178831418397181e-05,
|
| 18633 |
+
"loss": 0.181,
|
| 18634 |
+
"step": 931500
|
| 18635 |
+
},
|
| 18636 |
+
{
|
| 18637 |
+
"epoch": 10.04,
|
| 18638 |
+
"learning_rate": 1.176241239543558e-05,
|
| 18639 |
+
"loss": 0.181,
|
| 18640 |
+
"step": 932000
|
| 18641 |
+
},
|
| 18642 |
+
{
|
| 18643 |
+
"epoch": 10.04,
|
| 18644 |
+
"eval_loss": 0.17210912704467773,
|
| 18645 |
+
"eval_runtime": 2.6312,
|
| 18646 |
+
"eval_samples_per_second": 872.975,
|
| 18647 |
+
"eval_steps_per_second": 13.682,
|
| 18648 |
+
"step": 932000
|
| 18649 |
+
},
|
| 18650 |
+
{
|
| 18651 |
+
"epoch": 10.04,
|
| 18652 |
+
"learning_rate": 1.173669716585822e-05,
|
| 18653 |
+
"loss": 0.1809,
|
| 18654 |
+
"step": 932500
|
| 18655 |
+
},
|
| 18656 |
+
{
|
| 18657 |
+
"epoch": 10.05,
|
| 18658 |
+
"learning_rate": 1.171116856554418e-05,
|
| 18659 |
+
"loss": 0.1809,
|
| 18660 |
+
"step": 933000
|
| 18661 |
+
},
|
| 18662 |
+
{
|
| 18663 |
+
"epoch": 10.05,
|
| 18664 |
+
"eval_loss": 0.17279262840747833,
|
| 18665 |
+
"eval_runtime": 2.687,
|
| 18666 |
+
"eval_samples_per_second": 854.858,
|
| 18667 |
+
"eval_steps_per_second": 13.398,
|
| 18668 |
+
"step": 933000
|
| 18669 |
+
},
|
| 18670 |
+
{
|
| 18671 |
+
"epoch": 10.05,
|
| 18672 |
+
"learning_rate": 1.168582666428768e-05,
|
| 18673 |
+
"loss": 0.1809,
|
| 18674 |
+
"step": 933500
|
| 18675 |
+
},
|
| 18676 |
+
{
|
| 18677 |
+
"epoch": 10.06,
|
| 18678 |
+
"learning_rate": 1.1660671531372517e-05,
|
| 18679 |
+
"loss": 0.1807,
|
| 18680 |
+
"step": 934000
|
| 18681 |
+
},
|
| 18682 |
+
{
|
| 18683 |
+
"epoch": 10.06,
|
| 18684 |
+
"eval_loss": 0.17214839160442352,
|
| 18685 |
+
"eval_runtime": 2.6862,
|
| 18686 |
+
"eval_samples_per_second": 855.103,
|
| 18687 |
+
"eval_steps_per_second": 13.402,
|
| 18688 |
+
"step": 934000
|
| 18689 |
+
},
|
| 18690 |
+
{
|
| 18691 |
+
"epoch": 10.07,
|
| 18692 |
+
"learning_rate": 1.1635703235571846e-05,
|
| 18693 |
+
"loss": 0.181,
|
| 18694 |
+
"step": 934500
|
| 18695 |
+
},
|
| 18696 |
+
{
|
| 18697 |
+
"epoch": 10.07,
|
| 18698 |
+
"learning_rate": 1.1610921845148052e-05,
|
| 18699 |
+
"loss": 0.1805,
|
| 18700 |
+
"step": 935000
|
| 18701 |
+
},
|
| 18702 |
+
{
|
| 18703 |
+
"epoch": 10.07,
|
| 18704 |
+
"eval_loss": 0.17261220514774323,
|
| 18705 |
+
"eval_runtime": 2.7622,
|
| 18706 |
+
"eval_samples_per_second": 831.593,
|
| 18707 |
+
"eval_steps_per_second": 13.033,
|
| 18708 |
+
"step": 935000
|
| 18709 |
+
},
|
| 18710 |
+
{
|
| 18711 |
+
"epoch": 10.08,
|
| 18712 |
+
"learning_rate": 1.1586327427852503e-05,
|
| 18713 |
+
"loss": 0.1805,
|
| 18714 |
+
"step": 935500
|
| 18715 |
+
},
|
| 18716 |
+
{
|
| 18717 |
+
"epoch": 10.08,
|
| 18718 |
+
"learning_rate": 1.156192005092539e-05,
|
| 18719 |
+
"loss": 0.1807,
|
| 18720 |
+
"step": 936000
|
| 18721 |
+
},
|
| 18722 |
+
{
|
| 18723 |
+
"epoch": 10.08,
|
| 18724 |
+
"eval_loss": 0.17041905224323273,
|
| 18725 |
+
"eval_runtime": 2.643,
|
| 18726 |
+
"eval_samples_per_second": 869.101,
|
| 18727 |
+
"eval_steps_per_second": 13.621,
|
| 18728 |
+
"step": 936000
|
| 18729 |
+
},
|
| 18730 |
+
{
|
| 18731 |
+
"epoch": 10.09,
|
| 18732 |
+
"learning_rate": 1.153769978109557e-05,
|
| 18733 |
+
"loss": 0.1806,
|
| 18734 |
+
"step": 936500
|
| 18735 |
+
},
|
| 18736 |
+
{
|
| 18737 |
+
"epoch": 10.09,
|
| 18738 |
+
"learning_rate": 1.1513666684580308e-05,
|
| 18739 |
+
"loss": 0.1809,
|
| 18740 |
+
"step": 937000
|
| 18741 |
+
},
|
| 18742 |
+
{
|
| 18743 |
+
"epoch": 10.09,
|
| 18744 |
+
"eval_loss": 0.1718713790178299,
|
| 18745 |
+
"eval_runtime": 2.6411,
|
| 18746 |
+
"eval_samples_per_second": 869.707,
|
| 18747 |
+
"eval_steps_per_second": 13.631,
|
| 18748 |
+
"step": 937000
|
| 18749 |
+
},
|
| 18750 |
+
{
|
| 18751 |
+
"epoch": 10.1,
|
| 18752 |
+
"learning_rate": 1.1489820827085185e-05,
|
| 18753 |
+
"loss": 0.1808,
|
| 18754 |
+
"step": 937500
|
| 18755 |
+
},
|
| 18756 |
+
{
|
| 18757 |
+
"epoch": 10.1,
|
| 18758 |
+
"learning_rate": 1.1466162273803876e-05,
|
| 18759 |
+
"loss": 0.1809,
|
| 18760 |
+
"step": 938000
|
| 18761 |
+
},
|
| 18762 |
+
{
|
| 18763 |
+
"epoch": 10.1,
|
| 18764 |
+
"eval_loss": 0.17236891388893127,
|
| 18765 |
+
"eval_runtime": 2.4881,
|
| 18766 |
+
"eval_samples_per_second": 923.197,
|
| 18767 |
+
"eval_steps_per_second": 14.469,
|
| 18768 |
+
"step": 938000
|
| 18769 |
+
},
|
| 18770 |
+
{
|
| 18771 |
+
"epoch": 10.11,
|
| 18772 |
+
"learning_rate": 1.144269108941795e-05,
|
| 18773 |
+
"loss": 0.1808,
|
| 18774 |
+
"step": 938500
|
| 18775 |
+
},
|
| 18776 |
+
{
|
| 18777 |
+
"epoch": 10.12,
|
| 18778 |
+
"learning_rate": 1.1419407338096732e-05,
|
| 18779 |
+
"loss": 0.1807,
|
| 18780 |
+
"step": 939000
|
| 18781 |
+
},
|
| 18782 |
+
{
|
| 18783 |
+
"epoch": 10.12,
|
| 18784 |
+
"eval_loss": 0.17213864624500275,
|
| 18785 |
+
"eval_runtime": 2.697,
|
| 18786 |
+
"eval_samples_per_second": 851.681,
|
| 18787 |
+
"eval_steps_per_second": 13.348,
|
| 18788 |
+
"step": 939000
|
| 18789 |
+
},
|
| 18790 |
+
{
|
| 18791 |
+
"epoch": 10.12,
|
| 18792 |
+
"learning_rate": 1.1396311083497103e-05,
|
| 18793 |
+
"loss": 0.1808,
|
| 18794 |
+
"step": 939500
|
| 18795 |
+
},
|
| 18796 |
+
{
|
| 18797 |
+
"epoch": 10.13,
|
| 18798 |
+
"learning_rate": 1.1373402388763346e-05,
|
| 18799 |
+
"loss": 0.1806,
|
| 18800 |
+
"step": 940000
|
| 18801 |
+
},
|
| 18802 |
+
{
|
| 18803 |
+
"epoch": 10.13,
|
| 18804 |
+
"eval_loss": 0.17225030064582825,
|
| 18805 |
+
"eval_runtime": 2.5852,
|
| 18806 |
+
"eval_samples_per_second": 888.512,
|
| 18807 |
+
"eval_steps_per_second": 13.925,
|
| 18808 |
+
"step": 940000
|
| 18809 |
}
|
| 18810 |
],
|
| 18811 |
"max_steps": 1000000,
|
| 18812 |
"num_train_epochs": 12,
|
| 18813 |
+
"total_flos": 6.589369772377475e+22,
|
| 18814 |
"trial_name": null,
|
| 18815 |
"trial_params": null
|
| 18816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0068dc2eb70214cc7f3762f96ca920a8342113b2f223dfdea92c9f41e6012f4
|
| 3 |
size 449471589
|