Training in progress, step 890000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:683c63bc197cf3bb64f6c2ce95a62fc4f0bf6028b19e6d2e5831707a2f06c758
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40f6901c05f87feaf8e889e20cabff1c85d845893260d2343fe525c0b122a6e9
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05ef6355cd8ed257d93bbf3bb6826ffc7fce329c4f54e85b15a0f850c99370ac
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2afa6aa14483adb7c817c2439178a198c4680dbfe427eab82def33bea1566914
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -17606,11 +17606,211 @@
|
|
| 17606 |
"eval_samples_per_second": 870.144,
|
| 17607 |
"eval_steps_per_second": 13.637,
|
| 17608 |
"step": 880000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17609 |
}
|
| 17610 |
],
|
| 17611 |
"max_steps": 1000000,
|
| 17612 |
"num_train_epochs": 12,
|
| 17613 |
-
"total_flos": 6.
|
| 17614 |
"trial_name": null,
|
| 17615 |
"trial_params": null
|
| 17616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.568919175133555,
|
| 5 |
+
"global_step": 890000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 17606 |
"eval_samples_per_second": 870.144,
|
| 17607 |
"eval_steps_per_second": 13.637,
|
| 17608 |
"step": 880000
|
| 17609 |
+
},
|
| 17610 |
+
{
|
| 17611 |
+
"epoch": 9.46,
|
| 17612 |
+
"learning_rate": 1.539507089487205e-05,
|
| 17613 |
+
"loss": 0.183,
|
| 17614 |
+
"step": 880500
|
| 17615 |
+
},
|
| 17616 |
+
{
|
| 17617 |
+
"epoch": 9.47,
|
| 17618 |
+
"learning_rate": 1.535060126557028e-05,
|
| 17619 |
+
"loss": 0.1829,
|
| 17620 |
+
"step": 881000
|
| 17621 |
+
},
|
| 17622 |
+
{
|
| 17623 |
+
"epoch": 9.47,
|
| 17624 |
+
"eval_loss": 0.17408204078674316,
|
| 17625 |
+
"eval_runtime": 2.6439,
|
| 17626 |
+
"eval_samples_per_second": 868.809,
|
| 17627 |
+
"eval_steps_per_second": 13.617,
|
| 17628 |
+
"step": 881000
|
| 17629 |
+
},
|
| 17630 |
+
{
|
| 17631 |
+
"epoch": 9.47,
|
| 17632 |
+
"learning_rate": 1.5306308385255997e-05,
|
| 17633 |
+
"loss": 0.1827,
|
| 17634 |
+
"step": 881500
|
| 17635 |
+
},
|
| 17636 |
+
{
|
| 17637 |
+
"epoch": 9.48,
|
| 17638 |
+
"learning_rate": 1.5262192375024284e-05,
|
| 17639 |
+
"loss": 0.1827,
|
| 17640 |
+
"step": 882000
|
| 17641 |
+
},
|
| 17642 |
+
{
|
| 17643 |
+
"epoch": 9.48,
|
| 17644 |
+
"eval_loss": 0.17428572475910187,
|
| 17645 |
+
"eval_runtime": 2.6251,
|
| 17646 |
+
"eval_samples_per_second": 875.0,
|
| 17647 |
+
"eval_steps_per_second": 13.714,
|
| 17648 |
+
"step": 882000
|
| 17649 |
+
},
|
| 17650 |
+
{
|
| 17651 |
+
"epoch": 9.49,
|
| 17652 |
+
"learning_rate": 1.521825335548661e-05,
|
| 17653 |
+
"loss": 0.1832,
|
| 17654 |
+
"step": 882500
|
| 17655 |
+
},
|
| 17656 |
+
{
|
| 17657 |
+
"epoch": 9.49,
|
| 17658 |
+
"learning_rate": 1.5174491446770566e-05,
|
| 17659 |
+
"loss": 0.1827,
|
| 17660 |
+
"step": 883000
|
| 17661 |
+
},
|
| 17662 |
+
{
|
| 17663 |
+
"epoch": 9.49,
|
| 17664 |
+
"eval_loss": 0.17153075337409973,
|
| 17665 |
+
"eval_runtime": 2.6515,
|
| 17666 |
+
"eval_samples_per_second": 866.317,
|
| 17667 |
+
"eval_steps_per_second": 13.577,
|
| 17668 |
+
"step": 883000
|
| 17669 |
+
},
|
| 17670 |
+
{
|
| 17671 |
+
"epoch": 9.5,
|
| 17672 |
+
"learning_rate": 1.5130906768519563e-05,
|
| 17673 |
+
"loss": 0.1827,
|
| 17674 |
+
"step": 883500
|
| 17675 |
+
},
|
| 17676 |
+
{
|
| 17677 |
+
"epoch": 9.5,
|
| 17678 |
+
"learning_rate": 1.508749943989242e-05,
|
| 17679 |
+
"loss": 0.183,
|
| 17680 |
+
"step": 884000
|
| 17681 |
+
},
|
| 17682 |
+
{
|
| 17683 |
+
"epoch": 9.5,
|
| 17684 |
+
"eval_loss": 0.17301537096500397,
|
| 17685 |
+
"eval_runtime": 2.656,
|
| 17686 |
+
"eval_samples_per_second": 864.819,
|
| 17687 |
+
"eval_steps_per_second": 13.554,
|
| 17688 |
+
"step": 884000
|
| 17689 |
+
},
|
| 17690 |
+
{
|
| 17691 |
+
"epoch": 9.51,
|
| 17692 |
+
"learning_rate": 1.5044269579563144e-05,
|
| 17693 |
+
"loss": 0.1825,
|
| 17694 |
+
"step": 884500
|
| 17695 |
+
},
|
| 17696 |
+
{
|
| 17697 |
+
"epoch": 9.51,
|
| 17698 |
+
"learning_rate": 1.500121730572051e-05,
|
| 17699 |
+
"loss": 0.183,
|
| 17700 |
+
"step": 885000
|
| 17701 |
+
},
|
| 17702 |
+
{
|
| 17703 |
+
"epoch": 9.51,
|
| 17704 |
+
"eval_loss": 0.17374014854431152,
|
| 17705 |
+
"eval_runtime": 2.719,
|
| 17706 |
+
"eval_samples_per_second": 844.787,
|
| 17707 |
+
"eval_steps_per_second": 13.24,
|
| 17708 |
+
"step": 885000
|
| 17709 |
+
},
|
| 17710 |
+
{
|
| 17711 |
+
"epoch": 9.52,
|
| 17712 |
+
"learning_rate": 1.4958342736067783e-05,
|
| 17713 |
+
"loss": 0.1829,
|
| 17714 |
+
"step": 885500
|
| 17715 |
+
},
|
| 17716 |
+
{
|
| 17717 |
+
"epoch": 9.52,
|
| 17718 |
+
"learning_rate": 1.4915645987822406e-05,
|
| 17719 |
+
"loss": 0.1829,
|
| 17720 |
+
"step": 886000
|
| 17721 |
+
},
|
| 17722 |
+
{
|
| 17723 |
+
"epoch": 9.52,
|
| 17724 |
+
"eval_loss": 0.17604438960552216,
|
| 17725 |
+
"eval_runtime": 2.7026,
|
| 17726 |
+
"eval_samples_per_second": 849.921,
|
| 17727 |
+
"eval_steps_per_second": 13.32,
|
| 17728 |
+
"step": 886000
|
| 17729 |
+
},
|
| 17730 |
+
{
|
| 17731 |
+
"epoch": 9.53,
|
| 17732 |
+
"learning_rate": 1.4873127177715653e-05,
|
| 17733 |
+
"loss": 0.1827,
|
| 17734 |
+
"step": 886500
|
| 17735 |
+
},
|
| 17736 |
+
{
|
| 17737 |
+
"epoch": 9.54,
|
| 17738 |
+
"learning_rate": 1.4830786421992347e-05,
|
| 17739 |
+
"loss": 0.1829,
|
| 17740 |
+
"step": 887000
|
| 17741 |
+
},
|
| 17742 |
+
{
|
| 17743 |
+
"epoch": 9.54,
|
| 17744 |
+
"eval_loss": 0.17339639365673065,
|
| 17745 |
+
"eval_runtime": 2.7392,
|
| 17746 |
+
"eval_samples_per_second": 838.573,
|
| 17747 |
+
"eval_steps_per_second": 13.143,
|
| 17748 |
+
"step": 887000
|
| 17749 |
+
},
|
| 17750 |
+
{
|
| 17751 |
+
"epoch": 9.54,
|
| 17752 |
+
"learning_rate": 1.4788623836410479e-05,
|
| 17753 |
+
"loss": 0.1823,
|
| 17754 |
+
"step": 887500
|
| 17755 |
+
},
|
| 17756 |
+
{
|
| 17757 |
+
"epoch": 9.55,
|
| 17758 |
+
"learning_rate": 1.4746639536240942e-05,
|
| 17759 |
+
"loss": 0.1824,
|
| 17760 |
+
"step": 888000
|
| 17761 |
+
},
|
| 17762 |
+
{
|
| 17763 |
+
"epoch": 9.55,
|
| 17764 |
+
"eval_loss": 0.17382191121578217,
|
| 17765 |
+
"eval_runtime": 2.7435,
|
| 17766 |
+
"eval_samples_per_second": 837.254,
|
| 17767 |
+
"eval_steps_per_second": 13.122,
|
| 17768 |
+
"step": 888000
|
| 17769 |
+
},
|
| 17770 |
+
{
|
| 17771 |
+
"epoch": 9.55,
|
| 17772 |
+
"learning_rate": 1.4704833636267232e-05,
|
| 17773 |
+
"loss": 0.1825,
|
| 17774 |
+
"step": 888500
|
| 17775 |
+
},
|
| 17776 |
+
{
|
| 17777 |
+
"epoch": 9.56,
|
| 17778 |
+
"learning_rate": 1.4663206250785055e-05,
|
| 17779 |
+
"loss": 0.1824,
|
| 17780 |
+
"step": 889000
|
| 17781 |
+
},
|
| 17782 |
+
{
|
| 17783 |
+
"epoch": 9.56,
|
| 17784 |
+
"eval_loss": 0.17390523850917816,
|
| 17785 |
+
"eval_runtime": 2.7145,
|
| 17786 |
+
"eval_samples_per_second": 846.211,
|
| 17787 |
+
"eval_steps_per_second": 13.262,
|
| 17788 |
+
"step": 889000
|
| 17789 |
+
},
|
| 17790 |
+
{
|
| 17791 |
+
"epoch": 9.56,
|
| 17792 |
+
"learning_rate": 1.4621757493602125e-05,
|
| 17793 |
+
"loss": 0.1826,
|
| 17794 |
+
"step": 889500
|
| 17795 |
+
},
|
| 17796 |
+
{
|
| 17797 |
+
"epoch": 9.57,
|
| 17798 |
+
"learning_rate": 1.4580487478037748e-05,
|
| 17799 |
+
"loss": 0.1826,
|
| 17800 |
+
"step": 890000
|
| 17801 |
+
},
|
| 17802 |
+
{
|
| 17803 |
+
"epoch": 9.57,
|
| 17804 |
+
"eval_loss": 0.17268939316272736,
|
| 17805 |
+
"eval_runtime": 2.6865,
|
| 17806 |
+
"eval_samples_per_second": 855.002,
|
| 17807 |
+
"eval_steps_per_second": 13.4,
|
| 17808 |
+
"step": 890000
|
| 17809 |
}
|
| 17810 |
],
|
| 17811 |
"max_steps": 1000000,
|
| 17812 |
"num_train_epochs": 12,
|
| 17813 |
+
"total_flos": 6.238873943673255e+22,
|
| 17814 |
"trial_name": null,
|
| 17815 |
"trial_params": null
|
| 17816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40f6901c05f87feaf8e889e20cabff1c85d845893260d2343fe525c0b122a6e9
|
| 3 |
size 449471589
|