Training in progress, step 840000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24859c623c6a5769d23a445d9e652805ef93ef8232d0532f3fafc5dad772c85e
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b27758f4c4f1be46ca953f785452acae2687180a06e7c14c3b975c46e8947612
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4e2d13a0c0716c1c500a2d28a58a0073e425d28549a76ecb1f2b04513b7909
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b75da63b821a4c72c4b37f39fc301b88ce6e4d7dc37edf4f078b7f5706f736e3
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -16606,11 +16606,211 @@
|
|
| 16606 |
"eval_samples_per_second": 864.555,
|
| 16607 |
"eval_steps_per_second": 13.55,
|
| 16608 |
"step": 830000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16609 |
}
|
| 16610 |
],
|
| 16611 |
"max_steps": 1000000,
|
| 16612 |
"num_train_epochs": 12,
|
| 16613 |
-
"total_flos": 5.
|
| 16614 |
"trial_name": null,
|
| 16615 |
"trial_params": null
|
| 16616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.011275554018937,
|
| 5 |
+
"global_step": 840000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 16606 |
"eval_samples_per_second": 864.555,
|
| 16607 |
"eval_steps_per_second": 13.55,
|
| 16608 |
"step": 830000
|
| 16609 |
+
},
|
| 16610 |
+
{
|
| 16611 |
+
"epoch": 8.91,
|
| 16612 |
+
"learning_rate": 2.0711718689098057e-05,
|
| 16613 |
+
"loss": 0.1858,
|
| 16614 |
+
"step": 830500
|
| 16615 |
+
},
|
| 16616 |
+
{
|
| 16617 |
+
"epoch": 8.91,
|
| 16618 |
+
"learning_rate": 2.0650267139558772e-05,
|
| 16619 |
+
"loss": 0.1854,
|
| 16620 |
+
"step": 831000
|
| 16621 |
+
},
|
| 16622 |
+
{
|
| 16623 |
+
"epoch": 8.91,
|
| 16624 |
+
"eval_loss": 0.17501012980937958,
|
| 16625 |
+
"eval_runtime": 2.6667,
|
| 16626 |
+
"eval_samples_per_second": 861.35,
|
| 16627 |
+
"eval_steps_per_second": 13.5,
|
| 16628 |
+
"step": 831000
|
| 16629 |
+
},
|
| 16630 |
+
{
|
| 16631 |
+
"epoch": 8.92,
|
| 16632 |
+
"learning_rate": 2.058897784992289e-05,
|
| 16633 |
+
"loss": 0.1855,
|
| 16634 |
+
"step": 831500
|
| 16635 |
+
},
|
| 16636 |
+
{
|
| 16637 |
+
"epoch": 8.92,
|
| 16638 |
+
"learning_rate": 2.052785098775293e-05,
|
| 16639 |
+
"loss": 0.1855,
|
| 16640 |
+
"step": 832000
|
| 16641 |
+
},
|
| 16642 |
+
{
|
| 16643 |
+
"epoch": 8.92,
|
| 16644 |
+
"eval_loss": 0.17379425466060638,
|
| 16645 |
+
"eval_runtime": 2.675,
|
| 16646 |
+
"eval_samples_per_second": 858.689,
|
| 16647 |
+
"eval_steps_per_second": 13.458,
|
| 16648 |
+
"step": 832000
|
| 16649 |
+
},
|
| 16650 |
+
{
|
| 16651 |
+
"epoch": 8.93,
|
| 16652 |
+
"learning_rate": 2.0466886720167436e-05,
|
| 16653 |
+
"loss": 0.1847,
|
| 16654 |
+
"step": 832500
|
| 16655 |
+
},
|
| 16656 |
+
{
|
| 16657 |
+
"epoch": 8.93,
|
| 16658 |
+
"learning_rate": 2.04060852138404e-05,
|
| 16659 |
+
"loss": 0.1854,
|
| 16660 |
+
"step": 833000
|
| 16661 |
+
},
|
| 16662 |
+
{
|
| 16663 |
+
"epoch": 8.93,
|
| 16664 |
+
"eval_loss": 0.1764400452375412,
|
| 16665 |
+
"eval_runtime": 2.6031,
|
| 16666 |
+
"eval_samples_per_second": 882.425,
|
| 16667 |
+
"eval_steps_per_second": 13.83,
|
| 16668 |
+
"step": 833000
|
| 16669 |
+
},
|
| 16670 |
+
{
|
| 16671 |
+
"epoch": 8.94,
|
| 16672 |
+
"learning_rate": 2.0345446635000783e-05,
|
| 16673 |
+
"loss": 0.1856,
|
| 16674 |
+
"step": 833500
|
| 16675 |
+
},
|
| 16676 |
+
{
|
| 16677 |
+
"epoch": 8.94,
|
| 16678 |
+
"learning_rate": 2.028497114943219e-05,
|
| 16679 |
+
"loss": 0.1851,
|
| 16680 |
+
"step": 834000
|
| 16681 |
+
},
|
| 16682 |
+
{
|
| 16683 |
+
"epoch": 8.94,
|
| 16684 |
+
"eval_loss": 0.17593778669834137,
|
| 16685 |
+
"eval_runtime": 2.5824,
|
| 16686 |
+
"eval_samples_per_second": 889.497,
|
| 16687 |
+
"eval_steps_per_second": 13.941,
|
| 16688 |
+
"step": 834000
|
| 16689 |
+
},
|
| 16690 |
+
{
|
| 16691 |
+
"epoch": 8.95,
|
| 16692 |
+
"learning_rate": 2.022465892247223e-05,
|
| 16693 |
+
"loss": 0.1855,
|
| 16694 |
+
"step": 834500
|
| 16695 |
+
},
|
| 16696 |
+
{
|
| 16697 |
+
"epoch": 8.96,
|
| 16698 |
+
"learning_rate": 2.0164510119012263e-05,
|
| 16699 |
+
"loss": 0.1849,
|
| 16700 |
+
"step": 835000
|
| 16701 |
+
},
|
| 16702 |
+
{
|
| 16703 |
+
"epoch": 8.96,
|
| 16704 |
+
"eval_loss": 0.1772100031375885,
|
| 16705 |
+
"eval_runtime": 2.6877,
|
| 16706 |
+
"eval_samples_per_second": 854.619,
|
| 16707 |
+
"eval_steps_per_second": 13.394,
|
| 16708 |
+
"step": 835000
|
| 16709 |
+
},
|
| 16710 |
+
{
|
| 16711 |
+
"epoch": 8.96,
|
| 16712 |
+
"learning_rate": 2.0104524903496834e-05,
|
| 16713 |
+
"loss": 0.1852,
|
| 16714 |
+
"step": 835500
|
| 16715 |
+
},
|
| 16716 |
+
{
|
| 16717 |
+
"epoch": 8.97,
|
| 16718 |
+
"learning_rate": 2.0044703439923217e-05,
|
| 16719 |
+
"loss": 0.1854,
|
| 16720 |
+
"step": 836000
|
| 16721 |
+
},
|
| 16722 |
+
{
|
| 16723 |
+
"epoch": 8.97,
|
| 16724 |
+
"eval_loss": 0.17744192481040955,
|
| 16725 |
+
"eval_runtime": 2.6114,
|
| 16726 |
+
"eval_samples_per_second": 879.619,
|
| 16727 |
+
"eval_steps_per_second": 13.786,
|
| 16728 |
+
"step": 836000
|
| 16729 |
+
},
|
| 16730 |
+
{
|
| 16731 |
+
"epoch": 8.97,
|
| 16732 |
+
"learning_rate": 1.998504589184101e-05,
|
| 16733 |
+
"loss": 0.1851,
|
| 16734 |
+
"step": 836500
|
| 16735 |
+
},
|
| 16736 |
+
{
|
| 16737 |
+
"epoch": 8.98,
|
| 16738 |
+
"learning_rate": 1.9925552422351654e-05,
|
| 16739 |
+
"loss": 0.1849,
|
| 16740 |
+
"step": 837000
|
| 16741 |
+
},
|
| 16742 |
+
{
|
| 16743 |
+
"epoch": 8.98,
|
| 16744 |
+
"eval_loss": 0.1755765676498413,
|
| 16745 |
+
"eval_runtime": 2.6326,
|
| 16746 |
+
"eval_samples_per_second": 872.529,
|
| 16747 |
+
"eval_steps_per_second": 13.675,
|
| 16748 |
+
"step": 837000
|
| 16749 |
+
},
|
| 16750 |
+
{
|
| 16751 |
+
"epoch": 8.98,
|
| 16752 |
+
"learning_rate": 1.9866223194108028e-05,
|
| 16753 |
+
"loss": 0.1851,
|
| 16754 |
+
"step": 837500
|
| 16755 |
+
},
|
| 16756 |
+
{
|
| 16757 |
+
"epoch": 8.99,
|
| 16758 |
+
"learning_rate": 1.9807058369314016e-05,
|
| 16759 |
+
"loss": 0.1845,
|
| 16760 |
+
"step": 838000
|
| 16761 |
+
},
|
| 16762 |
+
{
|
| 16763 |
+
"epoch": 8.99,
|
| 16764 |
+
"eval_loss": 0.17676672339439392,
|
| 16765 |
+
"eval_runtime": 2.6846,
|
| 16766 |
+
"eval_samples_per_second": 855.61,
|
| 16767 |
+
"eval_steps_per_second": 13.41,
|
| 16768 |
+
"step": 838000
|
| 16769 |
+
},
|
| 16770 |
+
{
|
| 16771 |
+
"epoch": 8.99,
|
| 16772 |
+
"learning_rate": 1.9748058109723953e-05,
|
| 16773 |
+
"loss": 0.1852,
|
| 16774 |
+
"step": 838500
|
| 16775 |
+
},
|
| 16776 |
+
{
|
| 16777 |
+
"epoch": 9.0,
|
| 16778 |
+
"learning_rate": 1.968922257664231e-05,
|
| 16779 |
+
"loss": 0.1853,
|
| 16780 |
+
"step": 839000
|
| 16781 |
+
},
|
| 16782 |
+
{
|
| 16783 |
+
"epoch": 9.0,
|
| 16784 |
+
"eval_loss": 0.17678546905517578,
|
| 16785 |
+
"eval_runtime": 2.6872,
|
| 16786 |
+
"eval_samples_per_second": 854.778,
|
| 16787 |
+
"eval_steps_per_second": 13.397,
|
| 16788 |
+
"step": 839000
|
| 16789 |
+
},
|
| 16790 |
+
{
|
| 16791 |
+
"epoch": 9.01,
|
| 16792 |
+
"learning_rate": 1.9630551930923155e-05,
|
| 16793 |
+
"loss": 0.1851,
|
| 16794 |
+
"step": 839500
|
| 16795 |
+
},
|
| 16796 |
+
{
|
| 16797 |
+
"epoch": 9.01,
|
| 16798 |
+
"learning_rate": 1.9572046332969825e-05,
|
| 16799 |
+
"loss": 0.1848,
|
| 16800 |
+
"step": 840000
|
| 16801 |
+
},
|
| 16802 |
+
{
|
| 16803 |
+
"epoch": 9.01,
|
| 16804 |
+
"eval_loss": 0.1751183122396469,
|
| 16805 |
+
"eval_runtime": 2.6372,
|
| 16806 |
+
"eval_samples_per_second": 870.984,
|
| 16807 |
+
"eval_steps_per_second": 13.651,
|
| 16808 |
+
"step": 840000
|
| 16809 |
}
|
| 16810 |
],
|
| 16811 |
"max_steps": 1000000,
|
| 16812 |
"num_train_epochs": 12,
|
| 16813 |
+
"total_flos": 5.8883726385215196e+22,
|
| 16814 |
"trial_name": null,
|
| 16815 |
"trial_params": null
|
| 16816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b27758f4c4f1be46ca953f785452acae2687180a06e7c14c3b975c46e8947612
|
| 3 |
size 449471589
|