Training in progress, step 594, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2147605960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44ec759cdb5f06b9e582baac1d961ceb3d11fcd7f8b6bcd68bb72968ad4fc853
|
| 3 |
size 2147605960
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1091573733
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e81b8e84d0a507cddcee392d3cd6e9ef9a76ed6e9f3e1b078a9c884d9902f9fd
|
| 3 |
size 1091573733
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04541f08bf69ac7fe480c21fd1822bf1bcd1879399508271123652fdf993af0e
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d74f2392731f9bfe3e3fcd2fa4bb9529cf683336ccaf940189be2143efabe210
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -708,6 +708,132 @@
|
|
| 708 |
"learning_rate": 1.3126041392116772e-05,
|
| 709 |
"loss": 0.5162,
|
| 710 |
"step": 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 711 |
}
|
| 712 |
],
|
| 713 |
"logging_steps": 5,
|
|
@@ -722,12 +848,12 @@
|
|
| 722 |
"should_evaluate": false,
|
| 723 |
"should_log": false,
|
| 724 |
"should_save": true,
|
| 725 |
-
"should_training_stop":
|
| 726 |
},
|
| 727 |
"attributes": {}
|
| 728 |
}
|
| 729 |
},
|
| 730 |
-
"total_flos": 3.
|
| 731 |
"train_batch_size": 4,
|
| 732 |
"trial_name": null,
|
| 733 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 594,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 708 |
"learning_rate": 1.3126041392116772e-05,
|
| 709 |
"loss": 0.5162,
|
| 710 |
"step": 500
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"epoch": 0.8505263157894737,
|
| 714 |
+
"grad_norm": 0.11415216326713562,
|
| 715 |
+
"learning_rate": 1.1807873565164506e-05,
|
| 716 |
+
"loss": 0.5486,
|
| 717 |
+
"step": 505
|
| 718 |
+
},
|
| 719 |
+
{
|
| 720 |
+
"epoch": 0.8589473684210527,
|
| 721 |
+
"grad_norm": 0.12088897824287415,
|
| 722 |
+
"learning_rate": 1.0555289705749483e-05,
|
| 723 |
+
"loss": 0.5531,
|
| 724 |
+
"step": 510
|
| 725 |
+
},
|
| 726 |
+
{
|
| 727 |
+
"epoch": 0.8673684210526316,
|
| 728 |
+
"grad_norm": 0.1071295514702797,
|
| 729 |
+
"learning_rate": 9.369221296335006e-06,
|
| 730 |
+
"loss": 0.5187,
|
| 731 |
+
"step": 515
|
| 732 |
+
},
|
| 733 |
+
{
|
| 734 |
+
"epoch": 0.8757894736842106,
|
| 735 |
+
"grad_norm": 0.11686161160469055,
|
| 736 |
+
"learning_rate": 8.250550355250875e-06,
|
| 737 |
+
"loss": 0.5589,
|
| 738 |
+
"step": 520
|
| 739 |
+
},
|
| 740 |
+
{
|
| 741 |
+
"epoch": 0.8842105263157894,
|
| 742 |
+
"grad_norm": 0.11512535065412521,
|
| 743 |
+
"learning_rate": 7.200108780781556e-06,
|
| 744 |
+
"loss": 0.5505,
|
| 745 |
+
"step": 525
|
| 746 |
+
},
|
| 747 |
+
{
|
| 748 |
+
"epoch": 0.8926315789473684,
|
| 749 |
+
"grad_norm": 0.10691066831350327,
|
| 750 |
+
"learning_rate": 6.218677732526035e-06,
|
| 751 |
+
"loss": 0.538,
|
| 752 |
+
"step": 530
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 0.9010526315789473,
|
| 756 |
+
"grad_norm": 0.10703324526548386,
|
| 757 |
+
"learning_rate": 5.306987050489442e-06,
|
| 758 |
+
"loss": 0.5487,
|
| 759 |
+
"step": 535
|
| 760 |
+
},
|
| 761 |
+
{
|
| 762 |
+
"epoch": 0.9094736842105263,
|
| 763 |
+
"grad_norm": 0.11932916939258575,
|
| 764 |
+
"learning_rate": 4.465714712338398e-06,
|
| 765 |
+
"loss": 0.5522,
|
| 766 |
+
"step": 540
|
| 767 |
+
},
|
| 768 |
+
{
|
| 769 |
+
"epoch": 0.9178947368421052,
|
| 770 |
+
"grad_norm": 0.11946967244148254,
|
| 771 |
+
"learning_rate": 3.6954863292237297e-06,
|
| 772 |
+
"loss": 0.5326,
|
| 773 |
+
"step": 545
|
| 774 |
+
},
|
| 775 |
+
{
|
| 776 |
+
"epoch": 0.9263157894736842,
|
| 777 |
+
"grad_norm": 0.11607641726732254,
|
| 778 |
+
"learning_rate": 2.996874680545603e-06,
|
| 779 |
+
"loss": 0.5393,
|
| 780 |
+
"step": 550
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 0.9347368421052632,
|
| 784 |
+
"grad_norm": 0.11775229871273041,
|
| 785 |
+
"learning_rate": 2.3703992880066638e-06,
|
| 786 |
+
"loss": 0.5182,
|
| 787 |
+
"step": 555
|
| 788 |
+
},
|
| 789 |
+
{
|
| 790 |
+
"epoch": 0.9431578947368421,
|
| 791 |
+
"grad_norm": 0.1001369059085846,
|
| 792 |
+
"learning_rate": 1.8165260292704711e-06,
|
| 793 |
+
"loss": 0.5487,
|
| 794 |
+
"step": 560
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 0.9515789473684211,
|
| 798 |
+
"grad_norm": 0.12595337629318237,
|
| 799 |
+
"learning_rate": 1.3356667915121025e-06,
|
| 800 |
+
"loss": 0.5291,
|
| 801 |
+
"step": 565
|
| 802 |
+
},
|
| 803 |
+
{
|
| 804 |
+
"epoch": 0.96,
|
| 805 |
+
"grad_norm": 0.09997394680976868,
|
| 806 |
+
"learning_rate": 9.281791651187366e-07,
|
| 807 |
+
"loss": 0.5309,
|
| 808 |
+
"step": 570
|
| 809 |
+
},
|
| 810 |
+
{
|
| 811 |
+
"epoch": 0.968421052631579,
|
| 812 |
+
"grad_norm": 0.12577302753925323,
|
| 813 |
+
"learning_rate": 5.943661777680354e-07,
|
| 814 |
+
"loss": 0.5805,
|
| 815 |
+
"step": 575
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 0.9768421052631578,
|
| 819 |
+
"grad_norm": 0.11574060469865799,
|
| 820 |
+
"learning_rate": 3.3447606908196817e-07,
|
| 821 |
+
"loss": 0.5498,
|
| 822 |
+
"step": 580
|
| 823 |
+
},
|
| 824 |
+
{
|
| 825 |
+
"epoch": 0.9852631578947368,
|
| 826 |
+
"grad_norm": 0.1203237846493721,
|
| 827 |
+
"learning_rate": 1.487021060236904e-07,
|
| 828 |
+
"loss": 0.5443,
|
| 829 |
+
"step": 585
|
| 830 |
+
},
|
| 831 |
+
{
|
| 832 |
+
"epoch": 0.9936842105263158,
|
| 833 |
+
"grad_norm": 0.11638718843460083,
|
| 834 |
+
"learning_rate": 3.7182439174832106e-08,
|
| 835 |
+
"loss": 0.5372,
|
| 836 |
+
"step": 590
|
| 837 |
}
|
| 838 |
],
|
| 839 |
"logging_steps": 5,
|
|
|
|
| 848 |
"should_evaluate": false,
|
| 849 |
"should_log": false,
|
| 850 |
"should_save": true,
|
| 851 |
+
"should_training_stop": true
|
| 852 |
},
|
| 853 |
"attributes": {}
|
| 854 |
}
|
| 855 |
},
|
| 856 |
+
"total_flos": 3.781344564135076e+18,
|
| 857 |
"train_batch_size": 4,
|
| 858 |
"trial_name": null,
|
| 859 |
"trial_params": null
|