Training in progress, step 8500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:663d31a8b6ad2423dc3c0b8759bef8029d3f5914e7b173b5be641f54497bab8c
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c93129eb07b3c389c642dd3ac521458eb6b0b8b0b4b6634a4a4ec236e73b73dd
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95d6f8a42fc11a5f0262b0c737f666f824322b1b030452310cca3fb10ffef9ad
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6cda9bcc9266ec91d2da20eab50cd7cea609c16666645a54519c40bab7f69f1a
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5743,6 +5743,364 @@
|
|
| 5743 |
"eval_samples_per_second": 267.812,
|
| 5744 |
"eval_steps_per_second": 5.624,
|
| 5745 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5746 |
}
|
| 5747 |
],
|
| 5748 |
"logging_steps": 10,
|
|
@@ -5762,7 +6120,7 @@
|
|
| 5762 |
"attributes": {}
|
| 5763 |
}
|
| 5764 |
},
|
| 5765 |
-
"total_flos": 2.
|
| 5766 |
"train_batch_size": 48,
|
| 5767 |
"trial_name": null,
|
| 5768 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.4360533873965196,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 8500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5743 |
"eval_samples_per_second": 267.812,
|
| 5744 |
"eval_steps_per_second": 5.624,
|
| 5745 |
"step": 8000
|
| 5746 |
+
},
|
| 5747 |
+
{
|
| 5748 |
+
"epoch": 1.353269133299544,
|
| 5749 |
+
"grad_norm": 0.463106632232666,
|
| 5750 |
+
"learning_rate": 9.883703849181374e-05,
|
| 5751 |
+
"loss": 4.368831634521484,
|
| 5752 |
+
"step": 8010
|
| 5753 |
+
},
|
| 5754 |
+
{
|
| 5755 |
+
"epoch": 1.3549586078729514,
|
| 5756 |
+
"grad_norm": 0.4774092137813568,
|
| 5757 |
+
"learning_rate": 9.838702606099289e-05,
|
| 5758 |
+
"loss": 4.350126647949219,
|
| 5759 |
+
"step": 8020
|
| 5760 |
+
},
|
| 5761 |
+
{
|
| 5762 |
+
"epoch": 1.3566480824463591,
|
| 5763 |
+
"grad_norm": 0.5083175897598267,
|
| 5764 |
+
"learning_rate": 9.793753994381003e-05,
|
| 5765 |
+
"loss": 4.375761032104492,
|
| 5766 |
+
"step": 8030
|
| 5767 |
+
},
|
| 5768 |
+
{
|
| 5769 |
+
"epoch": 1.3583375570197669,
|
| 5770 |
+
"grad_norm": 0.493473619222641,
|
| 5771 |
+
"learning_rate": 9.748858472381567e-05,
|
| 5772 |
+
"loss": 4.382857894897461,
|
| 5773 |
+
"step": 8040
|
| 5774 |
+
},
|
| 5775 |
+
{
|
| 5776 |
+
"epoch": 1.3600270315931744,
|
| 5777 |
+
"grad_norm": 0.47200217843055725,
|
| 5778 |
+
"learning_rate": 9.704016497914657e-05,
|
| 5779 |
+
"loss": 4.363901901245117,
|
| 5780 |
+
"step": 8050
|
| 5781 |
+
},
|
| 5782 |
+
{
|
| 5783 |
+
"epoch": 1.3617165061665821,
|
| 5784 |
+
"grad_norm": 0.47423017024993896,
|
| 5785 |
+
"learning_rate": 9.659228528247923e-05,
|
| 5786 |
+
"loss": 4.352508544921875,
|
| 5787 |
+
"step": 8060
|
| 5788 |
+
},
|
| 5789 |
+
{
|
| 5790 |
+
"epoch": 1.3634059807399899,
|
| 5791 |
+
"grad_norm": 0.49032631516456604,
|
| 5792 |
+
"learning_rate": 9.614495020098284e-05,
|
| 5793 |
+
"loss": 4.386605834960937,
|
| 5794 |
+
"step": 8070
|
| 5795 |
+
},
|
| 5796 |
+
{
|
| 5797 |
+
"epoch": 1.3650954553133976,
|
| 5798 |
+
"grad_norm": 0.5129415392875671,
|
| 5799 |
+
"learning_rate": 9.569816429627329e-05,
|
| 5800 |
+
"loss": 4.370170211791992,
|
| 5801 |
+
"step": 8080
|
| 5802 |
+
},
|
| 5803 |
+
{
|
| 5804 |
+
"epoch": 1.3667849298868053,
|
| 5805 |
+
"grad_norm": 0.47328782081604004,
|
| 5806 |
+
"learning_rate": 9.525193212436607e-05,
|
| 5807 |
+
"loss": 4.394309616088867,
|
| 5808 |
+
"step": 8090
|
| 5809 |
+
},
|
| 5810 |
+
{
|
| 5811 |
+
"epoch": 1.3684744044602128,
|
| 5812 |
+
"grad_norm": 0.5091307759284973,
|
| 5813 |
+
"learning_rate": 9.480625823563032e-05,
|
| 5814 |
+
"loss": 4.353821182250977,
|
| 5815 |
+
"step": 8100
|
| 5816 |
+
},
|
| 5817 |
+
{
|
| 5818 |
+
"epoch": 1.3701638790336206,
|
| 5819 |
+
"grad_norm": 0.49530673027038574,
|
| 5820 |
+
"learning_rate": 9.436114717474197e-05,
|
| 5821 |
+
"loss": 4.374178314208985,
|
| 5822 |
+
"step": 8110
|
| 5823 |
+
},
|
| 5824 |
+
{
|
| 5825 |
+
"epoch": 1.3718533536070283,
|
| 5826 |
+
"grad_norm": 0.5062808394432068,
|
| 5827 |
+
"learning_rate": 9.391660348063778e-05,
|
| 5828 |
+
"loss": 4.366446685791016,
|
| 5829 |
+
"step": 8120
|
| 5830 |
+
},
|
| 5831 |
+
{
|
| 5832 |
+
"epoch": 1.3735428281804358,
|
| 5833 |
+
"grad_norm": 0.4893403947353363,
|
| 5834 |
+
"learning_rate": 9.347263168646881e-05,
|
| 5835 |
+
"loss": 4.377128601074219,
|
| 5836 |
+
"step": 8130
|
| 5837 |
+
},
|
| 5838 |
+
{
|
| 5839 |
+
"epoch": 1.3752323027538436,
|
| 5840 |
+
"grad_norm": 0.49352315068244934,
|
| 5841 |
+
"learning_rate": 9.30292363195543e-05,
|
| 5842 |
+
"loss": 4.390756988525391,
|
| 5843 |
+
"step": 8140
|
| 5844 |
+
},
|
| 5845 |
+
{
|
| 5846 |
+
"epoch": 1.3769217773272513,
|
| 5847 |
+
"grad_norm": 0.4956866502761841,
|
| 5848 |
+
"learning_rate": 9.258642190133548e-05,
|
| 5849 |
+
"loss": 4.364201354980469,
|
| 5850 |
+
"step": 8150
|
| 5851 |
+
},
|
| 5852 |
+
{
|
| 5853 |
+
"epoch": 1.3786112519006588,
|
| 5854 |
+
"grad_norm": 0.4806705415248871,
|
| 5855 |
+
"learning_rate": 9.21441929473295e-05,
|
| 5856 |
+
"loss": 4.336410140991211,
|
| 5857 |
+
"step": 8160
|
| 5858 |
+
},
|
| 5859 |
+
{
|
| 5860 |
+
"epoch": 1.3803007264740665,
|
| 5861 |
+
"grad_norm": 0.503070056438446,
|
| 5862 |
+
"learning_rate": 9.170255396708336e-05,
|
| 5863 |
+
"loss": 4.363087463378906,
|
| 5864 |
+
"step": 8170
|
| 5865 |
+
},
|
| 5866 |
+
{
|
| 5867 |
+
"epoch": 1.3819902010474743,
|
| 5868 |
+
"grad_norm": 0.4839601218700409,
|
| 5869 |
+
"learning_rate": 9.126150946412775e-05,
|
| 5870 |
+
"loss": 4.353903961181641,
|
| 5871 |
+
"step": 8180
|
| 5872 |
+
},
|
| 5873 |
+
{
|
| 5874 |
+
"epoch": 1.3836796756208818,
|
| 5875 |
+
"grad_norm": 0.4867366552352905,
|
| 5876 |
+
"learning_rate": 9.082106393593153e-05,
|
| 5877 |
+
"loss": 4.347708892822266,
|
| 5878 |
+
"step": 8190
|
| 5879 |
+
},
|
| 5880 |
+
{
|
| 5881 |
+
"epoch": 1.3853691501942895,
|
| 5882 |
+
"grad_norm": 0.4875339865684509,
|
| 5883 |
+
"learning_rate": 9.038122187385543e-05,
|
| 5884 |
+
"loss": 4.371865844726562,
|
| 5885 |
+
"step": 8200
|
| 5886 |
+
},
|
| 5887 |
+
{
|
| 5888 |
+
"epoch": 1.3870586247676973,
|
| 5889 |
+
"grad_norm": 0.49478384852409363,
|
| 5890 |
+
"learning_rate": 8.994198776310652e-05,
|
| 5891 |
+
"loss": 4.368743133544922,
|
| 5892 |
+
"step": 8210
|
| 5893 |
+
},
|
| 5894 |
+
{
|
| 5895 |
+
"epoch": 1.388748099341105,
|
| 5896 |
+
"grad_norm": 0.4815446734428406,
|
| 5897 |
+
"learning_rate": 8.950336608269243e-05,
|
| 5898 |
+
"loss": 4.383320999145508,
|
| 5899 |
+
"step": 8220
|
| 5900 |
+
},
|
| 5901 |
+
{
|
| 5902 |
+
"epoch": 1.3904375739145125,
|
| 5903 |
+
"grad_norm": 0.4883415997028351,
|
| 5904 |
+
"learning_rate": 8.906536130537566e-05,
|
| 5905 |
+
"loss": 4.368521881103516,
|
| 5906 |
+
"step": 8230
|
| 5907 |
+
},
|
| 5908 |
+
{
|
| 5909 |
+
"epoch": 1.3921270484879202,
|
| 5910 |
+
"grad_norm": 0.5107654929161072,
|
| 5911 |
+
"learning_rate": 8.862797789762785e-05,
|
| 5912 |
+
"loss": 4.353972244262695,
|
| 5913 |
+
"step": 8240
|
| 5914 |
+
},
|
| 5915 |
+
{
|
| 5916 |
+
"epoch": 1.393816523061328,
|
| 5917 |
+
"grad_norm": 0.46853381395339966,
|
| 5918 |
+
"learning_rate": 8.819122031958446e-05,
|
| 5919 |
+
"loss": 4.374198150634766,
|
| 5920 |
+
"step": 8250
|
| 5921 |
+
},
|
| 5922 |
+
{
|
| 5923 |
+
"epoch": 1.3955059976347357,
|
| 5924 |
+
"grad_norm": 0.49264970421791077,
|
| 5925 |
+
"learning_rate": 8.77550930249991e-05,
|
| 5926 |
+
"loss": 4.353750228881836,
|
| 5927 |
+
"step": 8260
|
| 5928 |
+
},
|
| 5929 |
+
{
|
| 5930 |
+
"epoch": 1.3971954722081432,
|
| 5931 |
+
"grad_norm": 0.49197956919670105,
|
| 5932 |
+
"learning_rate": 8.731960046119819e-05,
|
| 5933 |
+
"loss": 4.378075408935547,
|
| 5934 |
+
"step": 8270
|
| 5935 |
+
},
|
| 5936 |
+
{
|
| 5937 |
+
"epoch": 1.398884946781551,
|
| 5938 |
+
"grad_norm": 0.48225274682044983,
|
| 5939 |
+
"learning_rate": 8.688474706903554e-05,
|
| 5940 |
+
"loss": 4.360022735595703,
|
| 5941 |
+
"step": 8280
|
| 5942 |
+
},
|
| 5943 |
+
{
|
| 5944 |
+
"epoch": 1.4005744213549587,
|
| 5945 |
+
"grad_norm": 0.4796869456768036,
|
| 5946 |
+
"learning_rate": 8.645053728284734e-05,
|
| 5947 |
+
"loss": 4.351276779174805,
|
| 5948 |
+
"step": 8290
|
| 5949 |
+
},
|
| 5950 |
+
{
|
| 5951 |
+
"epoch": 1.4022638959283662,
|
| 5952 |
+
"grad_norm": 0.46706125140190125,
|
| 5953 |
+
"learning_rate": 8.601697553040645e-05,
|
| 5954 |
+
"loss": 4.367401885986328,
|
| 5955 |
+
"step": 8300
|
| 5956 |
+
},
|
| 5957 |
+
{
|
| 5958 |
+
"epoch": 1.403953370501774,
|
| 5959 |
+
"grad_norm": 0.4695565104484558,
|
| 5960 |
+
"learning_rate": 8.55840662328778e-05,
|
| 5961 |
+
"loss": 4.338150405883789,
|
| 5962 |
+
"step": 8310
|
| 5963 |
+
},
|
| 5964 |
+
{
|
| 5965 |
+
"epoch": 1.4056428450751817,
|
| 5966 |
+
"grad_norm": 0.4987981915473938,
|
| 5967 |
+
"learning_rate": 8.515181380477273e-05,
|
| 5968 |
+
"loss": 4.369682693481446,
|
| 5969 |
+
"step": 8320
|
| 5970 |
+
},
|
| 5971 |
+
{
|
| 5972 |
+
"epoch": 1.4073323196485892,
|
| 5973 |
+
"grad_norm": 0.4853006899356842,
|
| 5974 |
+
"learning_rate": 8.47202226539046e-05,
|
| 5975 |
+
"loss": 4.392825698852539,
|
| 5976 |
+
"step": 8330
|
| 5977 |
+
},
|
| 5978 |
+
{
|
| 5979 |
+
"epoch": 1.409021794221997,
|
| 5980 |
+
"grad_norm": 0.48891976475715637,
|
| 5981 |
+
"learning_rate": 8.428929718134331e-05,
|
| 5982 |
+
"loss": 4.3820442199707035,
|
| 5983 |
+
"step": 8340
|
| 5984 |
+
},
|
| 5985 |
+
{
|
| 5986 |
+
"epoch": 1.4107112687954046,
|
| 5987 |
+
"grad_norm": 0.48374229669570923,
|
| 5988 |
+
"learning_rate": 8.385904178137061e-05,
|
| 5989 |
+
"loss": 4.367736053466797,
|
| 5990 |
+
"step": 8350
|
| 5991 |
+
},
|
| 5992 |
+
{
|
| 5993 |
+
"epoch": 1.4124007433688122,
|
| 5994 |
+
"grad_norm": 0.4966294765472412,
|
| 5995 |
+
"learning_rate": 8.342946084143546e-05,
|
| 5996 |
+
"loss": 4.336813354492188,
|
| 5997 |
+
"step": 8360
|
| 5998 |
+
},
|
| 5999 |
+
{
|
| 6000 |
+
"epoch": 1.41409021794222,
|
| 6001 |
+
"grad_norm": 0.4939606487751007,
|
| 6002 |
+
"learning_rate": 8.300055874210903e-05,
|
| 6003 |
+
"loss": 4.390798568725586,
|
| 6004 |
+
"step": 8370
|
| 6005 |
+
},
|
| 6006 |
+
{
|
| 6007 |
+
"epoch": 1.4157796925156276,
|
| 6008 |
+
"grad_norm": 0.48403191566467285,
|
| 6009 |
+
"learning_rate": 8.257233985704021e-05,
|
| 6010 |
+
"loss": 4.3521678924560545,
|
| 6011 |
+
"step": 8380
|
| 6012 |
+
},
|
| 6013 |
+
{
|
| 6014 |
+
"epoch": 1.4174691670890354,
|
| 6015 |
+
"grad_norm": 0.4766407012939453,
|
| 6016 |
+
"learning_rate": 8.214480855291084e-05,
|
| 6017 |
+
"loss": 4.337980651855469,
|
| 6018 |
+
"step": 8390
|
| 6019 |
+
},
|
| 6020 |
+
{
|
| 6021 |
+
"epoch": 1.419158641662443,
|
| 6022 |
+
"grad_norm": 0.469018816947937,
|
| 6023 |
+
"learning_rate": 8.171796918939142e-05,
|
| 6024 |
+
"loss": 4.341955184936523,
|
| 6025 |
+
"step": 8400
|
| 6026 |
+
},
|
| 6027 |
+
{
|
| 6028 |
+
"epoch": 1.4208481162358506,
|
| 6029 |
+
"grad_norm": 0.4855271875858307,
|
| 6030 |
+
"learning_rate": 8.129182611909642e-05,
|
| 6031 |
+
"loss": 4.353343963623047,
|
| 6032 |
+
"step": 8410
|
| 6033 |
+
},
|
| 6034 |
+
{
|
| 6035 |
+
"epoch": 1.4225375908092583,
|
| 6036 |
+
"grad_norm": 0.4870193898677826,
|
| 6037 |
+
"learning_rate": 8.086638368753993e-05,
|
| 6038 |
+
"loss": 4.374142074584961,
|
| 6039 |
+
"step": 8420
|
| 6040 |
+
},
|
| 6041 |
+
{
|
| 6042 |
+
"epoch": 1.424227065382666,
|
| 6043 |
+
"grad_norm": 0.4896891415119171,
|
| 6044 |
+
"learning_rate": 8.04416462330916e-05,
|
| 6045 |
+
"loss": 4.367203140258789,
|
| 6046 |
+
"step": 8430
|
| 6047 |
+
},
|
| 6048 |
+
{
|
| 6049 |
+
"epoch": 1.4259165399560736,
|
| 6050 |
+
"grad_norm": 0.46844348311424255,
|
| 6051 |
+
"learning_rate": 8.0017618086932e-05,
|
| 6052 |
+
"loss": 4.35595817565918,
|
| 6053 |
+
"step": 8440
|
| 6054 |
+
},
|
| 6055 |
+
{
|
| 6056 |
+
"epoch": 1.4276060145294813,
|
| 6057 |
+
"grad_norm": 0.4512944519519806,
|
| 6058 |
+
"learning_rate": 7.959430357300885e-05,
|
| 6059 |
+
"loss": 4.3400733947753904,
|
| 6060 |
+
"step": 8450
|
| 6061 |
+
},
|
| 6062 |
+
{
|
| 6063 |
+
"epoch": 1.429295489102889,
|
| 6064 |
+
"grad_norm": 0.4732443392276764,
|
| 6065 |
+
"learning_rate": 7.917170700799256e-05,
|
| 6066 |
+
"loss": 4.333652114868164,
|
| 6067 |
+
"step": 8460
|
| 6068 |
+
},
|
| 6069 |
+
{
|
| 6070 |
+
"epoch": 1.4309849636762966,
|
| 6071 |
+
"grad_norm": 0.4684848487377167,
|
| 6072 |
+
"learning_rate": 7.874983270123254e-05,
|
| 6073 |
+
"loss": 4.352918243408203,
|
| 6074 |
+
"step": 8470
|
| 6075 |
+
},
|
| 6076 |
+
{
|
| 6077 |
+
"epoch": 1.4326744382497043,
|
| 6078 |
+
"grad_norm": 0.506878137588501,
|
| 6079 |
+
"learning_rate": 7.832868495471306e-05,
|
| 6080 |
+
"loss": 4.357436752319336,
|
| 6081 |
+
"step": 8480
|
| 6082 |
+
},
|
| 6083 |
+
{
|
| 6084 |
+
"epoch": 1.434363912823112,
|
| 6085 |
+
"grad_norm": 0.5020336508750916,
|
| 6086 |
+
"learning_rate": 7.790826806300928e-05,
|
| 6087 |
+
"loss": 4.359925079345703,
|
| 6088 |
+
"step": 8490
|
| 6089 |
+
},
|
| 6090 |
+
{
|
| 6091 |
+
"epoch": 1.4360533873965196,
|
| 6092 |
+
"grad_norm": 0.4732269048690796,
|
| 6093 |
+
"learning_rate": 7.748858631324393e-05,
|
| 6094 |
+
"loss": 4.356634902954101,
|
| 6095 |
+
"step": 8500
|
| 6096 |
+
},
|
| 6097 |
+
{
|
| 6098 |
+
"epoch": 1.4360533873965196,
|
| 6099 |
+
"eval_loss": 4.3328938484191895,
|
| 6100 |
+
"eval_runtime": 3.6888,
|
| 6101 |
+
"eval_samples_per_second": 271.089,
|
| 6102 |
+
"eval_steps_per_second": 5.693,
|
| 6103 |
+
"step": 8500
|
| 6104 |
}
|
| 6105 |
],
|
| 6106 |
"logging_steps": 10,
|
|
|
|
| 6120 |
"attributes": {}
|
| 6121 |
}
|
| 6122 |
},
|
| 6123 |
+
"total_flos": 2.8428620737491763e+17,
|
| 6124 |
"train_batch_size": 48,
|
| 6125 |
"trial_name": null,
|
| 6126 |
"trial_params": null
|