Training in progress, step 19000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fd5c90298fb3b8e4b3bfc5252ca67d39257c11142359692b801557f737b7e42
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dde98893d5f526a3fd3a3e2c8cc5d0c5ec7ef3827d46a4ac82be414c5ffde16
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a62b565b93cfaa85c1ff8e14ede1dbb6d31acf0d0ff726cbda86bec73b0dea2e
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ad567ab1c91260dc0b589aab08ed3b669be820ef88836c60f94ca1975b277c3
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 8.
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -13759,6 +13759,770 @@
|
|
| 13759 |
"eval_spearman_manhattan": 0.7537307168421792,
|
| 13760 |
"eval_steps_per_second": 35.48,
|
| 13761 |
"step": 18000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13762 |
}
|
| 13763 |
],
|
| 13764 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 8.903467666354265,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 19000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 13759 |
"eval_spearman_manhattan": 0.7537307168421792,
|
| 13760 |
"eval_steps_per_second": 35.48,
|
| 13761 |
"step": 18000
|
| 13762 |
+
},
|
| 13763 |
+
{
|
| 13764 |
+
"epoch": 8.43955014058107,
|
| 13765 |
+
"grad_norm": 0.9349134564399719,
|
| 13766 |
+
"learning_rate": 8.945056232427367e-06,
|
| 13767 |
+
"loss": 0.0479,
|
| 13768 |
+
"step": 18010
|
| 13769 |
+
},
|
| 13770 |
+
{
|
| 13771 |
+
"epoch": 8.44423617619494,
|
| 13772 |
+
"grad_norm": 0.9666185975074768,
|
| 13773 |
+
"learning_rate": 8.944470477975634e-06,
|
| 13774 |
+
"loss": 0.0595,
|
| 13775 |
+
"step": 18020
|
| 13776 |
+
},
|
| 13777 |
+
{
|
| 13778 |
+
"epoch": 8.44892221180881,
|
| 13779 |
+
"grad_norm": 2.2687034606933594,
|
| 13780 |
+
"learning_rate": 8.943884723523899e-06,
|
| 13781 |
+
"loss": 0.055,
|
| 13782 |
+
"step": 18030
|
| 13783 |
+
},
|
| 13784 |
+
{
|
| 13785 |
+
"epoch": 8.45360824742268,
|
| 13786 |
+
"grad_norm": 1.9229964017868042,
|
| 13787 |
+
"learning_rate": 8.943298969072166e-06,
|
| 13788 |
+
"loss": 0.0602,
|
| 13789 |
+
"step": 18040
|
| 13790 |
+
},
|
| 13791 |
+
{
|
| 13792 |
+
"epoch": 8.458294283036551,
|
| 13793 |
+
"grad_norm": 2.1603922843933105,
|
| 13794 |
+
"learning_rate": 8.942713214620433e-06,
|
| 13795 |
+
"loss": 0.0736,
|
| 13796 |
+
"step": 18050
|
| 13797 |
+
},
|
| 13798 |
+
{
|
| 13799 |
+
"epoch": 8.462980318650422,
|
| 13800 |
+
"grad_norm": 2.0796990394592285,
|
| 13801 |
+
"learning_rate": 8.942127460168698e-06,
|
| 13802 |
+
"loss": 0.0569,
|
| 13803 |
+
"step": 18060
|
| 13804 |
+
},
|
| 13805 |
+
{
|
| 13806 |
+
"epoch": 8.467666354264292,
|
| 13807 |
+
"grad_norm": 1.9972143173217773,
|
| 13808 |
+
"learning_rate": 8.941541705716963e-06,
|
| 13809 |
+
"loss": 0.0678,
|
| 13810 |
+
"step": 18070
|
| 13811 |
+
},
|
| 13812 |
+
{
|
| 13813 |
+
"epoch": 8.472352389878163,
|
| 13814 |
+
"grad_norm": 0.865214467048645,
|
| 13815 |
+
"learning_rate": 8.94095595126523e-06,
|
| 13816 |
+
"loss": 0.0627,
|
| 13817 |
+
"step": 18080
|
| 13818 |
+
},
|
| 13819 |
+
{
|
| 13820 |
+
"epoch": 8.477038425492033,
|
| 13821 |
+
"grad_norm": 2.142777442932129,
|
| 13822 |
+
"learning_rate": 8.940370196813497e-06,
|
| 13823 |
+
"loss": 0.0674,
|
| 13824 |
+
"step": 18090
|
| 13825 |
+
},
|
| 13826 |
+
{
|
| 13827 |
+
"epoch": 8.481724461105905,
|
| 13828 |
+
"grad_norm": 1.4264150857925415,
|
| 13829 |
+
"learning_rate": 8.939784442361762e-06,
|
| 13830 |
+
"loss": 0.0671,
|
| 13831 |
+
"step": 18100
|
| 13832 |
+
},
|
| 13833 |
+
{
|
| 13834 |
+
"epoch": 8.486410496719776,
|
| 13835 |
+
"grad_norm": 0.7674472332000732,
|
| 13836 |
+
"learning_rate": 8.93919868791003e-06,
|
| 13837 |
+
"loss": 0.0559,
|
| 13838 |
+
"step": 18110
|
| 13839 |
+
},
|
| 13840 |
+
{
|
| 13841 |
+
"epoch": 8.491096532333646,
|
| 13842 |
+
"grad_norm": 0.81045001745224,
|
| 13843 |
+
"learning_rate": 8.938612933458294e-06,
|
| 13844 |
+
"loss": 0.0587,
|
| 13845 |
+
"step": 18120
|
| 13846 |
+
},
|
| 13847 |
+
{
|
| 13848 |
+
"epoch": 8.495782567947517,
|
| 13849 |
+
"grad_norm": 1.4473369121551514,
|
| 13850 |
+
"learning_rate": 8.938027179006561e-06,
|
| 13851 |
+
"loss": 0.0613,
|
| 13852 |
+
"step": 18130
|
| 13853 |
+
},
|
| 13854 |
+
{
|
| 13855 |
+
"epoch": 8.500468603561387,
|
| 13856 |
+
"grad_norm": 1.741360068321228,
|
| 13857 |
+
"learning_rate": 8.937441424554827e-06,
|
| 13858 |
+
"loss": 0.0638,
|
| 13859 |
+
"step": 18140
|
| 13860 |
+
},
|
| 13861 |
+
{
|
| 13862 |
+
"epoch": 8.505154639175258,
|
| 13863 |
+
"grad_norm": 2.0288760662078857,
|
| 13864 |
+
"learning_rate": 8.936855670103094e-06,
|
| 13865 |
+
"loss": 0.0586,
|
| 13866 |
+
"step": 18150
|
| 13867 |
+
},
|
| 13868 |
+
{
|
| 13869 |
+
"epoch": 8.509840674789128,
|
| 13870 |
+
"grad_norm": 2.380078077316284,
|
| 13871 |
+
"learning_rate": 8.93626991565136e-06,
|
| 13872 |
+
"loss": 0.0612,
|
| 13873 |
+
"step": 18160
|
| 13874 |
+
},
|
| 13875 |
+
{
|
| 13876 |
+
"epoch": 8.514526710402999,
|
| 13877 |
+
"grad_norm": 1.9383304119110107,
|
| 13878 |
+
"learning_rate": 8.935684161199626e-06,
|
| 13879 |
+
"loss": 0.0601,
|
| 13880 |
+
"step": 18170
|
| 13881 |
+
},
|
| 13882 |
+
{
|
| 13883 |
+
"epoch": 8.51921274601687,
|
| 13884 |
+
"grad_norm": 1.2531105279922485,
|
| 13885 |
+
"learning_rate": 8.935098406747893e-06,
|
| 13886 |
+
"loss": 0.0719,
|
| 13887 |
+
"step": 18180
|
| 13888 |
+
},
|
| 13889 |
+
{
|
| 13890 |
+
"epoch": 8.52389878163074,
|
| 13891 |
+
"grad_norm": 0.9482662081718445,
|
| 13892 |
+
"learning_rate": 8.934512652296158e-06,
|
| 13893 |
+
"loss": 0.0546,
|
| 13894 |
+
"step": 18190
|
| 13895 |
+
},
|
| 13896 |
+
{
|
| 13897 |
+
"epoch": 8.52858481724461,
|
| 13898 |
+
"grad_norm": 1.4870429039001465,
|
| 13899 |
+
"learning_rate": 8.933926897844423e-06,
|
| 13900 |
+
"loss": 0.0568,
|
| 13901 |
+
"step": 18200
|
| 13902 |
+
},
|
| 13903 |
+
{
|
| 13904 |
+
"epoch": 8.533270852858482,
|
| 13905 |
+
"grad_norm": 1.6849005222320557,
|
| 13906 |
+
"learning_rate": 8.933341143392692e-06,
|
| 13907 |
+
"loss": 0.0555,
|
| 13908 |
+
"step": 18210
|
| 13909 |
+
},
|
| 13910 |
+
{
|
| 13911 |
+
"epoch": 8.537956888472353,
|
| 13912 |
+
"grad_norm": 1.955870270729065,
|
| 13913 |
+
"learning_rate": 8.932755388940957e-06,
|
| 13914 |
+
"loss": 0.0689,
|
| 13915 |
+
"step": 18220
|
| 13916 |
+
},
|
| 13917 |
+
{
|
| 13918 |
+
"epoch": 8.542642924086223,
|
| 13919 |
+
"grad_norm": 1.697548747062683,
|
| 13920 |
+
"learning_rate": 8.932169634489222e-06,
|
| 13921 |
+
"loss": 0.056,
|
| 13922 |
+
"step": 18230
|
| 13923 |
+
},
|
| 13924 |
+
{
|
| 13925 |
+
"epoch": 8.547328959700094,
|
| 13926 |
+
"grad_norm": 1.673592209815979,
|
| 13927 |
+
"learning_rate": 8.931583880037489e-06,
|
| 13928 |
+
"loss": 0.0705,
|
| 13929 |
+
"step": 18240
|
| 13930 |
+
},
|
| 13931 |
+
{
|
| 13932 |
+
"epoch": 8.552014995313964,
|
| 13933 |
+
"grad_norm": 1.5452814102172852,
|
| 13934 |
+
"learning_rate": 8.930998125585754e-06,
|
| 13935 |
+
"loss": 0.0568,
|
| 13936 |
+
"step": 18250
|
| 13937 |
+
},
|
| 13938 |
+
{
|
| 13939 |
+
"epoch": 8.552014995313964,
|
| 13940 |
+
"eval_loss": 0.038943566381931305,
|
| 13941 |
+
"eval_pearson_cosine": 0.7825822620756648,
|
| 13942 |
+
"eval_pearson_dot": 0.6378466620068579,
|
| 13943 |
+
"eval_pearson_euclidean": 0.736198412680281,
|
| 13944 |
+
"eval_pearson_manhattan": 0.7358221479501772,
|
| 13945 |
+
"eval_runtime": 39.5923,
|
| 13946 |
+
"eval_samples_per_second": 37.886,
|
| 13947 |
+
"eval_spearman_cosine": 0.7830920224286129,
|
| 13948 |
+
"eval_spearman_dot": 0.6535736820096772,
|
| 13949 |
+
"eval_spearman_euclidean": 0.7509264123559705,
|
| 13950 |
+
"eval_spearman_manhattan": 0.7510068056516,
|
| 13951 |
+
"eval_steps_per_second": 37.886,
|
| 13952 |
+
"step": 18250
|
| 13953 |
+
},
|
| 13954 |
+
{
|
| 13955 |
+
"epoch": 8.556701030927835,
|
| 13956 |
+
"grad_norm": 2.1532504558563232,
|
| 13957 |
+
"learning_rate": 8.930412371134021e-06,
|
| 13958 |
+
"loss": 0.0737,
|
| 13959 |
+
"step": 18260
|
| 13960 |
+
},
|
| 13961 |
+
{
|
| 13962 |
+
"epoch": 8.561387066541705,
|
| 13963 |
+
"grad_norm": 1.189831256866455,
|
| 13964 |
+
"learning_rate": 8.929826616682288e-06,
|
| 13965 |
+
"loss": 0.0641,
|
| 13966 |
+
"step": 18270
|
| 13967 |
+
},
|
| 13968 |
+
{
|
| 13969 |
+
"epoch": 8.566073102155576,
|
| 13970 |
+
"grad_norm": 1.0703136920928955,
|
| 13971 |
+
"learning_rate": 8.929240862230553e-06,
|
| 13972 |
+
"loss": 0.0597,
|
| 13973 |
+
"step": 18280
|
| 13974 |
+
},
|
| 13975 |
+
{
|
| 13976 |
+
"epoch": 8.570759137769446,
|
| 13977 |
+
"grad_norm": 1.7828891277313232,
|
| 13978 |
+
"learning_rate": 8.92865510777882e-06,
|
| 13979 |
+
"loss": 0.056,
|
| 13980 |
+
"step": 18290
|
| 13981 |
+
},
|
| 13982 |
+
{
|
| 13983 |
+
"epoch": 8.575445173383319,
|
| 13984 |
+
"grad_norm": 1.6652967929840088,
|
| 13985 |
+
"learning_rate": 8.928069353327085e-06,
|
| 13986 |
+
"loss": 0.0587,
|
| 13987 |
+
"step": 18300
|
| 13988 |
+
},
|
| 13989 |
+
{
|
| 13990 |
+
"epoch": 8.580131208997189,
|
| 13991 |
+
"grad_norm": 1.5879887342453003,
|
| 13992 |
+
"learning_rate": 8.927483598875352e-06,
|
| 13993 |
+
"loss": 0.0652,
|
| 13994 |
+
"step": 18310
|
| 13995 |
+
},
|
| 13996 |
+
{
|
| 13997 |
+
"epoch": 8.58481724461106,
|
| 13998 |
+
"grad_norm": 1.400453805923462,
|
| 13999 |
+
"learning_rate": 8.92689784442362e-06,
|
| 14000 |
+
"loss": 0.0589,
|
| 14001 |
+
"step": 18320
|
| 14002 |
+
},
|
| 14003 |
+
{
|
| 14004 |
+
"epoch": 8.58950328022493,
|
| 14005 |
+
"grad_norm": 1.2532896995544434,
|
| 14006 |
+
"learning_rate": 8.926312089971884e-06,
|
| 14007 |
+
"loss": 0.0538,
|
| 14008 |
+
"step": 18330
|
| 14009 |
+
},
|
| 14010 |
+
{
|
| 14011 |
+
"epoch": 8.5941893158388,
|
| 14012 |
+
"grad_norm": 1.3725074529647827,
|
| 14013 |
+
"learning_rate": 8.925726335520151e-06,
|
| 14014 |
+
"loss": 0.0458,
|
| 14015 |
+
"step": 18340
|
| 14016 |
+
},
|
| 14017 |
+
{
|
| 14018 |
+
"epoch": 8.598875351452671,
|
| 14019 |
+
"grad_norm": 0.8545303344726562,
|
| 14020 |
+
"learning_rate": 8.925140581068417e-06,
|
| 14021 |
+
"loss": 0.054,
|
| 14022 |
+
"step": 18350
|
| 14023 |
+
},
|
| 14024 |
+
{
|
| 14025 |
+
"epoch": 8.603561387066541,
|
| 14026 |
+
"grad_norm": 2.2644894123077393,
|
| 14027 |
+
"learning_rate": 8.924554826616682e-06,
|
| 14028 |
+
"loss": 0.0727,
|
| 14029 |
+
"step": 18360
|
| 14030 |
+
},
|
| 14031 |
+
{
|
| 14032 |
+
"epoch": 8.608247422680412,
|
| 14033 |
+
"grad_norm": 2.0160939693450928,
|
| 14034 |
+
"learning_rate": 8.92396907216495e-06,
|
| 14035 |
+
"loss": 0.0716,
|
| 14036 |
+
"step": 18370
|
| 14037 |
+
},
|
| 14038 |
+
{
|
| 14039 |
+
"epoch": 8.612933458294282,
|
| 14040 |
+
"grad_norm": 1.2805579900741577,
|
| 14041 |
+
"learning_rate": 8.923383317713216e-06,
|
| 14042 |
+
"loss": 0.06,
|
| 14043 |
+
"step": 18380
|
| 14044 |
+
},
|
| 14045 |
+
{
|
| 14046 |
+
"epoch": 8.617619493908153,
|
| 14047 |
+
"grad_norm": 2.359361410140991,
|
| 14048 |
+
"learning_rate": 8.922797563261481e-06,
|
| 14049 |
+
"loss": 0.0568,
|
| 14050 |
+
"step": 18390
|
| 14051 |
+
},
|
| 14052 |
+
{
|
| 14053 |
+
"epoch": 8.622305529522023,
|
| 14054 |
+
"grad_norm": 2.423886775970459,
|
| 14055 |
+
"learning_rate": 8.922211808809748e-06,
|
| 14056 |
+
"loss": 0.0732,
|
| 14057 |
+
"step": 18400
|
| 14058 |
+
},
|
| 14059 |
+
{
|
| 14060 |
+
"epoch": 8.626991565135896,
|
| 14061 |
+
"grad_norm": 1.2898362874984741,
|
| 14062 |
+
"learning_rate": 8.921626054358013e-06,
|
| 14063 |
+
"loss": 0.0566,
|
| 14064 |
+
"step": 18410
|
| 14065 |
+
},
|
| 14066 |
+
{
|
| 14067 |
+
"epoch": 8.631677600749766,
|
| 14068 |
+
"grad_norm": 0.6553903818130493,
|
| 14069 |
+
"learning_rate": 8.92104029990628e-06,
|
| 14070 |
+
"loss": 0.0507,
|
| 14071 |
+
"step": 18420
|
| 14072 |
+
},
|
| 14073 |
+
{
|
| 14074 |
+
"epoch": 8.636363636363637,
|
| 14075 |
+
"grad_norm": 1.3605937957763672,
|
| 14076 |
+
"learning_rate": 8.920454545454547e-06,
|
| 14077 |
+
"loss": 0.0581,
|
| 14078 |
+
"step": 18430
|
| 14079 |
+
},
|
| 14080 |
+
{
|
| 14081 |
+
"epoch": 8.641049671977507,
|
| 14082 |
+
"grad_norm": 1.9910422563552856,
|
| 14083 |
+
"learning_rate": 8.919868791002812e-06,
|
| 14084 |
+
"loss": 0.0566,
|
| 14085 |
+
"step": 18440
|
| 14086 |
+
},
|
| 14087 |
+
{
|
| 14088 |
+
"epoch": 8.645735707591378,
|
| 14089 |
+
"grad_norm": 2.0107765197753906,
|
| 14090 |
+
"learning_rate": 8.919283036551079e-06,
|
| 14091 |
+
"loss": 0.068,
|
| 14092 |
+
"step": 18450
|
| 14093 |
+
},
|
| 14094 |
+
{
|
| 14095 |
+
"epoch": 8.650421743205248,
|
| 14096 |
+
"grad_norm": 1.168728232383728,
|
| 14097 |
+
"learning_rate": 8.918697282099344e-06,
|
| 14098 |
+
"loss": 0.0589,
|
| 14099 |
+
"step": 18460
|
| 14100 |
+
},
|
| 14101 |
+
{
|
| 14102 |
+
"epoch": 8.655107778819119,
|
| 14103 |
+
"grad_norm": 2.3766093254089355,
|
| 14104 |
+
"learning_rate": 8.918111527647611e-06,
|
| 14105 |
+
"loss": 0.061,
|
| 14106 |
+
"step": 18470
|
| 14107 |
+
},
|
| 14108 |
+
{
|
| 14109 |
+
"epoch": 8.65979381443299,
|
| 14110 |
+
"grad_norm": 1.6704158782958984,
|
| 14111 |
+
"learning_rate": 8.917525773195878e-06,
|
| 14112 |
+
"loss": 0.0629,
|
| 14113 |
+
"step": 18480
|
| 14114 |
+
},
|
| 14115 |
+
{
|
| 14116 |
+
"epoch": 8.66447985004686,
|
| 14117 |
+
"grad_norm": 1.9102870225906372,
|
| 14118 |
+
"learning_rate": 8.916940018744143e-06,
|
| 14119 |
+
"loss": 0.0464,
|
| 14120 |
+
"step": 18490
|
| 14121 |
+
},
|
| 14122 |
+
{
|
| 14123 |
+
"epoch": 8.669165885660732,
|
| 14124 |
+
"grad_norm": 2.742626428604126,
|
| 14125 |
+
"learning_rate": 8.91635426429241e-06,
|
| 14126 |
+
"loss": 0.0645,
|
| 14127 |
+
"step": 18500
|
| 14128 |
+
},
|
| 14129 |
+
{
|
| 14130 |
+
"epoch": 8.669165885660732,
|
| 14131 |
+
"eval_loss": 0.03774439916014671,
|
| 14132 |
+
"eval_pearson_cosine": 0.7887750445614863,
|
| 14133 |
+
"eval_pearson_dot": 0.6513653629224123,
|
| 14134 |
+
"eval_pearson_euclidean": 0.7319388075486906,
|
| 14135 |
+
"eval_pearson_manhattan": 0.7314905753471947,
|
| 14136 |
+
"eval_runtime": 40.5433,
|
| 14137 |
+
"eval_samples_per_second": 36.998,
|
| 14138 |
+
"eval_spearman_cosine": 0.7892064111202951,
|
| 14139 |
+
"eval_spearman_dot": 0.6704252435211006,
|
| 14140 |
+
"eval_spearman_euclidean": 0.7498699934549212,
|
| 14141 |
+
"eval_spearman_manhattan": 0.7495320910792913,
|
| 14142 |
+
"eval_steps_per_second": 36.998,
|
| 14143 |
+
"step": 18500
|
| 14144 |
+
},
|
| 14145 |
+
{
|
| 14146 |
+
"epoch": 8.673851921274602,
|
| 14147 |
+
"grad_norm": 1.4276272058486938,
|
| 14148 |
+
"learning_rate": 8.915768509840675e-06,
|
| 14149 |
+
"loss": 0.0466,
|
| 14150 |
+
"step": 18510
|
| 14151 |
+
},
|
| 14152 |
+
{
|
| 14153 |
+
"epoch": 8.678537956888473,
|
| 14154 |
+
"grad_norm": 1.780705451965332,
|
| 14155 |
+
"learning_rate": 8.91518275538894e-06,
|
| 14156 |
+
"loss": 0.073,
|
| 14157 |
+
"step": 18520
|
| 14158 |
+
},
|
| 14159 |
+
{
|
| 14160 |
+
"epoch": 8.683223992502343,
|
| 14161 |
+
"grad_norm": 1.422787070274353,
|
| 14162 |
+
"learning_rate": 8.914597000937208e-06,
|
| 14163 |
+
"loss": 0.0662,
|
| 14164 |
+
"step": 18530
|
| 14165 |
+
},
|
| 14166 |
+
{
|
| 14167 |
+
"epoch": 8.687910028116214,
|
| 14168 |
+
"grad_norm": 1.8989777565002441,
|
| 14169 |
+
"learning_rate": 8.914011246485474e-06,
|
| 14170 |
+
"loss": 0.053,
|
| 14171 |
+
"step": 18540
|
| 14172 |
+
},
|
| 14173 |
+
{
|
| 14174 |
+
"epoch": 8.692596063730084,
|
| 14175 |
+
"grad_norm": 1.208201289176941,
|
| 14176 |
+
"learning_rate": 8.91342549203374e-06,
|
| 14177 |
+
"loss": 0.0557,
|
| 14178 |
+
"step": 18550
|
| 14179 |
+
},
|
| 14180 |
+
{
|
| 14181 |
+
"epoch": 8.697282099343955,
|
| 14182 |
+
"grad_norm": 1.4029545783996582,
|
| 14183 |
+
"learning_rate": 8.912839737582007e-06,
|
| 14184 |
+
"loss": 0.0498,
|
| 14185 |
+
"step": 18560
|
| 14186 |
+
},
|
| 14187 |
+
{
|
| 14188 |
+
"epoch": 8.701968134957825,
|
| 14189 |
+
"grad_norm": 1.4905900955200195,
|
| 14190 |
+
"learning_rate": 8.912253983130272e-06,
|
| 14191 |
+
"loss": 0.0586,
|
| 14192 |
+
"step": 18570
|
| 14193 |
+
},
|
| 14194 |
+
{
|
| 14195 |
+
"epoch": 8.706654170571696,
|
| 14196 |
+
"grad_norm": 1.494296669960022,
|
| 14197 |
+
"learning_rate": 8.911668228678539e-06,
|
| 14198 |
+
"loss": 0.0597,
|
| 14199 |
+
"step": 18580
|
| 14200 |
+
},
|
| 14201 |
+
{
|
| 14202 |
+
"epoch": 8.711340206185566,
|
| 14203 |
+
"grad_norm": 1.8540481328964233,
|
| 14204 |
+
"learning_rate": 8.911082474226806e-06,
|
| 14205 |
+
"loss": 0.06,
|
| 14206 |
+
"step": 18590
|
| 14207 |
+
},
|
| 14208 |
+
{
|
| 14209 |
+
"epoch": 8.716026241799437,
|
| 14210 |
+
"grad_norm": 0.9429871439933777,
|
| 14211 |
+
"learning_rate": 8.910496719775071e-06,
|
| 14212 |
+
"loss": 0.0708,
|
| 14213 |
+
"step": 18600
|
| 14214 |
+
},
|
| 14215 |
+
{
|
| 14216 |
+
"epoch": 8.720712277413309,
|
| 14217 |
+
"grad_norm": 1.333791732788086,
|
| 14218 |
+
"learning_rate": 8.909910965323338e-06,
|
| 14219 |
+
"loss": 0.0583,
|
| 14220 |
+
"step": 18610
|
| 14221 |
+
},
|
| 14222 |
+
{
|
| 14223 |
+
"epoch": 8.72539831302718,
|
| 14224 |
+
"grad_norm": 1.1609207391738892,
|
| 14225 |
+
"learning_rate": 8.909325210871603e-06,
|
| 14226 |
+
"loss": 0.0499,
|
| 14227 |
+
"step": 18620
|
| 14228 |
+
},
|
| 14229 |
+
{
|
| 14230 |
+
"epoch": 8.73008434864105,
|
| 14231 |
+
"grad_norm": 1.9390841722488403,
|
| 14232 |
+
"learning_rate": 8.90873945641987e-06,
|
| 14233 |
+
"loss": 0.0751,
|
| 14234 |
+
"step": 18630
|
| 14235 |
+
},
|
| 14236 |
+
{
|
| 14237 |
+
"epoch": 8.73477038425492,
|
| 14238 |
+
"grad_norm": 1.693433165550232,
|
| 14239 |
+
"learning_rate": 8.908153701968135e-06,
|
| 14240 |
+
"loss": 0.0685,
|
| 14241 |
+
"step": 18640
|
| 14242 |
+
},
|
| 14243 |
+
{
|
| 14244 |
+
"epoch": 8.739456419868791,
|
| 14245 |
+
"grad_norm": 1.7784210443496704,
|
| 14246 |
+
"learning_rate": 8.907567947516402e-06,
|
| 14247 |
+
"loss": 0.0524,
|
| 14248 |
+
"step": 18650
|
| 14249 |
+
},
|
| 14250 |
+
{
|
| 14251 |
+
"epoch": 8.744142455482661,
|
| 14252 |
+
"grad_norm": 1.4945738315582275,
|
| 14253 |
+
"learning_rate": 8.906982193064669e-06,
|
| 14254 |
+
"loss": 0.064,
|
| 14255 |
+
"step": 18660
|
| 14256 |
+
},
|
| 14257 |
+
{
|
| 14258 |
+
"epoch": 8.748828491096532,
|
| 14259 |
+
"grad_norm": 1.7549676895141602,
|
| 14260 |
+
"learning_rate": 8.906396438612934e-06,
|
| 14261 |
+
"loss": 0.0634,
|
| 14262 |
+
"step": 18670
|
| 14263 |
+
},
|
| 14264 |
+
{
|
| 14265 |
+
"epoch": 8.753514526710402,
|
| 14266 |
+
"grad_norm": 1.1789377927780151,
|
| 14267 |
+
"learning_rate": 8.9058106841612e-06,
|
| 14268 |
+
"loss": 0.0597,
|
| 14269 |
+
"step": 18680
|
| 14270 |
+
},
|
| 14271 |
+
{
|
| 14272 |
+
"epoch": 8.758200562324273,
|
| 14273 |
+
"grad_norm": 1.983936071395874,
|
| 14274 |
+
"learning_rate": 8.905224929709466e-06,
|
| 14275 |
+
"loss": 0.0614,
|
| 14276 |
+
"step": 18690
|
| 14277 |
+
},
|
| 14278 |
+
{
|
| 14279 |
+
"epoch": 8.762886597938145,
|
| 14280 |
+
"grad_norm": 2.564476251602173,
|
| 14281 |
+
"learning_rate": 8.904639175257732e-06,
|
| 14282 |
+
"loss": 0.0711,
|
| 14283 |
+
"step": 18700
|
| 14284 |
+
},
|
| 14285 |
+
{
|
| 14286 |
+
"epoch": 8.767572633552016,
|
| 14287 |
+
"grad_norm": 0.5671543478965759,
|
| 14288 |
+
"learning_rate": 8.904053420805998e-06,
|
| 14289 |
+
"loss": 0.0586,
|
| 14290 |
+
"step": 18710
|
| 14291 |
+
},
|
| 14292 |
+
{
|
| 14293 |
+
"epoch": 8.772258669165886,
|
| 14294 |
+
"grad_norm": 1.1714857816696167,
|
| 14295 |
+
"learning_rate": 8.903467666354265e-06,
|
| 14296 |
+
"loss": 0.06,
|
| 14297 |
+
"step": 18720
|
| 14298 |
+
},
|
| 14299 |
+
{
|
| 14300 |
+
"epoch": 8.776944704779757,
|
| 14301 |
+
"grad_norm": 1.8699477910995483,
|
| 14302 |
+
"learning_rate": 8.90288191190253e-06,
|
| 14303 |
+
"loss": 0.052,
|
| 14304 |
+
"step": 18730
|
| 14305 |
+
},
|
| 14306 |
+
{
|
| 14307 |
+
"epoch": 8.781630740393627,
|
| 14308 |
+
"grad_norm": 1.0824236869812012,
|
| 14309 |
+
"learning_rate": 8.902296157450798e-06,
|
| 14310 |
+
"loss": 0.0638,
|
| 14311 |
+
"step": 18740
|
| 14312 |
+
},
|
| 14313 |
+
{
|
| 14314 |
+
"epoch": 8.786316776007498,
|
| 14315 |
+
"grad_norm": 1.3703303337097168,
|
| 14316 |
+
"learning_rate": 8.901710402999063e-06,
|
| 14317 |
+
"loss": 0.0563,
|
| 14318 |
+
"step": 18750
|
| 14319 |
+
},
|
| 14320 |
+
{
|
| 14321 |
+
"epoch": 8.786316776007498,
|
| 14322 |
+
"eval_loss": 0.037630029022693634,
|
| 14323 |
+
"eval_pearson_cosine": 0.7870129329535697,
|
| 14324 |
+
"eval_pearson_dot": 0.6393485188875303,
|
| 14325 |
+
"eval_pearson_euclidean": 0.7289305204204517,
|
| 14326 |
+
"eval_pearson_manhattan": 0.7285165698261729,
|
| 14327 |
+
"eval_runtime": 40.5046,
|
| 14328 |
+
"eval_samples_per_second": 37.033,
|
| 14329 |
+
"eval_spearman_cosine": 0.7878034848552876,
|
| 14330 |
+
"eval_spearman_dot": 0.6605642491363777,
|
| 14331 |
+
"eval_spearman_euclidean": 0.7454305721470555,
|
| 14332 |
+
"eval_spearman_manhattan": 0.745136975852769,
|
| 14333 |
+
"eval_steps_per_second": 37.033,
|
| 14334 |
+
"step": 18750
|
| 14335 |
+
},
|
| 14336 |
+
{
|
| 14337 |
+
"epoch": 8.791002811621368,
|
| 14338 |
+
"grad_norm": 1.745339035987854,
|
| 14339 |
+
"learning_rate": 8.90112464854733e-06,
|
| 14340 |
+
"loss": 0.0566,
|
| 14341 |
+
"step": 18760
|
| 14342 |
+
},
|
| 14343 |
+
{
|
| 14344 |
+
"epoch": 8.795688847235239,
|
| 14345 |
+
"grad_norm": 1.5828258991241455,
|
| 14346 |
+
"learning_rate": 8.900538894095597e-06,
|
| 14347 |
+
"loss": 0.0602,
|
| 14348 |
+
"step": 18770
|
| 14349 |
+
},
|
| 14350 |
+
{
|
| 14351 |
+
"epoch": 8.800374882849109,
|
| 14352 |
+
"grad_norm": 1.4292279481887817,
|
| 14353 |
+
"learning_rate": 8.899953139643862e-06,
|
| 14354 |
+
"loss": 0.0638,
|
| 14355 |
+
"step": 18780
|
| 14356 |
+
},
|
| 14357 |
+
{
|
| 14358 |
+
"epoch": 8.80506091846298,
|
| 14359 |
+
"grad_norm": 1.956358790397644,
|
| 14360 |
+
"learning_rate": 8.899367385192129e-06,
|
| 14361 |
+
"loss": 0.0667,
|
| 14362 |
+
"step": 18790
|
| 14363 |
+
},
|
| 14364 |
+
{
|
| 14365 |
+
"epoch": 8.80974695407685,
|
| 14366 |
+
"grad_norm": 0.9023747444152832,
|
| 14367 |
+
"learning_rate": 8.898781630740394e-06,
|
| 14368 |
+
"loss": 0.0662,
|
| 14369 |
+
"step": 18800
|
| 14370 |
+
},
|
| 14371 |
+
{
|
| 14372 |
+
"epoch": 8.814432989690722,
|
| 14373 |
+
"grad_norm": 2.1007392406463623,
|
| 14374 |
+
"learning_rate": 8.89819587628866e-06,
|
| 14375 |
+
"loss": 0.0561,
|
| 14376 |
+
"step": 18810
|
| 14377 |
+
},
|
| 14378 |
+
{
|
| 14379 |
+
"epoch": 8.819119025304593,
|
| 14380 |
+
"grad_norm": 2.0597100257873535,
|
| 14381 |
+
"learning_rate": 8.897610121836928e-06,
|
| 14382 |
+
"loss": 0.0666,
|
| 14383 |
+
"step": 18820
|
| 14384 |
+
},
|
| 14385 |
+
{
|
| 14386 |
+
"epoch": 8.823805060918463,
|
| 14387 |
+
"grad_norm": 1.1200934648513794,
|
| 14388 |
+
"learning_rate": 8.897024367385193e-06,
|
| 14389 |
+
"loss": 0.0538,
|
| 14390 |
+
"step": 18830
|
| 14391 |
+
},
|
| 14392 |
+
{
|
| 14393 |
+
"epoch": 8.828491096532334,
|
| 14394 |
+
"grad_norm": 2.032970428466797,
|
| 14395 |
+
"learning_rate": 8.896438612933458e-06,
|
| 14396 |
+
"loss": 0.0734,
|
| 14397 |
+
"step": 18840
|
| 14398 |
+
},
|
| 14399 |
+
{
|
| 14400 |
+
"epoch": 8.833177132146204,
|
| 14401 |
+
"grad_norm": 1.5491752624511719,
|
| 14402 |
+
"learning_rate": 8.895852858481725e-06,
|
| 14403 |
+
"loss": 0.0638,
|
| 14404 |
+
"step": 18850
|
| 14405 |
+
},
|
| 14406 |
+
{
|
| 14407 |
+
"epoch": 8.837863167760075,
|
| 14408 |
+
"grad_norm": 0.7450467348098755,
|
| 14409 |
+
"learning_rate": 8.89526710402999e-06,
|
| 14410 |
+
"loss": 0.0752,
|
| 14411 |
+
"step": 18860
|
| 14412 |
+
},
|
| 14413 |
+
{
|
| 14414 |
+
"epoch": 8.842549203373945,
|
| 14415 |
+
"grad_norm": 1.0671043395996094,
|
| 14416 |
+
"learning_rate": 8.894681349578257e-06,
|
| 14417 |
+
"loss": 0.0562,
|
| 14418 |
+
"step": 18870
|
| 14419 |
+
},
|
| 14420 |
+
{
|
| 14421 |
+
"epoch": 8.847235238987816,
|
| 14422 |
+
"grad_norm": 1.3302968740463257,
|
| 14423 |
+
"learning_rate": 8.894095595126524e-06,
|
| 14424 |
+
"loss": 0.0573,
|
| 14425 |
+
"step": 18880
|
| 14426 |
+
},
|
| 14427 |
+
{
|
| 14428 |
+
"epoch": 8.851921274601686,
|
| 14429 |
+
"grad_norm": 1.423279881477356,
|
| 14430 |
+
"learning_rate": 8.89350984067479e-06,
|
| 14431 |
+
"loss": 0.0645,
|
| 14432 |
+
"step": 18890
|
| 14433 |
+
},
|
| 14434 |
+
{
|
| 14435 |
+
"epoch": 8.856607310215558,
|
| 14436 |
+
"grad_norm": 1.1250574588775635,
|
| 14437 |
+
"learning_rate": 8.892924086223056e-06,
|
| 14438 |
+
"loss": 0.0616,
|
| 14439 |
+
"step": 18900
|
| 14440 |
+
},
|
| 14441 |
+
{
|
| 14442 |
+
"epoch": 8.861293345829429,
|
| 14443 |
+
"grad_norm": 1.3438372611999512,
|
| 14444 |
+
"learning_rate": 8.892338331771322e-06,
|
| 14445 |
+
"loss": 0.0525,
|
| 14446 |
+
"step": 18910
|
| 14447 |
+
},
|
| 14448 |
+
{
|
| 14449 |
+
"epoch": 8.8659793814433,
|
| 14450 |
+
"grad_norm": 1.5097957849502563,
|
| 14451 |
+
"learning_rate": 8.891752577319588e-06,
|
| 14452 |
+
"loss": 0.0593,
|
| 14453 |
+
"step": 18920
|
| 14454 |
+
},
|
| 14455 |
+
{
|
| 14456 |
+
"epoch": 8.87066541705717,
|
| 14457 |
+
"grad_norm": 1.9522205591201782,
|
| 14458 |
+
"learning_rate": 8.891166822867855e-06,
|
| 14459 |
+
"loss": 0.0674,
|
| 14460 |
+
"step": 18930
|
| 14461 |
+
},
|
| 14462 |
+
{
|
| 14463 |
+
"epoch": 8.87535145267104,
|
| 14464 |
+
"grad_norm": 1.1841950416564941,
|
| 14465 |
+
"learning_rate": 8.89058106841612e-06,
|
| 14466 |
+
"loss": 0.0641,
|
| 14467 |
+
"step": 18940
|
| 14468 |
+
},
|
| 14469 |
+
{
|
| 14470 |
+
"epoch": 8.880037488284911,
|
| 14471 |
+
"grad_norm": 1.658074140548706,
|
| 14472 |
+
"learning_rate": 8.889995313964388e-06,
|
| 14473 |
+
"loss": 0.0737,
|
| 14474 |
+
"step": 18950
|
| 14475 |
+
},
|
| 14476 |
+
{
|
| 14477 |
+
"epoch": 8.884723523898781,
|
| 14478 |
+
"grad_norm": 1.5924397706985474,
|
| 14479 |
+
"learning_rate": 8.889409559512653e-06,
|
| 14480 |
+
"loss": 0.07,
|
| 14481 |
+
"step": 18960
|
| 14482 |
+
},
|
| 14483 |
+
{
|
| 14484 |
+
"epoch": 8.889409559512652,
|
| 14485 |
+
"grad_norm": 1.379166603088379,
|
| 14486 |
+
"learning_rate": 8.888823805060918e-06,
|
| 14487 |
+
"loss": 0.0689,
|
| 14488 |
+
"step": 18970
|
| 14489 |
+
},
|
| 14490 |
+
{
|
| 14491 |
+
"epoch": 8.894095595126522,
|
| 14492 |
+
"grad_norm": 1.3292274475097656,
|
| 14493 |
+
"learning_rate": 8.888238050609187e-06,
|
| 14494 |
+
"loss": 0.0564,
|
| 14495 |
+
"step": 18980
|
| 14496 |
+
},
|
| 14497 |
+
{
|
| 14498 |
+
"epoch": 8.898781630740393,
|
| 14499 |
+
"grad_norm": 1.4383434057235718,
|
| 14500 |
+
"learning_rate": 8.887652296157452e-06,
|
| 14501 |
+
"loss": 0.0583,
|
| 14502 |
+
"step": 18990
|
| 14503 |
+
},
|
| 14504 |
+
{
|
| 14505 |
+
"epoch": 8.903467666354265,
|
| 14506 |
+
"grad_norm": 2.1288797855377197,
|
| 14507 |
+
"learning_rate": 8.887066541705717e-06,
|
| 14508 |
+
"loss": 0.0669,
|
| 14509 |
+
"step": 19000
|
| 14510 |
+
},
|
| 14511 |
+
{
|
| 14512 |
+
"epoch": 8.903467666354265,
|
| 14513 |
+
"eval_loss": 0.03827948495745659,
|
| 14514 |
+
"eval_pearson_cosine": 0.7850468616972819,
|
| 14515 |
+
"eval_pearson_dot": 0.6358914679070722,
|
| 14516 |
+
"eval_pearson_euclidean": 0.7244498308050709,
|
| 14517 |
+
"eval_pearson_manhattan": 0.7238488356503296,
|
| 14518 |
+
"eval_runtime": 40.8545,
|
| 14519 |
+
"eval_samples_per_second": 36.716,
|
| 14520 |
+
"eval_spearman_cosine": 0.7865593789879696,
|
| 14521 |
+
"eval_spearman_dot": 0.6571265794919958,
|
| 14522 |
+
"eval_spearman_euclidean": 0.7437161421017117,
|
| 14523 |
+
"eval_spearman_manhattan": 0.7432616809242956,
|
| 14524 |
+
"eval_steps_per_second": 36.716,
|
| 14525 |
+
"step": 19000
|
| 14526 |
}
|
| 14527 |
],
|
| 14528 |
"logging_steps": 10,
|