Training in progress, step 10000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad6a577dd191618d3b00fe33fe0cdf33b81a5c002fe33712258e536fcc1520a8
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ffe192963611108e55a8d98be8d1dafc323965fc2794aed1bf226c3a6c17145
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5e1f8a9c771c1f6fac3e948b043a309dd550a6e17ac92d5fdcc99c5e406cf1c
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c9a67dc102c0201a7b47ec5ea788b4d7cd182385f8b19e21244eece8e7dbf3b
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 4.
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -6883,6 +6883,770 @@
|
|
| 6883 |
"eval_spearman_manhattan": 0.7624729408213863,
|
| 6884 |
"eval_steps_per_second": 37.636,
|
| 6885 |
"step": 9000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6886 |
}
|
| 6887 |
],
|
| 6888 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.686035613870665,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 10000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 6883 |
"eval_spearman_manhattan": 0.7624729408213863,
|
| 6884 |
"eval_steps_per_second": 37.636,
|
| 6885 |
"step": 9000
|
| 6886 |
+
},
|
| 6887 |
+
{
|
| 6888 |
+
"epoch": 4.22211808809747,
|
| 6889 |
+
"grad_norm": 1.666392207145691,
|
| 6890 |
+
"learning_rate": 9.472235238987816e-06,
|
| 6891 |
+
"loss": 0.1418,
|
| 6892 |
+
"step": 9010
|
| 6893 |
+
},
|
| 6894 |
+
{
|
| 6895 |
+
"epoch": 4.22680412371134,
|
| 6896 |
+
"grad_norm": 2.7508490085601807,
|
| 6897 |
+
"learning_rate": 9.471649484536083e-06,
|
| 6898 |
+
"loss": 0.1448,
|
| 6899 |
+
"step": 9020
|
| 6900 |
+
},
|
| 6901 |
+
{
|
| 6902 |
+
"epoch": 4.231490159325211,
|
| 6903 |
+
"grad_norm": 2.883143663406372,
|
| 6904 |
+
"learning_rate": 9.471063730084349e-06,
|
| 6905 |
+
"loss": 0.166,
|
| 6906 |
+
"step": 9030
|
| 6907 |
+
},
|
| 6908 |
+
{
|
| 6909 |
+
"epoch": 4.236176194939081,
|
| 6910 |
+
"grad_norm": 2.5505452156066895,
|
| 6911 |
+
"learning_rate": 9.470477975632615e-06,
|
| 6912 |
+
"loss": 0.1417,
|
| 6913 |
+
"step": 9040
|
| 6914 |
+
},
|
| 6915 |
+
{
|
| 6916 |
+
"epoch": 4.240862230552953,
|
| 6917 |
+
"grad_norm": 2.3343942165374756,
|
| 6918 |
+
"learning_rate": 9.469892221180882e-06,
|
| 6919 |
+
"loss": 0.1569,
|
| 6920 |
+
"step": 9050
|
| 6921 |
+
},
|
| 6922 |
+
{
|
| 6923 |
+
"epoch": 4.245548266166823,
|
| 6924 |
+
"grad_norm": 2.2727818489074707,
|
| 6925 |
+
"learning_rate": 9.469306466729148e-06,
|
| 6926 |
+
"loss": 0.1346,
|
| 6927 |
+
"step": 9060
|
| 6928 |
+
},
|
| 6929 |
+
{
|
| 6930 |
+
"epoch": 4.250234301780694,
|
| 6931 |
+
"grad_norm": 2.8398542404174805,
|
| 6932 |
+
"learning_rate": 9.468720712277415e-06,
|
| 6933 |
+
"loss": 0.151,
|
| 6934 |
+
"step": 9070
|
| 6935 |
+
},
|
| 6936 |
+
{
|
| 6937 |
+
"epoch": 4.254920337394564,
|
| 6938 |
+
"grad_norm": 2.0750465393066406,
|
| 6939 |
+
"learning_rate": 9.46813495782568e-06,
|
| 6940 |
+
"loss": 0.1555,
|
| 6941 |
+
"step": 9080
|
| 6942 |
+
},
|
| 6943 |
+
{
|
| 6944 |
+
"epoch": 4.259606373008435,
|
| 6945 |
+
"grad_norm": 2.1490373611450195,
|
| 6946 |
+
"learning_rate": 9.467549203373947e-06,
|
| 6947 |
+
"loss": 0.1719,
|
| 6948 |
+
"step": 9090
|
| 6949 |
+
},
|
| 6950 |
+
{
|
| 6951 |
+
"epoch": 4.264292408622305,
|
| 6952 |
+
"grad_norm": 1.9958566427230835,
|
| 6953 |
+
"learning_rate": 9.466963448922214e-06,
|
| 6954 |
+
"loss": 0.1332,
|
| 6955 |
+
"step": 9100
|
| 6956 |
+
},
|
| 6957 |
+
{
|
| 6958 |
+
"epoch": 4.2689784442361765,
|
| 6959 |
+
"grad_norm": 2.586869478225708,
|
| 6960 |
+
"learning_rate": 9.466377694470479e-06,
|
| 6961 |
+
"loss": 0.1545,
|
| 6962 |
+
"step": 9110
|
| 6963 |
+
},
|
| 6964 |
+
{
|
| 6965 |
+
"epoch": 4.273664479850047,
|
| 6966 |
+
"grad_norm": 1.7282586097717285,
|
| 6967 |
+
"learning_rate": 9.465791940018744e-06,
|
| 6968 |
+
"loss": 0.1608,
|
| 6969 |
+
"step": 9120
|
| 6970 |
+
},
|
| 6971 |
+
{
|
| 6972 |
+
"epoch": 4.278350515463917,
|
| 6973 |
+
"grad_norm": 2.7652742862701416,
|
| 6974 |
+
"learning_rate": 9.465206185567011e-06,
|
| 6975 |
+
"loss": 0.1319,
|
| 6976 |
+
"step": 9130
|
| 6977 |
+
},
|
| 6978 |
+
{
|
| 6979 |
+
"epoch": 4.283036551077788,
|
| 6980 |
+
"grad_norm": 2.3922176361083984,
|
| 6981 |
+
"learning_rate": 9.464620431115276e-06,
|
| 6982 |
+
"loss": 0.1569,
|
| 6983 |
+
"step": 9140
|
| 6984 |
+
},
|
| 6985 |
+
{
|
| 6986 |
+
"epoch": 4.287722586691659,
|
| 6987 |
+
"grad_norm": 2.223822832107544,
|
| 6988 |
+
"learning_rate": 9.464034676663543e-06,
|
| 6989 |
+
"loss": 0.1494,
|
| 6990 |
+
"step": 9150
|
| 6991 |
+
},
|
| 6992 |
+
{
|
| 6993 |
+
"epoch": 4.29240862230553,
|
| 6994 |
+
"grad_norm": 2.0714290142059326,
|
| 6995 |
+
"learning_rate": 9.46344892221181e-06,
|
| 6996 |
+
"loss": 0.1506,
|
| 6997 |
+
"step": 9160
|
| 6998 |
+
},
|
| 6999 |
+
{
|
| 7000 |
+
"epoch": 4.2970946579194,
|
| 7001 |
+
"grad_norm": 2.4368040561676025,
|
| 7002 |
+
"learning_rate": 9.462863167760075e-06,
|
| 7003 |
+
"loss": 0.1413,
|
| 7004 |
+
"step": 9170
|
| 7005 |
+
},
|
| 7006 |
+
{
|
| 7007 |
+
"epoch": 4.301780693533271,
|
| 7008 |
+
"grad_norm": 2.760032892227173,
|
| 7009 |
+
"learning_rate": 9.462277413308342e-06,
|
| 7010 |
+
"loss": 0.1599,
|
| 7011 |
+
"step": 9180
|
| 7012 |
+
},
|
| 7013 |
+
{
|
| 7014 |
+
"epoch": 4.306466729147141,
|
| 7015 |
+
"grad_norm": 2.8202686309814453,
|
| 7016 |
+
"learning_rate": 9.461691658856607e-06,
|
| 7017 |
+
"loss": 0.1444,
|
| 7018 |
+
"step": 9190
|
| 7019 |
+
},
|
| 7020 |
+
{
|
| 7021 |
+
"epoch": 4.311152764761012,
|
| 7022 |
+
"grad_norm": 2.5001349449157715,
|
| 7023 |
+
"learning_rate": 9.461105904404874e-06,
|
| 7024 |
+
"loss": 0.1322,
|
| 7025 |
+
"step": 9200
|
| 7026 |
+
},
|
| 7027 |
+
{
|
| 7028 |
+
"epoch": 4.315838800374883,
|
| 7029 |
+
"grad_norm": 2.5955960750579834,
|
| 7030 |
+
"learning_rate": 9.460520149953141e-06,
|
| 7031 |
+
"loss": 0.1522,
|
| 7032 |
+
"step": 9210
|
| 7033 |
+
},
|
| 7034 |
+
{
|
| 7035 |
+
"epoch": 4.320524835988754,
|
| 7036 |
+
"grad_norm": 1.966848611831665,
|
| 7037 |
+
"learning_rate": 9.459934395501406e-06,
|
| 7038 |
+
"loss": 0.133,
|
| 7039 |
+
"step": 9220
|
| 7040 |
+
},
|
| 7041 |
+
{
|
| 7042 |
+
"epoch": 4.325210871602624,
|
| 7043 |
+
"grad_norm": 2.4600915908813477,
|
| 7044 |
+
"learning_rate": 9.459348641049673e-06,
|
| 7045 |
+
"loss": 0.1381,
|
| 7046 |
+
"step": 9230
|
| 7047 |
+
},
|
| 7048 |
+
{
|
| 7049 |
+
"epoch": 4.329896907216495,
|
| 7050 |
+
"grad_norm": 2.6355576515197754,
|
| 7051 |
+
"learning_rate": 9.458762886597939e-06,
|
| 7052 |
+
"loss": 0.1671,
|
| 7053 |
+
"step": 9240
|
| 7054 |
+
},
|
| 7055 |
+
{
|
| 7056 |
+
"epoch": 4.334582942830366,
|
| 7057 |
+
"grad_norm": 1.9884010553359985,
|
| 7058 |
+
"learning_rate": 9.458177132146204e-06,
|
| 7059 |
+
"loss": 0.1565,
|
| 7060 |
+
"step": 9250
|
| 7061 |
+
},
|
| 7062 |
+
{
|
| 7063 |
+
"epoch": 4.334582942830366,
|
| 7064 |
+
"eval_loss": 0.04769788682460785,
|
| 7065 |
+
"eval_pearson_cosine": 0.7716762907463419,
|
| 7066 |
+
"eval_pearson_dot": 0.6025868220654615,
|
| 7067 |
+
"eval_pearson_euclidean": 0.7486154713041202,
|
| 7068 |
+
"eval_pearson_manhattan": 0.7480947777024909,
|
| 7069 |
+
"eval_runtime": 39.7245,
|
| 7070 |
+
"eval_samples_per_second": 37.76,
|
| 7071 |
+
"eval_spearman_cosine": 0.7767828328865971,
|
| 7072 |
+
"eval_spearman_dot": 0.6102486537359278,
|
| 7073 |
+
"eval_spearman_euclidean": 0.764536418480009,
|
| 7074 |
+
"eval_spearman_manhattan": 0.7641318665907693,
|
| 7075 |
+
"eval_steps_per_second": 37.76,
|
| 7076 |
+
"step": 9250
|
| 7077 |
+
},
|
| 7078 |
+
{
|
| 7079 |
+
"epoch": 4.339268978444236,
|
| 7080 |
+
"grad_norm": 2.462095260620117,
|
| 7081 |
+
"learning_rate": 9.457591377694472e-06,
|
| 7082 |
+
"loss": 0.1574,
|
| 7083 |
+
"step": 9260
|
| 7084 |
+
},
|
| 7085 |
+
{
|
| 7086 |
+
"epoch": 4.343955014058107,
|
| 7087 |
+
"grad_norm": 2.8646531105041504,
|
| 7088 |
+
"learning_rate": 9.457005623242738e-06,
|
| 7089 |
+
"loss": 0.1447,
|
| 7090 |
+
"step": 9270
|
| 7091 |
+
},
|
| 7092 |
+
{
|
| 7093 |
+
"epoch": 4.348641049671977,
|
| 7094 |
+
"grad_norm": 2.456422805786133,
|
| 7095 |
+
"learning_rate": 9.456419868791003e-06,
|
| 7096 |
+
"loss": 0.1481,
|
| 7097 |
+
"step": 9280
|
| 7098 |
+
},
|
| 7099 |
+
{
|
| 7100 |
+
"epoch": 4.353327085285848,
|
| 7101 |
+
"grad_norm": 3.1123046875,
|
| 7102 |
+
"learning_rate": 9.45583411433927e-06,
|
| 7103 |
+
"loss": 0.1465,
|
| 7104 |
+
"step": 9290
|
| 7105 |
+
},
|
| 7106 |
+
{
|
| 7107 |
+
"epoch": 4.358013120899718,
|
| 7108 |
+
"grad_norm": 3.3074183464050293,
|
| 7109 |
+
"learning_rate": 9.455248359887535e-06,
|
| 7110 |
+
"loss": 0.1642,
|
| 7111 |
+
"step": 9300
|
| 7112 |
+
},
|
| 7113 |
+
{
|
| 7114 |
+
"epoch": 4.36269915651359,
|
| 7115 |
+
"grad_norm": 1.6989983320236206,
|
| 7116 |
+
"learning_rate": 9.454662605435802e-06,
|
| 7117 |
+
"loss": 0.1282,
|
| 7118 |
+
"step": 9310
|
| 7119 |
+
},
|
| 7120 |
+
{
|
| 7121 |
+
"epoch": 4.36738519212746,
|
| 7122 |
+
"grad_norm": 1.7987381219863892,
|
| 7123 |
+
"learning_rate": 9.454076850984069e-06,
|
| 7124 |
+
"loss": 0.1456,
|
| 7125 |
+
"step": 9320
|
| 7126 |
+
},
|
| 7127 |
+
{
|
| 7128 |
+
"epoch": 4.372071227741331,
|
| 7129 |
+
"grad_norm": 1.6666337251663208,
|
| 7130 |
+
"learning_rate": 9.453491096532334e-06,
|
| 7131 |
+
"loss": 0.1618,
|
| 7132 |
+
"step": 9330
|
| 7133 |
+
},
|
| 7134 |
+
{
|
| 7135 |
+
"epoch": 4.376757263355201,
|
| 7136 |
+
"grad_norm": 2.1631813049316406,
|
| 7137 |
+
"learning_rate": 9.452905342080601e-06,
|
| 7138 |
+
"loss": 0.142,
|
| 7139 |
+
"step": 9340
|
| 7140 |
+
},
|
| 7141 |
+
{
|
| 7142 |
+
"epoch": 4.381443298969073,
|
| 7143 |
+
"grad_norm": 2.5329623222351074,
|
| 7144 |
+
"learning_rate": 9.452319587628866e-06,
|
| 7145 |
+
"loss": 0.1436,
|
| 7146 |
+
"step": 9350
|
| 7147 |
+
},
|
| 7148 |
+
{
|
| 7149 |
+
"epoch": 4.386129334582943,
|
| 7150 |
+
"grad_norm": 1.9239972829818726,
|
| 7151 |
+
"learning_rate": 9.451733833177133e-06,
|
| 7152 |
+
"loss": 0.1361,
|
| 7153 |
+
"step": 9360
|
| 7154 |
+
},
|
| 7155 |
+
{
|
| 7156 |
+
"epoch": 4.390815370196814,
|
| 7157 |
+
"grad_norm": 2.091343641281128,
|
| 7158 |
+
"learning_rate": 9.4511480787254e-06,
|
| 7159 |
+
"loss": 0.1525,
|
| 7160 |
+
"step": 9370
|
| 7161 |
+
},
|
| 7162 |
+
{
|
| 7163 |
+
"epoch": 4.395501405810684,
|
| 7164 |
+
"grad_norm": 2.544008255004883,
|
| 7165 |
+
"learning_rate": 9.450562324273665e-06,
|
| 7166 |
+
"loss": 0.1454,
|
| 7167 |
+
"step": 9380
|
| 7168 |
+
},
|
| 7169 |
+
{
|
| 7170 |
+
"epoch": 4.4001874414245545,
|
| 7171 |
+
"grad_norm": 2.5722694396972656,
|
| 7172 |
+
"learning_rate": 9.449976569821932e-06,
|
| 7173 |
+
"loss": 0.1619,
|
| 7174 |
+
"step": 9390
|
| 7175 |
+
},
|
| 7176 |
+
{
|
| 7177 |
+
"epoch": 4.404873477038425,
|
| 7178 |
+
"grad_norm": 2.6824848651885986,
|
| 7179 |
+
"learning_rate": 9.449390815370197e-06,
|
| 7180 |
+
"loss": 0.1529,
|
| 7181 |
+
"step": 9400
|
| 7182 |
+
},
|
| 7183 |
+
{
|
| 7184 |
+
"epoch": 4.409559512652296,
|
| 7185 |
+
"grad_norm": 2.406972885131836,
|
| 7186 |
+
"learning_rate": 9.448805060918463e-06,
|
| 7187 |
+
"loss": 0.153,
|
| 7188 |
+
"step": 9410
|
| 7189 |
+
},
|
| 7190 |
+
{
|
| 7191 |
+
"epoch": 4.414245548266167,
|
| 7192 |
+
"grad_norm": 2.7198727130889893,
|
| 7193 |
+
"learning_rate": 9.448219306466731e-06,
|
| 7194 |
+
"loss": 0.1657,
|
| 7195 |
+
"step": 9420
|
| 7196 |
+
},
|
| 7197 |
+
{
|
| 7198 |
+
"epoch": 4.418931583880037,
|
| 7199 |
+
"grad_norm": 2.511366844177246,
|
| 7200 |
+
"learning_rate": 9.447633552014996e-06,
|
| 7201 |
+
"loss": 0.1409,
|
| 7202 |
+
"step": 9430
|
| 7203 |
+
},
|
| 7204 |
+
{
|
| 7205 |
+
"epoch": 4.423617619493908,
|
| 7206 |
+
"grad_norm": 2.4278454780578613,
|
| 7207 |
+
"learning_rate": 9.447047797563262e-06,
|
| 7208 |
+
"loss": 0.1392,
|
| 7209 |
+
"step": 9440
|
| 7210 |
+
},
|
| 7211 |
+
{
|
| 7212 |
+
"epoch": 4.428303655107779,
|
| 7213 |
+
"grad_norm": 3.2954795360565186,
|
| 7214 |
+
"learning_rate": 9.446462043111529e-06,
|
| 7215 |
+
"loss": 0.142,
|
| 7216 |
+
"step": 9450
|
| 7217 |
+
},
|
| 7218 |
+
{
|
| 7219 |
+
"epoch": 4.43298969072165,
|
| 7220 |
+
"grad_norm": 2.079050302505493,
|
| 7221 |
+
"learning_rate": 9.445876288659794e-06,
|
| 7222 |
+
"loss": 0.1444,
|
| 7223 |
+
"step": 9460
|
| 7224 |
+
},
|
| 7225 |
+
{
|
| 7226 |
+
"epoch": 4.43767572633552,
|
| 7227 |
+
"grad_norm": 2.284108877182007,
|
| 7228 |
+
"learning_rate": 9.44529053420806e-06,
|
| 7229 |
+
"loss": 0.1549,
|
| 7230 |
+
"step": 9470
|
| 7231 |
+
},
|
| 7232 |
+
{
|
| 7233 |
+
"epoch": 4.442361761949391,
|
| 7234 |
+
"grad_norm": 1.7953790426254272,
|
| 7235 |
+
"learning_rate": 9.444704779756328e-06,
|
| 7236 |
+
"loss": 0.1632,
|
| 7237 |
+
"step": 9480
|
| 7238 |
+
},
|
| 7239 |
+
{
|
| 7240 |
+
"epoch": 4.447047797563261,
|
| 7241 |
+
"grad_norm": 2.6434693336486816,
|
| 7242 |
+
"learning_rate": 9.444119025304593e-06,
|
| 7243 |
+
"loss": 0.1453,
|
| 7244 |
+
"step": 9490
|
| 7245 |
+
},
|
| 7246 |
+
{
|
| 7247 |
+
"epoch": 4.451733833177133,
|
| 7248 |
+
"grad_norm": 2.6489696502685547,
|
| 7249 |
+
"learning_rate": 9.44353327085286e-06,
|
| 7250 |
+
"loss": 0.1577,
|
| 7251 |
+
"step": 9500
|
| 7252 |
+
},
|
| 7253 |
+
{
|
| 7254 |
+
"epoch": 4.451733833177133,
|
| 7255 |
+
"eval_loss": 0.0442158505320549,
|
| 7256 |
+
"eval_pearson_cosine": 0.7793668842918748,
|
| 7257 |
+
"eval_pearson_dot": 0.6181762937224704,
|
| 7258 |
+
"eval_pearson_euclidean": 0.7443899044656206,
|
| 7259 |
+
"eval_pearson_manhattan": 0.7439473885249299,
|
| 7260 |
+
"eval_runtime": 39.5492,
|
| 7261 |
+
"eval_samples_per_second": 37.927,
|
| 7262 |
+
"eval_spearman_cosine": 0.7823666516115477,
|
| 7263 |
+
"eval_spearman_dot": 0.6290855072192552,
|
| 7264 |
+
"eval_spearman_euclidean": 0.7629748815703942,
|
| 7265 |
+
"eval_spearman_manhattan": 0.7626889580909112,
|
| 7266 |
+
"eval_steps_per_second": 37.927,
|
| 7267 |
+
"step": 9500
|
| 7268 |
+
},
|
| 7269 |
+
{
|
| 7270 |
+
"epoch": 4.456419868791003,
|
| 7271 |
+
"grad_norm": 2.769876003265381,
|
| 7272 |
+
"learning_rate": 9.442947516401125e-06,
|
| 7273 |
+
"loss": 0.1474,
|
| 7274 |
+
"step": 9510
|
| 7275 |
+
},
|
| 7276 |
+
{
|
| 7277 |
+
"epoch": 4.4611059044048735,
|
| 7278 |
+
"grad_norm": 2.8958747386932373,
|
| 7279 |
+
"learning_rate": 9.442361761949392e-06,
|
| 7280 |
+
"loss": 0.1479,
|
| 7281 |
+
"step": 9520
|
| 7282 |
+
},
|
| 7283 |
+
{
|
| 7284 |
+
"epoch": 4.465791940018744,
|
| 7285 |
+
"grad_norm": 3.219896078109741,
|
| 7286 |
+
"learning_rate": 9.441776007497657e-06,
|
| 7287 |
+
"loss": 0.1621,
|
| 7288 |
+
"step": 9530
|
| 7289 |
+
},
|
| 7290 |
+
{
|
| 7291 |
+
"epoch": 4.4704779756326145,
|
| 7292 |
+
"grad_norm": 2.0421993732452393,
|
| 7293 |
+
"learning_rate": 9.441190253045924e-06,
|
| 7294 |
+
"loss": 0.1473,
|
| 7295 |
+
"step": 9540
|
| 7296 |
+
},
|
| 7297 |
+
{
|
| 7298 |
+
"epoch": 4.475164011246486,
|
| 7299 |
+
"grad_norm": 2.189121961593628,
|
| 7300 |
+
"learning_rate": 9.440604498594191e-06,
|
| 7301 |
+
"loss": 0.1751,
|
| 7302 |
+
"step": 9550
|
| 7303 |
+
},
|
| 7304 |
+
{
|
| 7305 |
+
"epoch": 4.479850046860356,
|
| 7306 |
+
"grad_norm": 2.7012038230895996,
|
| 7307 |
+
"learning_rate": 9.440018744142456e-06,
|
| 7308 |
+
"loss": 0.1598,
|
| 7309 |
+
"step": 9560
|
| 7310 |
+
},
|
| 7311 |
+
{
|
| 7312 |
+
"epoch": 4.484536082474227,
|
| 7313 |
+
"grad_norm": 2.8359875679016113,
|
| 7314 |
+
"learning_rate": 9.439432989690721e-06,
|
| 7315 |
+
"loss": 0.1217,
|
| 7316 |
+
"step": 9570
|
| 7317 |
+
},
|
| 7318 |
+
{
|
| 7319 |
+
"epoch": 4.489222118088097,
|
| 7320 |
+
"grad_norm": 2.1257011890411377,
|
| 7321 |
+
"learning_rate": 9.438847235238988e-06,
|
| 7322 |
+
"loss": 0.1395,
|
| 7323 |
+
"step": 9580
|
| 7324 |
+
},
|
| 7325 |
+
{
|
| 7326 |
+
"epoch": 4.493908153701968,
|
| 7327 |
+
"grad_norm": 2.8070883750915527,
|
| 7328 |
+
"learning_rate": 9.438261480787255e-06,
|
| 7329 |
+
"loss": 0.1654,
|
| 7330 |
+
"step": 9590
|
| 7331 |
+
},
|
| 7332 |
+
{
|
| 7333 |
+
"epoch": 4.498594189315839,
|
| 7334 |
+
"grad_norm": 2.672773838043213,
|
| 7335 |
+
"learning_rate": 9.43767572633552e-06,
|
| 7336 |
+
"loss": 0.133,
|
| 7337 |
+
"step": 9600
|
| 7338 |
+
},
|
| 7339 |
+
{
|
| 7340 |
+
"epoch": 4.50328022492971,
|
| 7341 |
+
"grad_norm": 2.5079360008239746,
|
| 7342 |
+
"learning_rate": 9.437089971883787e-06,
|
| 7343 |
+
"loss": 0.131,
|
| 7344 |
+
"step": 9610
|
| 7345 |
+
},
|
| 7346 |
+
{
|
| 7347 |
+
"epoch": 4.50796626054358,
|
| 7348 |
+
"grad_norm": 3.078028678894043,
|
| 7349 |
+
"learning_rate": 9.436504217432053e-06,
|
| 7350 |
+
"loss": 0.1634,
|
| 7351 |
+
"step": 9620
|
| 7352 |
+
},
|
| 7353 |
+
{
|
| 7354 |
+
"epoch": 4.512652296157451,
|
| 7355 |
+
"grad_norm": 2.3852665424346924,
|
| 7356 |
+
"learning_rate": 9.43591846298032e-06,
|
| 7357 |
+
"loss": 0.1382,
|
| 7358 |
+
"step": 9630
|
| 7359 |
+
},
|
| 7360 |
+
{
|
| 7361 |
+
"epoch": 4.517338331771321,
|
| 7362 |
+
"grad_norm": 2.917783260345459,
|
| 7363 |
+
"learning_rate": 9.435332708528585e-06,
|
| 7364 |
+
"loss": 0.1573,
|
| 7365 |
+
"step": 9640
|
| 7366 |
+
},
|
| 7367 |
+
{
|
| 7368 |
+
"epoch": 4.5220243673851925,
|
| 7369 |
+
"grad_norm": 2.0304133892059326,
|
| 7370 |
+
"learning_rate": 9.434746954076852e-06,
|
| 7371 |
+
"loss": 0.1261,
|
| 7372 |
+
"step": 9650
|
| 7373 |
+
},
|
| 7374 |
+
{
|
| 7375 |
+
"epoch": 4.526710402999063,
|
| 7376 |
+
"grad_norm": 2.4420013427734375,
|
| 7377 |
+
"learning_rate": 9.434161199625119e-06,
|
| 7378 |
+
"loss": 0.1529,
|
| 7379 |
+
"step": 9660
|
| 7380 |
+
},
|
| 7381 |
+
{
|
| 7382 |
+
"epoch": 4.5313964386129335,
|
| 7383 |
+
"grad_norm": 3.5100250244140625,
|
| 7384 |
+
"learning_rate": 9.433575445173384e-06,
|
| 7385 |
+
"loss": 0.1415,
|
| 7386 |
+
"step": 9670
|
| 7387 |
+
},
|
| 7388 |
+
{
|
| 7389 |
+
"epoch": 4.536082474226804,
|
| 7390 |
+
"grad_norm": 2.250225782394409,
|
| 7391 |
+
"learning_rate": 9.43298969072165e-06,
|
| 7392 |
+
"loss": 0.1389,
|
| 7393 |
+
"step": 9680
|
| 7394 |
+
},
|
| 7395 |
+
{
|
| 7396 |
+
"epoch": 4.5407685098406745,
|
| 7397 |
+
"grad_norm": 2.866528272628784,
|
| 7398 |
+
"learning_rate": 9.432403936269916e-06,
|
| 7399 |
+
"loss": 0.1366,
|
| 7400 |
+
"step": 9690
|
| 7401 |
+
},
|
| 7402 |
+
{
|
| 7403 |
+
"epoch": 4.545454545454545,
|
| 7404 |
+
"grad_norm": 2.196103096008301,
|
| 7405 |
+
"learning_rate": 9.431818181818183e-06,
|
| 7406 |
+
"loss": 0.139,
|
| 7407 |
+
"step": 9700
|
| 7408 |
+
},
|
| 7409 |
+
{
|
| 7410 |
+
"epoch": 4.550140581068416,
|
| 7411 |
+
"grad_norm": 2.383646249771118,
|
| 7412 |
+
"learning_rate": 9.43123242736645e-06,
|
| 7413 |
+
"loss": 0.1503,
|
| 7414 |
+
"step": 9710
|
| 7415 |
+
},
|
| 7416 |
+
{
|
| 7417 |
+
"epoch": 4.554826616682287,
|
| 7418 |
+
"grad_norm": 2.5517141819000244,
|
| 7419 |
+
"learning_rate": 9.430646672914715e-06,
|
| 7420 |
+
"loss": 0.1588,
|
| 7421 |
+
"step": 9720
|
| 7422 |
+
},
|
| 7423 |
+
{
|
| 7424 |
+
"epoch": 4.559512652296157,
|
| 7425 |
+
"grad_norm": 2.985891103744507,
|
| 7426 |
+
"learning_rate": 9.43006091846298e-06,
|
| 7427 |
+
"loss": 0.1476,
|
| 7428 |
+
"step": 9730
|
| 7429 |
+
},
|
| 7430 |
+
{
|
| 7431 |
+
"epoch": 4.564198687910028,
|
| 7432 |
+
"grad_norm": 2.9456253051757812,
|
| 7433 |
+
"learning_rate": 9.429475164011247e-06,
|
| 7434 |
+
"loss": 0.1698,
|
| 7435 |
+
"step": 9740
|
| 7436 |
+
},
|
| 7437 |
+
{
|
| 7438 |
+
"epoch": 4.568884723523899,
|
| 7439 |
+
"grad_norm": 2.052727699279785,
|
| 7440 |
+
"learning_rate": 9.428889409559512e-06,
|
| 7441 |
+
"loss": 0.1463,
|
| 7442 |
+
"step": 9750
|
| 7443 |
+
},
|
| 7444 |
+
{
|
| 7445 |
+
"epoch": 4.568884723523899,
|
| 7446 |
+
"eval_loss": 0.0455799400806427,
|
| 7447 |
+
"eval_pearson_cosine": 0.7764385842406938,
|
| 7448 |
+
"eval_pearson_dot": 0.5941173098885884,
|
| 7449 |
+
"eval_pearson_euclidean": 0.7405048981360327,
|
| 7450 |
+
"eval_pearson_manhattan": 0.740129834669768,
|
| 7451 |
+
"eval_runtime": 40.4889,
|
| 7452 |
+
"eval_samples_per_second": 37.047,
|
| 7453 |
+
"eval_spearman_cosine": 0.782117792198136,
|
| 7454 |
+
"eval_spearman_dot": 0.599132023248896,
|
| 7455 |
+
"eval_spearman_euclidean": 0.7603847963092804,
|
| 7456 |
+
"eval_spearman_manhattan": 0.7601623940158222,
|
| 7457 |
+
"eval_steps_per_second": 37.047,
|
| 7458 |
+
"step": 9750
|
| 7459 |
+
},
|
| 7460 |
+
{
|
| 7461 |
+
"epoch": 4.57357075913777,
|
| 7462 |
+
"grad_norm": 2.1978442668914795,
|
| 7463 |
+
"learning_rate": 9.42830365510778e-06,
|
| 7464 |
+
"loss": 0.1537,
|
| 7465 |
+
"step": 9760
|
| 7466 |
+
},
|
| 7467 |
+
{
|
| 7468 |
+
"epoch": 4.57825679475164,
|
| 7469 |
+
"grad_norm": 2.976311206817627,
|
| 7470 |
+
"learning_rate": 9.427717900656046e-06,
|
| 7471 |
+
"loss": 0.1506,
|
| 7472 |
+
"step": 9770
|
| 7473 |
+
},
|
| 7474 |
+
{
|
| 7475 |
+
"epoch": 4.582942830365511,
|
| 7476 |
+
"grad_norm": 1.1756877899169922,
|
| 7477 |
+
"learning_rate": 9.427132146204311e-06,
|
| 7478 |
+
"loss": 0.127,
|
| 7479 |
+
"step": 9780
|
| 7480 |
+
},
|
| 7481 |
+
{
|
| 7482 |
+
"epoch": 4.587628865979381,
|
| 7483 |
+
"grad_norm": 2.0622363090515137,
|
| 7484 |
+
"learning_rate": 9.426546391752578e-06,
|
| 7485 |
+
"loss": 0.1508,
|
| 7486 |
+
"step": 9790
|
| 7487 |
+
},
|
| 7488 |
+
{
|
| 7489 |
+
"epoch": 4.592314901593252,
|
| 7490 |
+
"grad_norm": 2.6205925941467285,
|
| 7491 |
+
"learning_rate": 9.425960637300844e-06,
|
| 7492 |
+
"loss": 0.142,
|
| 7493 |
+
"step": 9800
|
| 7494 |
+
},
|
| 7495 |
+
{
|
| 7496 |
+
"epoch": 4.597000937207123,
|
| 7497 |
+
"grad_norm": 1.8698289394378662,
|
| 7498 |
+
"learning_rate": 9.42537488284911e-06,
|
| 7499 |
+
"loss": 0.1375,
|
| 7500 |
+
"step": 9810
|
| 7501 |
+
},
|
| 7502 |
+
{
|
| 7503 |
+
"epoch": 4.6016869728209935,
|
| 7504 |
+
"grad_norm": 1.9030426740646362,
|
| 7505 |
+
"learning_rate": 9.424789128397377e-06,
|
| 7506 |
+
"loss": 0.1626,
|
| 7507 |
+
"step": 9820
|
| 7508 |
+
},
|
| 7509 |
+
{
|
| 7510 |
+
"epoch": 4.606373008434864,
|
| 7511 |
+
"grad_norm": 2.2842605113983154,
|
| 7512 |
+
"learning_rate": 9.424203373945643e-06,
|
| 7513 |
+
"loss": 0.1518,
|
| 7514 |
+
"step": 9830
|
| 7515 |
+
},
|
| 7516 |
+
{
|
| 7517 |
+
"epoch": 4.6110590440487345,
|
| 7518 |
+
"grad_norm": 1.5332591533660889,
|
| 7519 |
+
"learning_rate": 9.42361761949391e-06,
|
| 7520 |
+
"loss": 0.1612,
|
| 7521 |
+
"step": 9840
|
| 7522 |
+
},
|
| 7523 |
+
{
|
| 7524 |
+
"epoch": 4.615745079662606,
|
| 7525 |
+
"grad_norm": 2.012329339981079,
|
| 7526 |
+
"learning_rate": 9.423031865042175e-06,
|
| 7527 |
+
"loss": 0.159,
|
| 7528 |
+
"step": 9850
|
| 7529 |
+
},
|
| 7530 |
+
{
|
| 7531 |
+
"epoch": 4.620431115276476,
|
| 7532 |
+
"grad_norm": 1.955610752105713,
|
| 7533 |
+
"learning_rate": 9.42244611059044e-06,
|
| 7534 |
+
"loss": 0.1672,
|
| 7535 |
+
"step": 9860
|
| 7536 |
+
},
|
| 7537 |
+
{
|
| 7538 |
+
"epoch": 4.625117150890347,
|
| 7539 |
+
"grad_norm": 2.124481439590454,
|
| 7540 |
+
"learning_rate": 9.421860356138709e-06,
|
| 7541 |
+
"loss": 0.1325,
|
| 7542 |
+
"step": 9870
|
| 7543 |
+
},
|
| 7544 |
+
{
|
| 7545 |
+
"epoch": 4.629803186504217,
|
| 7546 |
+
"grad_norm": 2.279585599899292,
|
| 7547 |
+
"learning_rate": 9.421274601686974e-06,
|
| 7548 |
+
"loss": 0.1611,
|
| 7549 |
+
"step": 9880
|
| 7550 |
+
},
|
| 7551 |
+
{
|
| 7552 |
+
"epoch": 4.634489222118088,
|
| 7553 |
+
"grad_norm": 1.9664572477340698,
|
| 7554 |
+
"learning_rate": 9.420688847235239e-06,
|
| 7555 |
+
"loss": 0.1299,
|
| 7556 |
+
"step": 9890
|
| 7557 |
+
},
|
| 7558 |
+
{
|
| 7559 |
+
"epoch": 4.639175257731958,
|
| 7560 |
+
"grad_norm": 2.385551691055298,
|
| 7561 |
+
"learning_rate": 9.420103092783506e-06,
|
| 7562 |
+
"loss": 0.1631,
|
| 7563 |
+
"step": 9900
|
| 7564 |
+
},
|
| 7565 |
+
{
|
| 7566 |
+
"epoch": 4.64386129334583,
|
| 7567 |
+
"grad_norm": 1.469383955001831,
|
| 7568 |
+
"learning_rate": 9.419517338331771e-06,
|
| 7569 |
+
"loss": 0.1205,
|
| 7570 |
+
"step": 9910
|
| 7571 |
+
},
|
| 7572 |
+
{
|
| 7573 |
+
"epoch": 4.6485473289597,
|
| 7574 |
+
"grad_norm": 2.6736183166503906,
|
| 7575 |
+
"learning_rate": 9.418931583880038e-06,
|
| 7576 |
+
"loss": 0.1675,
|
| 7577 |
+
"step": 9920
|
| 7578 |
+
},
|
| 7579 |
+
{
|
| 7580 |
+
"epoch": 4.653233364573571,
|
| 7581 |
+
"grad_norm": 3.122366189956665,
|
| 7582 |
+
"learning_rate": 9.418345829428305e-06,
|
| 7583 |
+
"loss": 0.135,
|
| 7584 |
+
"step": 9930
|
| 7585 |
+
},
|
| 7586 |
+
{
|
| 7587 |
+
"epoch": 4.657919400187441,
|
| 7588 |
+
"grad_norm": 2.8057267665863037,
|
| 7589 |
+
"learning_rate": 9.41776007497657e-06,
|
| 7590 |
+
"loss": 0.1318,
|
| 7591 |
+
"step": 9940
|
| 7592 |
+
},
|
| 7593 |
+
{
|
| 7594 |
+
"epoch": 4.6626054358013125,
|
| 7595 |
+
"grad_norm": 3.2085843086242676,
|
| 7596 |
+
"learning_rate": 9.417174320524837e-06,
|
| 7597 |
+
"loss": 0.163,
|
| 7598 |
+
"step": 9950
|
| 7599 |
+
},
|
| 7600 |
+
{
|
| 7601 |
+
"epoch": 4.667291471415183,
|
| 7602 |
+
"grad_norm": 3.1496477127075195,
|
| 7603 |
+
"learning_rate": 9.416588566073102e-06,
|
| 7604 |
+
"loss": 0.1655,
|
| 7605 |
+
"step": 9960
|
| 7606 |
+
},
|
| 7607 |
+
{
|
| 7608 |
+
"epoch": 4.6719775070290535,
|
| 7609 |
+
"grad_norm": 2.3873279094696045,
|
| 7610 |
+
"learning_rate": 9.41600281162137e-06,
|
| 7611 |
+
"loss": 0.1381,
|
| 7612 |
+
"step": 9970
|
| 7613 |
+
},
|
| 7614 |
+
{
|
| 7615 |
+
"epoch": 4.676663542642924,
|
| 7616 |
+
"grad_norm": 2.0492842197418213,
|
| 7617 |
+
"learning_rate": 9.415417057169636e-06,
|
| 7618 |
+
"loss": 0.1462,
|
| 7619 |
+
"step": 9980
|
| 7620 |
+
},
|
| 7621 |
+
{
|
| 7622 |
+
"epoch": 4.681349578256794,
|
| 7623 |
+
"grad_norm": 3.0173768997192383,
|
| 7624 |
+
"learning_rate": 9.414831302717901e-06,
|
| 7625 |
+
"loss": 0.1711,
|
| 7626 |
+
"step": 9990
|
| 7627 |
+
},
|
| 7628 |
+
{
|
| 7629 |
+
"epoch": 4.686035613870665,
|
| 7630 |
+
"grad_norm": 2.1712629795074463,
|
| 7631 |
+
"learning_rate": 9.414245548266168e-06,
|
| 7632 |
+
"loss": 0.16,
|
| 7633 |
+
"step": 10000
|
| 7634 |
+
},
|
| 7635 |
+
{
|
| 7636 |
+
"epoch": 4.686035613870665,
|
| 7637 |
+
"eval_loss": 0.0459674596786499,
|
| 7638 |
+
"eval_pearson_cosine": 0.7749308103807095,
|
| 7639 |
+
"eval_pearson_dot": 0.6140435552912393,
|
| 7640 |
+
"eval_pearson_euclidean": 0.7497870505171651,
|
| 7641 |
+
"eval_pearson_manhattan": 0.7494772460672863,
|
| 7642 |
+
"eval_runtime": 40.3087,
|
| 7643 |
+
"eval_samples_per_second": 37.213,
|
| 7644 |
+
"eval_spearman_cosine": 0.7793432902242333,
|
| 7645 |
+
"eval_spearman_dot": 0.6191753058355182,
|
| 7646 |
+
"eval_spearman_euclidean": 0.7659596686028919,
|
| 7647 |
+
"eval_spearman_manhattan": 0.7658321542772971,
|
| 7648 |
+
"eval_steps_per_second": 37.213,
|
| 7649 |
+
"step": 10000
|
| 7650 |
}
|
| 7651 |
],
|
| 7652 |
"logging_steps": 10,
|