Training in progress, step 13000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d153a85db882a8d2ec877dfba2d9b581b46d201ce2501b713d912d9b724be90d
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e64c3f6b99c05bc4af3b1afc1105f63d286ccdb944360ccc4c6c03aaa0867281
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:feb76d7d56395cb464f4c6b097cc298c265886f58499ea053baed20b9e64abbb
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61bccafd9792e811bc7ce6d26e59618969221a81768981a24e66ff1e4f6c92d4
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -9175,6 +9175,770 @@
|
|
| 9175 |
"eval_spearman_manhattan": 0.7598359774134882,
|
| 9176 |
"eval_steps_per_second": 37.573,
|
| 9177 |
"step": 12000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9178 |
}
|
| 9179 |
],
|
| 9180 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 6.091846298031865,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 13000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 9175 |
"eval_spearman_manhattan": 0.7598359774134882,
|
| 9176 |
"eval_steps_per_second": 37.573,
|
| 9177 |
"step": 12000
|
| 9178 |
+
},
|
| 9179 |
+
{
|
| 9180 |
+
"epoch": 5.627928772258669,
|
| 9181 |
+
"grad_norm": 2.1159090995788574,
|
| 9182 |
+
"learning_rate": 9.296508903467668e-06,
|
| 9183 |
+
"loss": 0.1142,
|
| 9184 |
+
"step": 12010
|
| 9185 |
+
},
|
| 9186 |
+
{
|
| 9187 |
+
"epoch": 5.63261480787254,
|
| 9188 |
+
"grad_norm": 2.249617576599121,
|
| 9189 |
+
"learning_rate": 9.295923149015933e-06,
|
| 9190 |
+
"loss": 0.1091,
|
| 9191 |
+
"step": 12020
|
| 9192 |
+
},
|
| 9193 |
+
{
|
| 9194 |
+
"epoch": 5.63730084348641,
|
| 9195 |
+
"grad_norm": 2.0257644653320312,
|
| 9196 |
+
"learning_rate": 9.295337394564198e-06,
|
| 9197 |
+
"loss": 0.1094,
|
| 9198 |
+
"step": 12030
|
| 9199 |
+
},
|
| 9200 |
+
{
|
| 9201 |
+
"epoch": 5.641986879100282,
|
| 9202 |
+
"grad_norm": 3.4566030502319336,
|
| 9203 |
+
"learning_rate": 9.294751640112467e-06,
|
| 9204 |
+
"loss": 0.1203,
|
| 9205 |
+
"step": 12040
|
| 9206 |
+
},
|
| 9207 |
+
{
|
| 9208 |
+
"epoch": 5.646672914714152,
|
| 9209 |
+
"grad_norm": 3.4752063751220703,
|
| 9210 |
+
"learning_rate": 9.294165885660732e-06,
|
| 9211 |
+
"loss": 0.1359,
|
| 9212 |
+
"step": 12050
|
| 9213 |
+
},
|
| 9214 |
+
{
|
| 9215 |
+
"epoch": 5.651358950328023,
|
| 9216 |
+
"grad_norm": 2.0857534408569336,
|
| 9217 |
+
"learning_rate": 9.293580131208997e-06,
|
| 9218 |
+
"loss": 0.0959,
|
| 9219 |
+
"step": 12060
|
| 9220 |
+
},
|
| 9221 |
+
{
|
| 9222 |
+
"epoch": 5.656044985941893,
|
| 9223 |
+
"grad_norm": 1.1136995553970337,
|
| 9224 |
+
"learning_rate": 9.292994376757264e-06,
|
| 9225 |
+
"loss": 0.0922,
|
| 9226 |
+
"step": 12070
|
| 9227 |
+
},
|
| 9228 |
+
{
|
| 9229 |
+
"epoch": 5.660731021555764,
|
| 9230 |
+
"grad_norm": 1.7703429460525513,
|
| 9231 |
+
"learning_rate": 9.29240862230553e-06,
|
| 9232 |
+
"loss": 0.1314,
|
| 9233 |
+
"step": 12080
|
| 9234 |
+
},
|
| 9235 |
+
{
|
| 9236 |
+
"epoch": 5.665417057169634,
|
| 9237 |
+
"grad_norm": 2.678006172180176,
|
| 9238 |
+
"learning_rate": 9.291822867853796e-06,
|
| 9239 |
+
"loss": 0.1124,
|
| 9240 |
+
"step": 12090
|
| 9241 |
+
},
|
| 9242 |
+
{
|
| 9243 |
+
"epoch": 5.670103092783505,
|
| 9244 |
+
"grad_norm": 2.3180134296417236,
|
| 9245 |
+
"learning_rate": 9.291237113402063e-06,
|
| 9246 |
+
"loss": 0.0916,
|
| 9247 |
+
"step": 12100
|
| 9248 |
+
},
|
| 9249 |
+
{
|
| 9250 |
+
"epoch": 5.674789128397376,
|
| 9251 |
+
"grad_norm": 1.4481223821640015,
|
| 9252 |
+
"learning_rate": 9.290651358950328e-06,
|
| 9253 |
+
"loss": 0.0934,
|
| 9254 |
+
"step": 12110
|
| 9255 |
+
},
|
| 9256 |
+
{
|
| 9257 |
+
"epoch": 5.679475164011246,
|
| 9258 |
+
"grad_norm": 1.6825222969055176,
|
| 9259 |
+
"learning_rate": 9.290065604498595e-06,
|
| 9260 |
+
"loss": 0.112,
|
| 9261 |
+
"step": 12120
|
| 9262 |
+
},
|
| 9263 |
+
{
|
| 9264 |
+
"epoch": 5.684161199625117,
|
| 9265 |
+
"grad_norm": 1.8293483257293701,
|
| 9266 |
+
"learning_rate": 9.28947985004686e-06,
|
| 9267 |
+
"loss": 0.1006,
|
| 9268 |
+
"step": 12130
|
| 9269 |
+
},
|
| 9270 |
+
{
|
| 9271 |
+
"epoch": 5.688847235238988,
|
| 9272 |
+
"grad_norm": 1.5621511936187744,
|
| 9273 |
+
"learning_rate": 9.288894095595127e-06,
|
| 9274 |
+
"loss": 0.1069,
|
| 9275 |
+
"step": 12140
|
| 9276 |
+
},
|
| 9277 |
+
{
|
| 9278 |
+
"epoch": 5.693533270852859,
|
| 9279 |
+
"grad_norm": 1.9712047576904297,
|
| 9280 |
+
"learning_rate": 9.288308341143394e-06,
|
| 9281 |
+
"loss": 0.1121,
|
| 9282 |
+
"step": 12150
|
| 9283 |
+
},
|
| 9284 |
+
{
|
| 9285 |
+
"epoch": 5.698219306466729,
|
| 9286 |
+
"grad_norm": 1.5489860773086548,
|
| 9287 |
+
"learning_rate": 9.28772258669166e-06,
|
| 9288 |
+
"loss": 0.1152,
|
| 9289 |
+
"step": 12160
|
| 9290 |
+
},
|
| 9291 |
+
{
|
| 9292 |
+
"epoch": 5.7029053420806,
|
| 9293 |
+
"grad_norm": 1.5992718935012817,
|
| 9294 |
+
"learning_rate": 9.287136832239927e-06,
|
| 9295 |
+
"loss": 0.1081,
|
| 9296 |
+
"step": 12170
|
| 9297 |
+
},
|
| 9298 |
+
{
|
| 9299 |
+
"epoch": 5.70759137769447,
|
| 9300 |
+
"grad_norm": 2.584080219268799,
|
| 9301 |
+
"learning_rate": 9.286551077788192e-06,
|
| 9302 |
+
"loss": 0.115,
|
| 9303 |
+
"step": 12180
|
| 9304 |
+
},
|
| 9305 |
+
{
|
| 9306 |
+
"epoch": 5.712277413308341,
|
| 9307 |
+
"grad_norm": 1.9940451383590698,
|
| 9308 |
+
"learning_rate": 9.285965323336457e-06,
|
| 9309 |
+
"loss": 0.1334,
|
| 9310 |
+
"step": 12190
|
| 9311 |
+
},
|
| 9312 |
+
{
|
| 9313 |
+
"epoch": 5.716963448922212,
|
| 9314 |
+
"grad_norm": 2.244067668914795,
|
| 9315 |
+
"learning_rate": 9.285379568884726e-06,
|
| 9316 |
+
"loss": 0.1126,
|
| 9317 |
+
"step": 12200
|
| 9318 |
+
},
|
| 9319 |
+
{
|
| 9320 |
+
"epoch": 5.721649484536083,
|
| 9321 |
+
"grad_norm": 2.828308343887329,
|
| 9322 |
+
"learning_rate": 9.28479381443299e-06,
|
| 9323 |
+
"loss": 0.0978,
|
| 9324 |
+
"step": 12210
|
| 9325 |
+
},
|
| 9326 |
+
{
|
| 9327 |
+
"epoch": 5.726335520149953,
|
| 9328 |
+
"grad_norm": 2.3048787117004395,
|
| 9329 |
+
"learning_rate": 9.284208059981256e-06,
|
| 9330 |
+
"loss": 0.1285,
|
| 9331 |
+
"step": 12220
|
| 9332 |
+
},
|
| 9333 |
+
{
|
| 9334 |
+
"epoch": 5.7310215557638235,
|
| 9335 |
+
"grad_norm": 1.9416192770004272,
|
| 9336 |
+
"learning_rate": 9.283622305529523e-06,
|
| 9337 |
+
"loss": 0.114,
|
| 9338 |
+
"step": 12230
|
| 9339 |
+
},
|
| 9340 |
+
{
|
| 9341 |
+
"epoch": 5.735707591377695,
|
| 9342 |
+
"grad_norm": 2.0904664993286133,
|
| 9343 |
+
"learning_rate": 9.283036551077788e-06,
|
| 9344 |
+
"loss": 0.1135,
|
| 9345 |
+
"step": 12240
|
| 9346 |
+
},
|
| 9347 |
+
{
|
| 9348 |
+
"epoch": 5.740393626991565,
|
| 9349 |
+
"grad_norm": 2.0567378997802734,
|
| 9350 |
+
"learning_rate": 9.282450796626055e-06,
|
| 9351 |
+
"loss": 0.1269,
|
| 9352 |
+
"step": 12250
|
| 9353 |
+
},
|
| 9354 |
+
{
|
| 9355 |
+
"epoch": 5.740393626991565,
|
| 9356 |
+
"eval_loss": 0.042026255279779434,
|
| 9357 |
+
"eval_pearson_cosine": 0.7802074426247394,
|
| 9358 |
+
"eval_pearson_dot": 0.621680331450122,
|
| 9359 |
+
"eval_pearson_euclidean": 0.7417166161845756,
|
| 9360 |
+
"eval_pearson_manhattan": 0.7412630516460794,
|
| 9361 |
+
"eval_runtime": 40.3256,
|
| 9362 |
+
"eval_samples_per_second": 37.197,
|
| 9363 |
+
"eval_spearman_cosine": 0.7839546315832364,
|
| 9364 |
+
"eval_spearman_dot": 0.6311338337036988,
|
| 9365 |
+
"eval_spearman_euclidean": 0.7564314536390471,
|
| 9366 |
+
"eval_spearman_manhattan": 0.7562308413966785,
|
| 9367 |
+
"eval_steps_per_second": 37.197,
|
| 9368 |
+
"step": 12250
|
| 9369 |
+
},
|
| 9370 |
+
{
|
| 9371 |
+
"epoch": 5.745079662605436,
|
| 9372 |
+
"grad_norm": 1.8017923831939697,
|
| 9373 |
+
"learning_rate": 9.281865042174322e-06,
|
| 9374 |
+
"loss": 0.116,
|
| 9375 |
+
"step": 12260
|
| 9376 |
+
},
|
| 9377 |
+
{
|
| 9378 |
+
"epoch": 5.749765698219306,
|
| 9379 |
+
"grad_norm": 2.184885025024414,
|
| 9380 |
+
"learning_rate": 9.281279287722587e-06,
|
| 9381 |
+
"loss": 0.1141,
|
| 9382 |
+
"step": 12270
|
| 9383 |
+
},
|
| 9384 |
+
{
|
| 9385 |
+
"epoch": 5.754451733833177,
|
| 9386 |
+
"grad_norm": 2.258493423461914,
|
| 9387 |
+
"learning_rate": 9.280693533270854e-06,
|
| 9388 |
+
"loss": 0.1179,
|
| 9389 |
+
"step": 12280
|
| 9390 |
+
},
|
| 9391 |
+
{
|
| 9392 |
+
"epoch": 5.759137769447047,
|
| 9393 |
+
"grad_norm": 3.2758543491363525,
|
| 9394 |
+
"learning_rate": 9.28010777881912e-06,
|
| 9395 |
+
"loss": 0.1354,
|
| 9396 |
+
"step": 12290
|
| 9397 |
+
},
|
| 9398 |
+
{
|
| 9399 |
+
"epoch": 5.763823805060919,
|
| 9400 |
+
"grad_norm": 2.4894609451293945,
|
| 9401 |
+
"learning_rate": 9.279522024367386e-06,
|
| 9402 |
+
"loss": 0.1088,
|
| 9403 |
+
"step": 12300
|
| 9404 |
+
},
|
| 9405 |
+
{
|
| 9406 |
+
"epoch": 5.768509840674789,
|
| 9407 |
+
"grad_norm": 1.9505615234375,
|
| 9408 |
+
"learning_rate": 9.278936269915653e-06,
|
| 9409 |
+
"loss": 0.1104,
|
| 9410 |
+
"step": 12310
|
| 9411 |
+
},
|
| 9412 |
+
{
|
| 9413 |
+
"epoch": 5.77319587628866,
|
| 9414 |
+
"grad_norm": 2.9411964416503906,
|
| 9415 |
+
"learning_rate": 9.278350515463918e-06,
|
| 9416 |
+
"loss": 0.1333,
|
| 9417 |
+
"step": 12320
|
| 9418 |
+
},
|
| 9419 |
+
{
|
| 9420 |
+
"epoch": 5.77788191190253,
|
| 9421 |
+
"grad_norm": 2.877175807952881,
|
| 9422 |
+
"learning_rate": 9.277764761012185e-06,
|
| 9423 |
+
"loss": 0.1038,
|
| 9424 |
+
"step": 12330
|
| 9425 |
+
},
|
| 9426 |
+
{
|
| 9427 |
+
"epoch": 5.782567947516402,
|
| 9428 |
+
"grad_norm": 2.866086006164551,
|
| 9429 |
+
"learning_rate": 9.27717900656045e-06,
|
| 9430 |
+
"loss": 0.1119,
|
| 9431 |
+
"step": 12340
|
| 9432 |
+
},
|
| 9433 |
+
{
|
| 9434 |
+
"epoch": 5.787253983130272,
|
| 9435 |
+
"grad_norm": 2.0350656509399414,
|
| 9436 |
+
"learning_rate": 9.276593252108716e-06,
|
| 9437 |
+
"loss": 0.1218,
|
| 9438 |
+
"step": 12350
|
| 9439 |
+
},
|
| 9440 |
+
{
|
| 9441 |
+
"epoch": 5.7919400187441425,
|
| 9442 |
+
"grad_norm": 1.9179691076278687,
|
| 9443 |
+
"learning_rate": 9.276007497656983e-06,
|
| 9444 |
+
"loss": 0.117,
|
| 9445 |
+
"step": 12360
|
| 9446 |
+
},
|
| 9447 |
+
{
|
| 9448 |
+
"epoch": 5.796626054358013,
|
| 9449 |
+
"grad_norm": 1.894805669784546,
|
| 9450 |
+
"learning_rate": 9.27542174320525e-06,
|
| 9451 |
+
"loss": 0.1148,
|
| 9452 |
+
"step": 12370
|
| 9453 |
+
},
|
| 9454 |
+
{
|
| 9455 |
+
"epoch": 5.8013120899718835,
|
| 9456 |
+
"grad_norm": 1.7460695505142212,
|
| 9457 |
+
"learning_rate": 9.274835988753515e-06,
|
| 9458 |
+
"loss": 0.1347,
|
| 9459 |
+
"step": 12380
|
| 9460 |
+
},
|
| 9461 |
+
{
|
| 9462 |
+
"epoch": 5.805998125585754,
|
| 9463 |
+
"grad_norm": 2.7748680114746094,
|
| 9464 |
+
"learning_rate": 9.274250234301782e-06,
|
| 9465 |
+
"loss": 0.1077,
|
| 9466 |
+
"step": 12390
|
| 9467 |
+
},
|
| 9468 |
+
{
|
| 9469 |
+
"epoch": 5.810684161199625,
|
| 9470 |
+
"grad_norm": 2.6616406440734863,
|
| 9471 |
+
"learning_rate": 9.273664479850047e-06,
|
| 9472 |
+
"loss": 0.111,
|
| 9473 |
+
"step": 12400
|
| 9474 |
+
},
|
| 9475 |
+
{
|
| 9476 |
+
"epoch": 5.815370196813496,
|
| 9477 |
+
"grad_norm": 2.389298439025879,
|
| 9478 |
+
"learning_rate": 9.273078725398314e-06,
|
| 9479 |
+
"loss": 0.1061,
|
| 9480 |
+
"step": 12410
|
| 9481 |
+
},
|
| 9482 |
+
{
|
| 9483 |
+
"epoch": 5.820056232427366,
|
| 9484 |
+
"grad_norm": 1.6245344877243042,
|
| 9485 |
+
"learning_rate": 9.272492970946579e-06,
|
| 9486 |
+
"loss": 0.1196,
|
| 9487 |
+
"step": 12420
|
| 9488 |
+
},
|
| 9489 |
+
{
|
| 9490 |
+
"epoch": 5.824742268041237,
|
| 9491 |
+
"grad_norm": 2.8195879459381104,
|
| 9492 |
+
"learning_rate": 9.271907216494846e-06,
|
| 9493 |
+
"loss": 0.1265,
|
| 9494 |
+
"step": 12430
|
| 9495 |
+
},
|
| 9496 |
+
{
|
| 9497 |
+
"epoch": 5.829428303655108,
|
| 9498 |
+
"grad_norm": 2.538292169570923,
|
| 9499 |
+
"learning_rate": 9.271321462043113e-06,
|
| 9500 |
+
"loss": 0.1038,
|
| 9501 |
+
"step": 12440
|
| 9502 |
+
},
|
| 9503 |
+
{
|
| 9504 |
+
"epoch": 5.834114339268979,
|
| 9505 |
+
"grad_norm": 1.4378900527954102,
|
| 9506 |
+
"learning_rate": 9.270735707591378e-06,
|
| 9507 |
+
"loss": 0.1097,
|
| 9508 |
+
"step": 12450
|
| 9509 |
+
},
|
| 9510 |
+
{
|
| 9511 |
+
"epoch": 5.838800374882849,
|
| 9512 |
+
"grad_norm": 2.120596170425415,
|
| 9513 |
+
"learning_rate": 9.270149953139645e-06,
|
| 9514 |
+
"loss": 0.1054,
|
| 9515 |
+
"step": 12460
|
| 9516 |
+
},
|
| 9517 |
+
{
|
| 9518 |
+
"epoch": 5.84348641049672,
|
| 9519 |
+
"grad_norm": 1.7521088123321533,
|
| 9520 |
+
"learning_rate": 9.26956419868791e-06,
|
| 9521 |
+
"loss": 0.0985,
|
| 9522 |
+
"step": 12470
|
| 9523 |
+
},
|
| 9524 |
+
{
|
| 9525 |
+
"epoch": 5.84817244611059,
|
| 9526 |
+
"grad_norm": 2.082510471343994,
|
| 9527 |
+
"learning_rate": 9.268978444236177e-06,
|
| 9528 |
+
"loss": 0.1142,
|
| 9529 |
+
"step": 12480
|
| 9530 |
+
},
|
| 9531 |
+
{
|
| 9532 |
+
"epoch": 5.852858481724461,
|
| 9533 |
+
"grad_norm": 2.3451695442199707,
|
| 9534 |
+
"learning_rate": 9.268392689784444e-06,
|
| 9535 |
+
"loss": 0.135,
|
| 9536 |
+
"step": 12490
|
| 9537 |
+
},
|
| 9538 |
+
{
|
| 9539 |
+
"epoch": 5.857544517338332,
|
| 9540 |
+
"grad_norm": 1.9797242879867554,
|
| 9541 |
+
"learning_rate": 9.26780693533271e-06,
|
| 9542 |
+
"loss": 0.0888,
|
| 9543 |
+
"step": 12500
|
| 9544 |
+
},
|
| 9545 |
+
{
|
| 9546 |
+
"epoch": 5.857544517338332,
|
| 9547 |
+
"eval_loss": 0.04142308607697487,
|
| 9548 |
+
"eval_pearson_cosine": 0.7805016780478695,
|
| 9549 |
+
"eval_pearson_dot": 0.6245128907955291,
|
| 9550 |
+
"eval_pearson_euclidean": 0.7411648320805888,
|
| 9551 |
+
"eval_pearson_manhattan": 0.7407809523735267,
|
| 9552 |
+
"eval_runtime": 39.8943,
|
| 9553 |
+
"eval_samples_per_second": 37.599,
|
| 9554 |
+
"eval_spearman_cosine": 0.7841450480888137,
|
| 9555 |
+
"eval_spearman_dot": 0.636499292941551,
|
| 9556 |
+
"eval_spearman_euclidean": 0.7567573577855005,
|
| 9557 |
+
"eval_spearman_manhattan": 0.7567068203829979,
|
| 9558 |
+
"eval_steps_per_second": 37.599,
|
| 9559 |
+
"step": 12500
|
| 9560 |
+
},
|
| 9561 |
+
{
|
| 9562 |
+
"epoch": 5.8622305529522025,
|
| 9563 |
+
"grad_norm": 2.519564628601074,
|
| 9564 |
+
"learning_rate": 9.267221180880975e-06,
|
| 9565 |
+
"loss": 0.1118,
|
| 9566 |
+
"step": 12510
|
| 9567 |
+
},
|
| 9568 |
+
{
|
| 9569 |
+
"epoch": 5.866916588566073,
|
| 9570 |
+
"grad_norm": 2.348604679107666,
|
| 9571 |
+
"learning_rate": 9.266635426429241e-06,
|
| 9572 |
+
"loss": 0.1165,
|
| 9573 |
+
"step": 12520
|
| 9574 |
+
},
|
| 9575 |
+
{
|
| 9576 |
+
"epoch": 5.8716026241799435,
|
| 9577 |
+
"grad_norm": 1.9285309314727783,
|
| 9578 |
+
"learning_rate": 9.266049671977507e-06,
|
| 9579 |
+
"loss": 0.1168,
|
| 9580 |
+
"step": 12530
|
| 9581 |
+
},
|
| 9582 |
+
{
|
| 9583 |
+
"epoch": 5.876288659793815,
|
| 9584 |
+
"grad_norm": 2.3968348503112793,
|
| 9585 |
+
"learning_rate": 9.265463917525774e-06,
|
| 9586 |
+
"loss": 0.1226,
|
| 9587 |
+
"step": 12540
|
| 9588 |
+
},
|
| 9589 |
+
{
|
| 9590 |
+
"epoch": 5.880974695407685,
|
| 9591 |
+
"grad_norm": 1.3296688795089722,
|
| 9592 |
+
"learning_rate": 9.26487816307404e-06,
|
| 9593 |
+
"loss": 0.0979,
|
| 9594 |
+
"step": 12550
|
| 9595 |
+
},
|
| 9596 |
+
{
|
| 9597 |
+
"epoch": 5.885660731021556,
|
| 9598 |
+
"grad_norm": 2.3655405044555664,
|
| 9599 |
+
"learning_rate": 9.264292408622306e-06,
|
| 9600 |
+
"loss": 0.1163,
|
| 9601 |
+
"step": 12560
|
| 9602 |
+
},
|
| 9603 |
+
{
|
| 9604 |
+
"epoch": 5.890346766635426,
|
| 9605 |
+
"grad_norm": 1.9741175174713135,
|
| 9606 |
+
"learning_rate": 9.263706654170573e-06,
|
| 9607 |
+
"loss": 0.1193,
|
| 9608 |
+
"step": 12570
|
| 9609 |
+
},
|
| 9610 |
+
{
|
| 9611 |
+
"epoch": 5.895032802249297,
|
| 9612 |
+
"grad_norm": 2.2787790298461914,
|
| 9613 |
+
"learning_rate": 9.263120899718838e-06,
|
| 9614 |
+
"loss": 0.1053,
|
| 9615 |
+
"step": 12580
|
| 9616 |
+
},
|
| 9617 |
+
{
|
| 9618 |
+
"epoch": 5.899718837863167,
|
| 9619 |
+
"grad_norm": 2.3028697967529297,
|
| 9620 |
+
"learning_rate": 9.262535145267105e-06,
|
| 9621 |
+
"loss": 0.105,
|
| 9622 |
+
"step": 12590
|
| 9623 |
+
},
|
| 9624 |
+
{
|
| 9625 |
+
"epoch": 5.904404873477039,
|
| 9626 |
+
"grad_norm": 2.420567274093628,
|
| 9627 |
+
"learning_rate": 9.261949390815372e-06,
|
| 9628 |
+
"loss": 0.1153,
|
| 9629 |
+
"step": 12600
|
| 9630 |
+
},
|
| 9631 |
+
{
|
| 9632 |
+
"epoch": 5.909090909090909,
|
| 9633 |
+
"grad_norm": 1.8667070865631104,
|
| 9634 |
+
"learning_rate": 9.261363636363637e-06,
|
| 9635 |
+
"loss": 0.1206,
|
| 9636 |
+
"step": 12610
|
| 9637 |
+
},
|
| 9638 |
+
{
|
| 9639 |
+
"epoch": 5.91377694470478,
|
| 9640 |
+
"grad_norm": 2.433323621749878,
|
| 9641 |
+
"learning_rate": 9.260777881911904e-06,
|
| 9642 |
+
"loss": 0.1107,
|
| 9643 |
+
"step": 12620
|
| 9644 |
+
},
|
| 9645 |
+
{
|
| 9646 |
+
"epoch": 5.91846298031865,
|
| 9647 |
+
"grad_norm": 1.6899259090423584,
|
| 9648 |
+
"learning_rate": 9.260192127460169e-06,
|
| 9649 |
+
"loss": 0.1006,
|
| 9650 |
+
"step": 12630
|
| 9651 |
+
},
|
| 9652 |
+
{
|
| 9653 |
+
"epoch": 5.9231490159325215,
|
| 9654 |
+
"grad_norm": 3.0744214057922363,
|
| 9655 |
+
"learning_rate": 9.259606373008434e-06,
|
| 9656 |
+
"loss": 0.1165,
|
| 9657 |
+
"step": 12640
|
| 9658 |
+
},
|
| 9659 |
+
{
|
| 9660 |
+
"epoch": 5.927835051546392,
|
| 9661 |
+
"grad_norm": 1.6527074575424194,
|
| 9662 |
+
"learning_rate": 9.259020618556703e-06,
|
| 9663 |
+
"loss": 0.1134,
|
| 9664 |
+
"step": 12650
|
| 9665 |
+
},
|
| 9666 |
+
{
|
| 9667 |
+
"epoch": 5.9325210871602625,
|
| 9668 |
+
"grad_norm": 2.3836679458618164,
|
| 9669 |
+
"learning_rate": 9.258434864104968e-06,
|
| 9670 |
+
"loss": 0.1195,
|
| 9671 |
+
"step": 12660
|
| 9672 |
+
},
|
| 9673 |
+
{
|
| 9674 |
+
"epoch": 5.937207122774133,
|
| 9675 |
+
"grad_norm": 1.6903315782546997,
|
| 9676 |
+
"learning_rate": 9.257849109653233e-06,
|
| 9677 |
+
"loss": 0.125,
|
| 9678 |
+
"step": 12670
|
| 9679 |
+
},
|
| 9680 |
+
{
|
| 9681 |
+
"epoch": 5.9418931583880035,
|
| 9682 |
+
"grad_norm": 2.0928590297698975,
|
| 9683 |
+
"learning_rate": 9.2572633552015e-06,
|
| 9684 |
+
"loss": 0.114,
|
| 9685 |
+
"step": 12680
|
| 9686 |
+
},
|
| 9687 |
+
{
|
| 9688 |
+
"epoch": 5.946579194001874,
|
| 9689 |
+
"grad_norm": 1.6326929330825806,
|
| 9690 |
+
"learning_rate": 9.256677600749765e-06,
|
| 9691 |
+
"loss": 0.1056,
|
| 9692 |
+
"step": 12690
|
| 9693 |
+
},
|
| 9694 |
+
{
|
| 9695 |
+
"epoch": 5.951265229615745,
|
| 9696 |
+
"grad_norm": 2.0911965370178223,
|
| 9697 |
+
"learning_rate": 9.256091846298032e-06,
|
| 9698 |
+
"loss": 0.128,
|
| 9699 |
+
"step": 12700
|
| 9700 |
+
},
|
| 9701 |
+
{
|
| 9702 |
+
"epoch": 5.955951265229616,
|
| 9703 |
+
"grad_norm": 1.6815580129623413,
|
| 9704 |
+
"learning_rate": 9.2555060918463e-06,
|
| 9705 |
+
"loss": 0.1211,
|
| 9706 |
+
"step": 12710
|
| 9707 |
+
},
|
| 9708 |
+
{
|
| 9709 |
+
"epoch": 5.960637300843486,
|
| 9710 |
+
"grad_norm": 2.4735517501831055,
|
| 9711 |
+
"learning_rate": 9.254920337394565e-06,
|
| 9712 |
+
"loss": 0.1246,
|
| 9713 |
+
"step": 12720
|
| 9714 |
+
},
|
| 9715 |
+
{
|
| 9716 |
+
"epoch": 5.965323336457357,
|
| 9717 |
+
"grad_norm": 1.822643756866455,
|
| 9718 |
+
"learning_rate": 9.254334582942831e-06,
|
| 9719 |
+
"loss": 0.1119,
|
| 9720 |
+
"step": 12730
|
| 9721 |
+
},
|
| 9722 |
+
{
|
| 9723 |
+
"epoch": 5.970009372071228,
|
| 9724 |
+
"grad_norm": 2.694791793823242,
|
| 9725 |
+
"learning_rate": 9.253748828491097e-06,
|
| 9726 |
+
"loss": 0.1186,
|
| 9727 |
+
"step": 12740
|
| 9728 |
+
},
|
| 9729 |
+
{
|
| 9730 |
+
"epoch": 5.974695407685099,
|
| 9731 |
+
"grad_norm": 1.8677020072937012,
|
| 9732 |
+
"learning_rate": 9.253163074039364e-06,
|
| 9733 |
+
"loss": 0.1202,
|
| 9734 |
+
"step": 12750
|
| 9735 |
+
},
|
| 9736 |
+
{
|
| 9737 |
+
"epoch": 5.974695407685099,
|
| 9738 |
+
"eval_loss": 0.04308323189616203,
|
| 9739 |
+
"eval_pearson_cosine": 0.7792983938024989,
|
| 9740 |
+
"eval_pearson_dot": 0.6261386080869897,
|
| 9741 |
+
"eval_pearson_euclidean": 0.7413977396293134,
|
| 9742 |
+
"eval_pearson_manhattan": 0.7411537960595762,
|
| 9743 |
+
"eval_runtime": 41.5128,
|
| 9744 |
+
"eval_samples_per_second": 36.133,
|
| 9745 |
+
"eval_spearman_cosine": 0.7834591025676726,
|
| 9746 |
+
"eval_spearman_dot": 0.6404906337885011,
|
| 9747 |
+
"eval_spearman_euclidean": 0.7574889490533175,
|
| 9748 |
+
"eval_spearman_manhattan": 0.7571743616408941,
|
| 9749 |
+
"eval_steps_per_second": 36.133,
|
| 9750 |
+
"step": 12750
|
| 9751 |
+
},
|
| 9752 |
+
{
|
| 9753 |
+
"epoch": 5.979381443298969,
|
| 9754 |
+
"grad_norm": 2.534433126449585,
|
| 9755 |
+
"learning_rate": 9.25257731958763e-06,
|
| 9756 |
+
"loss": 0.1275,
|
| 9757 |
+
"step": 12760
|
| 9758 |
+
},
|
| 9759 |
+
{
|
| 9760 |
+
"epoch": 5.98406747891284,
|
| 9761 |
+
"grad_norm": 1.7585105895996094,
|
| 9762 |
+
"learning_rate": 9.251991565135896e-06,
|
| 9763 |
+
"loss": 0.1129,
|
| 9764 |
+
"step": 12770
|
| 9765 |
+
},
|
| 9766 |
+
{
|
| 9767 |
+
"epoch": 5.98875351452671,
|
| 9768 |
+
"grad_norm": 2.6499111652374268,
|
| 9769 |
+
"learning_rate": 9.251405810684163e-06,
|
| 9770 |
+
"loss": 0.1117,
|
| 9771 |
+
"step": 12780
|
| 9772 |
+
},
|
| 9773 |
+
{
|
| 9774 |
+
"epoch": 5.993439550140581,
|
| 9775 |
+
"grad_norm": 2.0610055923461914,
|
| 9776 |
+
"learning_rate": 9.250820056232428e-06,
|
| 9777 |
+
"loss": 0.1137,
|
| 9778 |
+
"step": 12790
|
| 9779 |
+
},
|
| 9780 |
+
{
|
| 9781 |
+
"epoch": 5.998125585754452,
|
| 9782 |
+
"grad_norm": 2.293468952178955,
|
| 9783 |
+
"learning_rate": 9.250234301780693e-06,
|
| 9784 |
+
"loss": 0.1178,
|
| 9785 |
+
"step": 12800
|
| 9786 |
+
},
|
| 9787 |
+
{
|
| 9788 |
+
"epoch": 6.0028116213683225,
|
| 9789 |
+
"grad_norm": 1.97608482837677,
|
| 9790 |
+
"learning_rate": 9.249648547328962e-06,
|
| 9791 |
+
"loss": 0.1105,
|
| 9792 |
+
"step": 12810
|
| 9793 |
+
},
|
| 9794 |
+
{
|
| 9795 |
+
"epoch": 6.007497656982193,
|
| 9796 |
+
"grad_norm": 1.9157034158706665,
|
| 9797 |
+
"learning_rate": 9.249062792877227e-06,
|
| 9798 |
+
"loss": 0.0914,
|
| 9799 |
+
"step": 12820
|
| 9800 |
+
},
|
| 9801 |
+
{
|
| 9802 |
+
"epoch": 6.0121836925960634,
|
| 9803 |
+
"grad_norm": 1.4950352907180786,
|
| 9804 |
+
"learning_rate": 9.248477038425492e-06,
|
| 9805 |
+
"loss": 0.0983,
|
| 9806 |
+
"step": 12830
|
| 9807 |
+
},
|
| 9808 |
+
{
|
| 9809 |
+
"epoch": 6.016869728209935,
|
| 9810 |
+
"grad_norm": 1.4796631336212158,
|
| 9811 |
+
"learning_rate": 9.247891283973759e-06,
|
| 9812 |
+
"loss": 0.0799,
|
| 9813 |
+
"step": 12840
|
| 9814 |
+
},
|
| 9815 |
+
{
|
| 9816 |
+
"epoch": 6.021555763823805,
|
| 9817 |
+
"grad_norm": 1.68351149559021,
|
| 9818 |
+
"learning_rate": 9.247305529522024e-06,
|
| 9819 |
+
"loss": 0.079,
|
| 9820 |
+
"step": 12850
|
| 9821 |
+
},
|
| 9822 |
+
{
|
| 9823 |
+
"epoch": 6.026241799437676,
|
| 9824 |
+
"grad_norm": 2.24094295501709,
|
| 9825 |
+
"learning_rate": 9.246719775070291e-06,
|
| 9826 |
+
"loss": 0.0908,
|
| 9827 |
+
"step": 12860
|
| 9828 |
+
},
|
| 9829 |
+
{
|
| 9830 |
+
"epoch": 6.030927835051546,
|
| 9831 |
+
"grad_norm": 2.414583683013916,
|
| 9832 |
+
"learning_rate": 9.246134020618558e-06,
|
| 9833 |
+
"loss": 0.0908,
|
| 9834 |
+
"step": 12870
|
| 9835 |
+
},
|
| 9836 |
+
{
|
| 9837 |
+
"epoch": 6.035613870665417,
|
| 9838 |
+
"grad_norm": 2.87400221824646,
|
| 9839 |
+
"learning_rate": 9.245548266166823e-06,
|
| 9840 |
+
"loss": 0.085,
|
| 9841 |
+
"step": 12880
|
| 9842 |
+
},
|
| 9843 |
+
{
|
| 9844 |
+
"epoch": 6.040299906279288,
|
| 9845 |
+
"grad_norm": 1.8591458797454834,
|
| 9846 |
+
"learning_rate": 9.24496251171509e-06,
|
| 9847 |
+
"loss": 0.0825,
|
| 9848 |
+
"step": 12890
|
| 9849 |
+
},
|
| 9850 |
+
{
|
| 9851 |
+
"epoch": 6.044985941893159,
|
| 9852 |
+
"grad_norm": 2.2384636402130127,
|
| 9853 |
+
"learning_rate": 9.244376757263355e-06,
|
| 9854 |
+
"loss": 0.0826,
|
| 9855 |
+
"step": 12900
|
| 9856 |
+
},
|
| 9857 |
+
{
|
| 9858 |
+
"epoch": 6.049671977507029,
|
| 9859 |
+
"grad_norm": 1.670571208000183,
|
| 9860 |
+
"learning_rate": 9.243791002811622e-06,
|
| 9861 |
+
"loss": 0.0746,
|
| 9862 |
+
"step": 12910
|
| 9863 |
+
},
|
| 9864 |
+
{
|
| 9865 |
+
"epoch": 6.0543580131209,
|
| 9866 |
+
"grad_norm": 1.607620358467102,
|
| 9867 |
+
"learning_rate": 9.243205248359888e-06,
|
| 9868 |
+
"loss": 0.106,
|
| 9869 |
+
"step": 12920
|
| 9870 |
+
},
|
| 9871 |
+
{
|
| 9872 |
+
"epoch": 6.05904404873477,
|
| 9873 |
+
"grad_norm": 1.543734073638916,
|
| 9874 |
+
"learning_rate": 9.242619493908155e-06,
|
| 9875 |
+
"loss": 0.0788,
|
| 9876 |
+
"step": 12930
|
| 9877 |
+
},
|
| 9878 |
+
{
|
| 9879 |
+
"epoch": 6.0637300843486415,
|
| 9880 |
+
"grad_norm": 2.0840065479278564,
|
| 9881 |
+
"learning_rate": 9.242033739456421e-06,
|
| 9882 |
+
"loss": 0.1013,
|
| 9883 |
+
"step": 12940
|
| 9884 |
+
},
|
| 9885 |
+
{
|
| 9886 |
+
"epoch": 6.068416119962512,
|
| 9887 |
+
"grad_norm": 1.8061577081680298,
|
| 9888 |
+
"learning_rate": 9.241447985004687e-06,
|
| 9889 |
+
"loss": 0.0895,
|
| 9890 |
+
"step": 12950
|
| 9891 |
+
},
|
| 9892 |
+
{
|
| 9893 |
+
"epoch": 6.073102155576382,
|
| 9894 |
+
"grad_norm": 1.341036081314087,
|
| 9895 |
+
"learning_rate": 9.240862230552952e-06,
|
| 9896 |
+
"loss": 0.0714,
|
| 9897 |
+
"step": 12960
|
| 9898 |
+
},
|
| 9899 |
+
{
|
| 9900 |
+
"epoch": 6.077788191190253,
|
| 9901 |
+
"grad_norm": 2.1150712966918945,
|
| 9902 |
+
"learning_rate": 9.240276476101219e-06,
|
| 9903 |
+
"loss": 0.0899,
|
| 9904 |
+
"step": 12970
|
| 9905 |
+
},
|
| 9906 |
+
{
|
| 9907 |
+
"epoch": 6.082474226804123,
|
| 9908 |
+
"grad_norm": 2.214730739593506,
|
| 9909 |
+
"learning_rate": 9.239690721649486e-06,
|
| 9910 |
+
"loss": 0.0758,
|
| 9911 |
+
"step": 12980
|
| 9912 |
+
},
|
| 9913 |
+
{
|
| 9914 |
+
"epoch": 6.087160262417995,
|
| 9915 |
+
"grad_norm": 1.489686369895935,
|
| 9916 |
+
"learning_rate": 9.239104967197751e-06,
|
| 9917 |
+
"loss": 0.0784,
|
| 9918 |
+
"step": 12990
|
| 9919 |
+
},
|
| 9920 |
+
{
|
| 9921 |
+
"epoch": 6.091846298031865,
|
| 9922 |
+
"grad_norm": 1.2778211832046509,
|
| 9923 |
+
"learning_rate": 9.238519212746018e-06,
|
| 9924 |
+
"loss": 0.0941,
|
| 9925 |
+
"step": 13000
|
| 9926 |
+
},
|
| 9927 |
+
{
|
| 9928 |
+
"epoch": 6.091846298031865,
|
| 9929 |
+
"eval_loss": 0.0399174839258194,
|
| 9930 |
+
"eval_pearson_cosine": 0.7838266464106027,
|
| 9931 |
+
"eval_pearson_dot": 0.6493223534201924,
|
| 9932 |
+
"eval_pearson_euclidean": 0.739064666910151,
|
| 9933 |
+
"eval_pearson_manhattan": 0.7387769365054666,
|
| 9934 |
+
"eval_runtime": 40.0598,
|
| 9935 |
+
"eval_samples_per_second": 37.444,
|
| 9936 |
+
"eval_spearman_cosine": 0.7872885894711749,
|
| 9937 |
+
"eval_spearman_dot": 0.6641643317048077,
|
| 9938 |
+
"eval_spearman_euclidean": 0.7529671041992676,
|
| 9939 |
+
"eval_spearman_manhattan": 0.752705655614685,
|
| 9940 |
+
"eval_steps_per_second": 37.444,
|
| 9941 |
+
"step": 13000
|
| 9942 |
}
|
| 9943 |
],
|
| 9944 |
"logging_steps": 10,
|