Training in progress, step 21000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2345f93cb689f8d7f41eab40d0cef18241e972878e7fb6948d71f1371719ca8
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88aacbb6072c3cfcd0a072fac3b759771484a894347ed77a4b36afa5c1d0bc3b
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28d77b9708e785984189dd87311c593d951d08be1862b45c82e09f23e0a264bc
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a12c30886420598486baa82bdd0616396462f1a93af3275146e2f56424c6d27
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -15287,6 +15287,770 @@
|
|
| 15287 |
"eval_spearman_manhattan": 0.742345267890976,
|
| 15288 |
"eval_steps_per_second": 37.771,
|
| 15289 |
"step": 20000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15290 |
}
|
| 15291 |
],
|
| 15292 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.840674789128398,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 21000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 15287 |
"eval_spearman_manhattan": 0.742345267890976,
|
| 15288 |
"eval_steps_per_second": 37.771,
|
| 15289 |
"step": 20000
|
| 15290 |
+
},
|
| 15291 |
+
{
|
| 15292 |
+
"epoch": 9.376757263355202,
|
| 15293 |
+
"grad_norm": 1.831284999847412,
|
| 15294 |
+
"learning_rate": 8.827905342080601e-06,
|
| 15295 |
+
"loss": 0.0489,
|
| 15296 |
+
"step": 20010
|
| 15297 |
+
},
|
| 15298 |
+
{
|
| 15299 |
+
"epoch": 9.381443298969073,
|
| 15300 |
+
"grad_norm": 1.498917818069458,
|
| 15301 |
+
"learning_rate": 8.827319587628866e-06,
|
| 15302 |
+
"loss": 0.0497,
|
| 15303 |
+
"step": 20020
|
| 15304 |
+
},
|
| 15305 |
+
{
|
| 15306 |
+
"epoch": 9.386129334582943,
|
| 15307 |
+
"grad_norm": 1.7997996807098389,
|
| 15308 |
+
"learning_rate": 8.826733833177133e-06,
|
| 15309 |
+
"loss": 0.0543,
|
| 15310 |
+
"step": 20030
|
| 15311 |
+
},
|
| 15312 |
+
{
|
| 15313 |
+
"epoch": 9.390815370196814,
|
| 15314 |
+
"grad_norm": 1.4676984548568726,
|
| 15315 |
+
"learning_rate": 8.8261480787254e-06,
|
| 15316 |
+
"loss": 0.0402,
|
| 15317 |
+
"step": 20040
|
| 15318 |
+
},
|
| 15319 |
+
{
|
| 15320 |
+
"epoch": 9.395501405810684,
|
| 15321 |
+
"grad_norm": 1.4647475481033325,
|
| 15322 |
+
"learning_rate": 8.825562324273665e-06,
|
| 15323 |
+
"loss": 0.0483,
|
| 15324 |
+
"step": 20050
|
| 15325 |
+
},
|
| 15326 |
+
{
|
| 15327 |
+
"epoch": 9.400187441424555,
|
| 15328 |
+
"grad_norm": 1.9055359363555908,
|
| 15329 |
+
"learning_rate": 8.824976569821932e-06,
|
| 15330 |
+
"loss": 0.057,
|
| 15331 |
+
"step": 20060
|
| 15332 |
+
},
|
| 15333 |
+
{
|
| 15334 |
+
"epoch": 9.404873477038425,
|
| 15335 |
+
"grad_norm": 1.243730068206787,
|
| 15336 |
+
"learning_rate": 8.824390815370197e-06,
|
| 15337 |
+
"loss": 0.0521,
|
| 15338 |
+
"step": 20070
|
| 15339 |
+
},
|
| 15340 |
+
{
|
| 15341 |
+
"epoch": 9.409559512652296,
|
| 15342 |
+
"grad_norm": 2.290194272994995,
|
| 15343 |
+
"learning_rate": 8.823805060918463e-06,
|
| 15344 |
+
"loss": 0.0536,
|
| 15345 |
+
"step": 20080
|
| 15346 |
+
},
|
| 15347 |
+
{
|
| 15348 |
+
"epoch": 9.414245548266166,
|
| 15349 |
+
"grad_norm": 1.28463613986969,
|
| 15350 |
+
"learning_rate": 8.823219306466731e-06,
|
| 15351 |
+
"loss": 0.0627,
|
| 15352 |
+
"step": 20090
|
| 15353 |
+
},
|
| 15354 |
+
{
|
| 15355 |
+
"epoch": 9.418931583880038,
|
| 15356 |
+
"grad_norm": 1.6804534196853638,
|
| 15357 |
+
"learning_rate": 8.822633552014996e-06,
|
| 15358 |
+
"loss": 0.0512,
|
| 15359 |
+
"step": 20100
|
| 15360 |
+
},
|
| 15361 |
+
{
|
| 15362 |
+
"epoch": 9.423617619493909,
|
| 15363 |
+
"grad_norm": 0.8809636831283569,
|
| 15364 |
+
"learning_rate": 8.822047797563262e-06,
|
| 15365 |
+
"loss": 0.0429,
|
| 15366 |
+
"step": 20110
|
| 15367 |
+
},
|
| 15368 |
+
{
|
| 15369 |
+
"epoch": 9.42830365510778,
|
| 15370 |
+
"grad_norm": 1.8962526321411133,
|
| 15371 |
+
"learning_rate": 8.821462043111529e-06,
|
| 15372 |
+
"loss": 0.0531,
|
| 15373 |
+
"step": 20120
|
| 15374 |
+
},
|
| 15375 |
+
{
|
| 15376 |
+
"epoch": 9.43298969072165,
|
| 15377 |
+
"grad_norm": 1.0176962614059448,
|
| 15378 |
+
"learning_rate": 8.820876288659794e-06,
|
| 15379 |
+
"loss": 0.0467,
|
| 15380 |
+
"step": 20130
|
| 15381 |
+
},
|
| 15382 |
+
{
|
| 15383 |
+
"epoch": 9.43767572633552,
|
| 15384 |
+
"grad_norm": 1.49270761013031,
|
| 15385 |
+
"learning_rate": 8.82029053420806e-06,
|
| 15386 |
+
"loss": 0.0578,
|
| 15387 |
+
"step": 20140
|
| 15388 |
+
},
|
| 15389 |
+
{
|
| 15390 |
+
"epoch": 9.44236176194939,
|
| 15391 |
+
"grad_norm": 1.4182747602462769,
|
| 15392 |
+
"learning_rate": 8.819704779756328e-06,
|
| 15393 |
+
"loss": 0.051,
|
| 15394 |
+
"step": 20150
|
| 15395 |
+
},
|
| 15396 |
+
{
|
| 15397 |
+
"epoch": 9.447047797563261,
|
| 15398 |
+
"grad_norm": 1.2575933933258057,
|
| 15399 |
+
"learning_rate": 8.819119025304593e-06,
|
| 15400 |
+
"loss": 0.0503,
|
| 15401 |
+
"step": 20160
|
| 15402 |
+
},
|
| 15403 |
+
{
|
| 15404 |
+
"epoch": 9.451733833177132,
|
| 15405 |
+
"grad_norm": 1.8485591411590576,
|
| 15406 |
+
"learning_rate": 8.81853327085286e-06,
|
| 15407 |
+
"loss": 0.0578,
|
| 15408 |
+
"step": 20170
|
| 15409 |
+
},
|
| 15410 |
+
{
|
| 15411 |
+
"epoch": 9.456419868791002,
|
| 15412 |
+
"grad_norm": 1.7406198978424072,
|
| 15413 |
+
"learning_rate": 8.817947516401125e-06,
|
| 15414 |
+
"loss": 0.0532,
|
| 15415 |
+
"step": 20180
|
| 15416 |
+
},
|
| 15417 |
+
{
|
| 15418 |
+
"epoch": 9.461105904404873,
|
| 15419 |
+
"grad_norm": 1.138297438621521,
|
| 15420 |
+
"learning_rate": 8.817361761949392e-06,
|
| 15421 |
+
"loss": 0.0484,
|
| 15422 |
+
"step": 20190
|
| 15423 |
+
},
|
| 15424 |
+
{
|
| 15425 |
+
"epoch": 9.465791940018745,
|
| 15426 |
+
"grad_norm": 1.2107694149017334,
|
| 15427 |
+
"learning_rate": 8.816776007497657e-06,
|
| 15428 |
+
"loss": 0.045,
|
| 15429 |
+
"step": 20200
|
| 15430 |
+
},
|
| 15431 |
+
{
|
| 15432 |
+
"epoch": 9.470477975632615,
|
| 15433 |
+
"grad_norm": 1.5909892320632935,
|
| 15434 |
+
"learning_rate": 8.816190253045924e-06,
|
| 15435 |
+
"loss": 0.0463,
|
| 15436 |
+
"step": 20210
|
| 15437 |
+
},
|
| 15438 |
+
{
|
| 15439 |
+
"epoch": 9.475164011246486,
|
| 15440 |
+
"grad_norm": 1.1377689838409424,
|
| 15441 |
+
"learning_rate": 8.815604498594191e-06,
|
| 15442 |
+
"loss": 0.0688,
|
| 15443 |
+
"step": 20220
|
| 15444 |
+
},
|
| 15445 |
+
{
|
| 15446 |
+
"epoch": 9.479850046860356,
|
| 15447 |
+
"grad_norm": 2.0724937915802,
|
| 15448 |
+
"learning_rate": 8.815018744142456e-06,
|
| 15449 |
+
"loss": 0.0547,
|
| 15450 |
+
"step": 20230
|
| 15451 |
+
},
|
| 15452 |
+
{
|
| 15453 |
+
"epoch": 9.484536082474227,
|
| 15454 |
+
"grad_norm": 0.9459996819496155,
|
| 15455 |
+
"learning_rate": 8.814432989690721e-06,
|
| 15456 |
+
"loss": 0.0482,
|
| 15457 |
+
"step": 20240
|
| 15458 |
+
},
|
| 15459 |
+
{
|
| 15460 |
+
"epoch": 9.489222118088097,
|
| 15461 |
+
"grad_norm": 0.7871867418289185,
|
| 15462 |
+
"learning_rate": 8.813847235238988e-06,
|
| 15463 |
+
"loss": 0.0429,
|
| 15464 |
+
"step": 20250
|
| 15465 |
+
},
|
| 15466 |
+
{
|
| 15467 |
+
"epoch": 9.489222118088097,
|
| 15468 |
+
"eval_loss": 0.03782571852207184,
|
| 15469 |
+
"eval_pearson_cosine": 0.786819398080425,
|
| 15470 |
+
"eval_pearson_dot": 0.6502888686958528,
|
| 15471 |
+
"eval_pearson_euclidean": 0.7291925678539002,
|
| 15472 |
+
"eval_pearson_manhattan": 0.7285750403533555,
|
| 15473 |
+
"eval_runtime": 41.4193,
|
| 15474 |
+
"eval_samples_per_second": 36.215,
|
| 15475 |
+
"eval_spearman_cosine": 0.7882680919473954,
|
| 15476 |
+
"eval_spearman_dot": 0.6683716175414093,
|
| 15477 |
+
"eval_spearman_euclidean": 0.7431012804543077,
|
| 15478 |
+
"eval_spearman_manhattan": 0.7425560629845656,
|
| 15479 |
+
"eval_steps_per_second": 36.215,
|
| 15480 |
+
"step": 20250
|
| 15481 |
+
},
|
| 15482 |
+
{
|
| 15483 |
+
"epoch": 9.493908153701968,
|
| 15484 |
+
"grad_norm": 1.718775749206543,
|
| 15485 |
+
"learning_rate": 8.813261480787255e-06,
|
| 15486 |
+
"loss": 0.0528,
|
| 15487 |
+
"step": 20260
|
| 15488 |
+
},
|
| 15489 |
+
{
|
| 15490 |
+
"epoch": 9.498594189315838,
|
| 15491 |
+
"grad_norm": 1.860888957977295,
|
| 15492 |
+
"learning_rate": 8.81267572633552e-06,
|
| 15493 |
+
"loss": 0.051,
|
| 15494 |
+
"step": 20270
|
| 15495 |
+
},
|
| 15496 |
+
{
|
| 15497 |
+
"epoch": 9.503280224929709,
|
| 15498 |
+
"grad_norm": 1.33186674118042,
|
| 15499 |
+
"learning_rate": 8.812089971883787e-06,
|
| 15500 |
+
"loss": 0.0558,
|
| 15501 |
+
"step": 20280
|
| 15502 |
+
},
|
| 15503 |
+
{
|
| 15504 |
+
"epoch": 9.50796626054358,
|
| 15505 |
+
"grad_norm": 1.3585968017578125,
|
| 15506 |
+
"learning_rate": 8.811504217432053e-06,
|
| 15507 |
+
"loss": 0.0418,
|
| 15508 |
+
"step": 20290
|
| 15509 |
+
},
|
| 15510 |
+
{
|
| 15511 |
+
"epoch": 9.512652296157452,
|
| 15512 |
+
"grad_norm": 1.2041314840316772,
|
| 15513 |
+
"learning_rate": 8.81091846298032e-06,
|
| 15514 |
+
"loss": 0.0661,
|
| 15515 |
+
"step": 20300
|
| 15516 |
+
},
|
| 15517 |
+
{
|
| 15518 |
+
"epoch": 9.517338331771322,
|
| 15519 |
+
"grad_norm": 1.2717355489730835,
|
| 15520 |
+
"learning_rate": 8.810332708528585e-06,
|
| 15521 |
+
"loss": 0.0511,
|
| 15522 |
+
"step": 20310
|
| 15523 |
+
},
|
| 15524 |
+
{
|
| 15525 |
+
"epoch": 9.522024367385193,
|
| 15526 |
+
"grad_norm": 0.9652617573738098,
|
| 15527 |
+
"learning_rate": 8.809746954076852e-06,
|
| 15528 |
+
"loss": 0.0514,
|
| 15529 |
+
"step": 20320
|
| 15530 |
+
},
|
| 15531 |
+
{
|
| 15532 |
+
"epoch": 9.526710402999063,
|
| 15533 |
+
"grad_norm": 1.9312084913253784,
|
| 15534 |
+
"learning_rate": 8.809161199625119e-06,
|
| 15535 |
+
"loss": 0.0607,
|
| 15536 |
+
"step": 20330
|
| 15537 |
+
},
|
| 15538 |
+
{
|
| 15539 |
+
"epoch": 9.531396438612934,
|
| 15540 |
+
"grad_norm": 1.669273018836975,
|
| 15541 |
+
"learning_rate": 8.808575445173384e-06,
|
| 15542 |
+
"loss": 0.0468,
|
| 15543 |
+
"step": 20340
|
| 15544 |
+
},
|
| 15545 |
+
{
|
| 15546 |
+
"epoch": 9.536082474226804,
|
| 15547 |
+
"grad_norm": 1.204368233680725,
|
| 15548 |
+
"learning_rate": 8.80798969072165e-06,
|
| 15549 |
+
"loss": 0.0409,
|
| 15550 |
+
"step": 20350
|
| 15551 |
+
},
|
| 15552 |
+
{
|
| 15553 |
+
"epoch": 9.540768509840674,
|
| 15554 |
+
"grad_norm": 1.2132142782211304,
|
| 15555 |
+
"learning_rate": 8.807403936269916e-06,
|
| 15556 |
+
"loss": 0.0448,
|
| 15557 |
+
"step": 20360
|
| 15558 |
+
},
|
| 15559 |
+
{
|
| 15560 |
+
"epoch": 9.545454545454545,
|
| 15561 |
+
"grad_norm": 0.8759263157844543,
|
| 15562 |
+
"learning_rate": 8.806818181818183e-06,
|
| 15563 |
+
"loss": 0.0486,
|
| 15564 |
+
"step": 20370
|
| 15565 |
+
},
|
| 15566 |
+
{
|
| 15567 |
+
"epoch": 9.550140581068415,
|
| 15568 |
+
"grad_norm": 0.980694591999054,
|
| 15569 |
+
"learning_rate": 8.80623242736645e-06,
|
| 15570 |
+
"loss": 0.04,
|
| 15571 |
+
"step": 20380
|
| 15572 |
+
},
|
| 15573 |
+
{
|
| 15574 |
+
"epoch": 9.554826616682288,
|
| 15575 |
+
"grad_norm": 1.7257814407348633,
|
| 15576 |
+
"learning_rate": 8.805646672914715e-06,
|
| 15577 |
+
"loss": 0.0551,
|
| 15578 |
+
"step": 20390
|
| 15579 |
+
},
|
| 15580 |
+
{
|
| 15581 |
+
"epoch": 9.559512652296158,
|
| 15582 |
+
"grad_norm": 0.9855765700340271,
|
| 15583 |
+
"learning_rate": 8.80506091846298e-06,
|
| 15584 |
+
"loss": 0.0442,
|
| 15585 |
+
"step": 20400
|
| 15586 |
+
},
|
| 15587 |
+
{
|
| 15588 |
+
"epoch": 9.564198687910029,
|
| 15589 |
+
"grad_norm": 2.2688076496124268,
|
| 15590 |
+
"learning_rate": 8.804475164011247e-06,
|
| 15591 |
+
"loss": 0.0474,
|
| 15592 |
+
"step": 20410
|
| 15593 |
+
},
|
| 15594 |
+
{
|
| 15595 |
+
"epoch": 9.5688847235239,
|
| 15596 |
+
"grad_norm": 0.8345751762390137,
|
| 15597 |
+
"learning_rate": 8.803889409559512e-06,
|
| 15598 |
+
"loss": 0.0507,
|
| 15599 |
+
"step": 20420
|
| 15600 |
+
},
|
| 15601 |
+
{
|
| 15602 |
+
"epoch": 9.57357075913777,
|
| 15603 |
+
"grad_norm": 1.0074180364608765,
|
| 15604 |
+
"learning_rate": 8.80330365510778e-06,
|
| 15605 |
+
"loss": 0.0487,
|
| 15606 |
+
"step": 20430
|
| 15607 |
+
},
|
| 15608 |
+
{
|
| 15609 |
+
"epoch": 9.57825679475164,
|
| 15610 |
+
"grad_norm": 1.1515982151031494,
|
| 15611 |
+
"learning_rate": 8.802717900656046e-06,
|
| 15612 |
+
"loss": 0.0443,
|
| 15613 |
+
"step": 20440
|
| 15614 |
+
},
|
| 15615 |
+
{
|
| 15616 |
+
"epoch": 9.58294283036551,
|
| 15617 |
+
"grad_norm": 0.5248059630393982,
|
| 15618 |
+
"learning_rate": 8.802132146204311e-06,
|
| 15619 |
+
"loss": 0.0561,
|
| 15620 |
+
"step": 20450
|
| 15621 |
+
},
|
| 15622 |
+
{
|
| 15623 |
+
"epoch": 9.587628865979381,
|
| 15624 |
+
"grad_norm": 1.2470523118972778,
|
| 15625 |
+
"learning_rate": 8.801546391752578e-06,
|
| 15626 |
+
"loss": 0.0469,
|
| 15627 |
+
"step": 20460
|
| 15628 |
+
},
|
| 15629 |
+
{
|
| 15630 |
+
"epoch": 9.592314901593252,
|
| 15631 |
+
"grad_norm": 2.120579957962036,
|
| 15632 |
+
"learning_rate": 8.800960637300844e-06,
|
| 15633 |
+
"loss": 0.0513,
|
| 15634 |
+
"step": 20470
|
| 15635 |
+
},
|
| 15636 |
+
{
|
| 15637 |
+
"epoch": 9.597000937207122,
|
| 15638 |
+
"grad_norm": 2.442443609237671,
|
| 15639 |
+
"learning_rate": 8.80037488284911e-06,
|
| 15640 |
+
"loss": 0.0635,
|
| 15641 |
+
"step": 20480
|
| 15642 |
+
},
|
| 15643 |
+
{
|
| 15644 |
+
"epoch": 9.601686972820993,
|
| 15645 |
+
"grad_norm": 2.420138120651245,
|
| 15646 |
+
"learning_rate": 8.799789128397377e-06,
|
| 15647 |
+
"loss": 0.0626,
|
| 15648 |
+
"step": 20490
|
| 15649 |
+
},
|
| 15650 |
+
{
|
| 15651 |
+
"epoch": 9.606373008434865,
|
| 15652 |
+
"grad_norm": 2.3432815074920654,
|
| 15653 |
+
"learning_rate": 8.799203373945643e-06,
|
| 15654 |
+
"loss": 0.0534,
|
| 15655 |
+
"step": 20500
|
| 15656 |
+
},
|
| 15657 |
+
{
|
| 15658 |
+
"epoch": 9.606373008434865,
|
| 15659 |
+
"eval_loss": 0.037995509803295135,
|
| 15660 |
+
"eval_pearson_cosine": 0.786149907730362,
|
| 15661 |
+
"eval_pearson_dot": 0.6445644977545584,
|
| 15662 |
+
"eval_pearson_euclidean": 0.7304901967314237,
|
| 15663 |
+
"eval_pearson_manhattan": 0.7299852754916856,
|
| 15664 |
+
"eval_runtime": 40.4167,
|
| 15665 |
+
"eval_samples_per_second": 37.113,
|
| 15666 |
+
"eval_spearman_cosine": 0.788096924565833,
|
| 15667 |
+
"eval_spearman_dot": 0.6634744984860802,
|
| 15668 |
+
"eval_spearman_euclidean": 0.7450878530420201,
|
| 15669 |
+
"eval_spearman_manhattan": 0.7443460197740337,
|
| 15670 |
+
"eval_steps_per_second": 37.113,
|
| 15671 |
+
"step": 20500
|
| 15672 |
+
},
|
| 15673 |
+
{
|
| 15674 |
+
"epoch": 9.611059044048735,
|
| 15675 |
+
"grad_norm": 2.5431413650512695,
|
| 15676 |
+
"learning_rate": 8.79861761949391e-06,
|
| 15677 |
+
"loss": 0.0499,
|
| 15678 |
+
"step": 20510
|
| 15679 |
+
},
|
| 15680 |
+
{
|
| 15681 |
+
"epoch": 9.615745079662606,
|
| 15682 |
+
"grad_norm": 1.4701391458511353,
|
| 15683 |
+
"learning_rate": 8.798031865042175e-06,
|
| 15684 |
+
"loss": 0.0528,
|
| 15685 |
+
"step": 20520
|
| 15686 |
+
},
|
| 15687 |
+
{
|
| 15688 |
+
"epoch": 9.620431115276476,
|
| 15689 |
+
"grad_norm": 1.0605581998825073,
|
| 15690 |
+
"learning_rate": 8.79744611059044e-06,
|
| 15691 |
+
"loss": 0.0513,
|
| 15692 |
+
"step": 20530
|
| 15693 |
+
},
|
| 15694 |
+
{
|
| 15695 |
+
"epoch": 9.625117150890347,
|
| 15696 |
+
"grad_norm": 1.7231255769729614,
|
| 15697 |
+
"learning_rate": 8.796860356138709e-06,
|
| 15698 |
+
"loss": 0.0432,
|
| 15699 |
+
"step": 20540
|
| 15700 |
+
},
|
| 15701 |
+
{
|
| 15702 |
+
"epoch": 9.629803186504217,
|
| 15703 |
+
"grad_norm": 2.4519450664520264,
|
| 15704 |
+
"learning_rate": 8.796274601686974e-06,
|
| 15705 |
+
"loss": 0.0555,
|
| 15706 |
+
"step": 20550
|
| 15707 |
+
},
|
| 15708 |
+
{
|
| 15709 |
+
"epoch": 9.634489222118088,
|
| 15710 |
+
"grad_norm": 1.7406028509140015,
|
| 15711 |
+
"learning_rate": 8.795688847235239e-06,
|
| 15712 |
+
"loss": 0.0547,
|
| 15713 |
+
"step": 20560
|
| 15714 |
+
},
|
| 15715 |
+
{
|
| 15716 |
+
"epoch": 9.639175257731958,
|
| 15717 |
+
"grad_norm": 1.357200026512146,
|
| 15718 |
+
"learning_rate": 8.795103092783506e-06,
|
| 15719 |
+
"loss": 0.0548,
|
| 15720 |
+
"step": 20570
|
| 15721 |
+
},
|
| 15722 |
+
{
|
| 15723 |
+
"epoch": 9.643861293345829,
|
| 15724 |
+
"grad_norm": 1.7510253190994263,
|
| 15725 |
+
"learning_rate": 8.794517338331771e-06,
|
| 15726 |
+
"loss": 0.0541,
|
| 15727 |
+
"step": 20580
|
| 15728 |
+
},
|
| 15729 |
+
{
|
| 15730 |
+
"epoch": 9.648547328959701,
|
| 15731 |
+
"grad_norm": 2.1982178688049316,
|
| 15732 |
+
"learning_rate": 8.793931583880038e-06,
|
| 15733 |
+
"loss": 0.0572,
|
| 15734 |
+
"step": 20590
|
| 15735 |
+
},
|
| 15736 |
+
{
|
| 15737 |
+
"epoch": 9.653233364573572,
|
| 15738 |
+
"grad_norm": 1.6132203340530396,
|
| 15739 |
+
"learning_rate": 8.793345829428305e-06,
|
| 15740 |
+
"loss": 0.0467,
|
| 15741 |
+
"step": 20600
|
| 15742 |
+
},
|
| 15743 |
+
{
|
| 15744 |
+
"epoch": 9.657919400187442,
|
| 15745 |
+
"grad_norm": 1.165385127067566,
|
| 15746 |
+
"learning_rate": 8.79276007497657e-06,
|
| 15747 |
+
"loss": 0.0463,
|
| 15748 |
+
"step": 20610
|
| 15749 |
+
},
|
| 15750 |
+
{
|
| 15751 |
+
"epoch": 9.662605435801312,
|
| 15752 |
+
"grad_norm": 2.306887149810791,
|
| 15753 |
+
"learning_rate": 8.792174320524837e-06,
|
| 15754 |
+
"loss": 0.0529,
|
| 15755 |
+
"step": 20620
|
| 15756 |
+
},
|
| 15757 |
+
{
|
| 15758 |
+
"epoch": 9.667291471415183,
|
| 15759 |
+
"grad_norm": 1.740670084953308,
|
| 15760 |
+
"learning_rate": 8.791588566073102e-06,
|
| 15761 |
+
"loss": 0.0497,
|
| 15762 |
+
"step": 20630
|
| 15763 |
+
},
|
| 15764 |
+
{
|
| 15765 |
+
"epoch": 9.671977507029053,
|
| 15766 |
+
"grad_norm": 1.0078073740005493,
|
| 15767 |
+
"learning_rate": 8.79100281162137e-06,
|
| 15768 |
+
"loss": 0.0495,
|
| 15769 |
+
"step": 20640
|
| 15770 |
+
},
|
| 15771 |
+
{
|
| 15772 |
+
"epoch": 9.676663542642924,
|
| 15773 |
+
"grad_norm": 1.454647421836853,
|
| 15774 |
+
"learning_rate": 8.790417057169636e-06,
|
| 15775 |
+
"loss": 0.0477,
|
| 15776 |
+
"step": 20650
|
| 15777 |
+
},
|
| 15778 |
+
{
|
| 15779 |
+
"epoch": 9.681349578256794,
|
| 15780 |
+
"grad_norm": 1.6520277261734009,
|
| 15781 |
+
"learning_rate": 8.789831302717901e-06,
|
| 15782 |
+
"loss": 0.0499,
|
| 15783 |
+
"step": 20660
|
| 15784 |
+
},
|
| 15785 |
+
{
|
| 15786 |
+
"epoch": 9.686035613870665,
|
| 15787 |
+
"grad_norm": 2.0566940307617188,
|
| 15788 |
+
"learning_rate": 8.789245548266168e-06,
|
| 15789 |
+
"loss": 0.0504,
|
| 15790 |
+
"step": 20670
|
| 15791 |
+
},
|
| 15792 |
+
{
|
| 15793 |
+
"epoch": 9.690721649484535,
|
| 15794 |
+
"grad_norm": 1.7212245464324951,
|
| 15795 |
+
"learning_rate": 8.788659793814434e-06,
|
| 15796 |
+
"loss": 0.0558,
|
| 15797 |
+
"step": 20680
|
| 15798 |
+
},
|
| 15799 |
+
{
|
| 15800 |
+
"epoch": 9.695407685098406,
|
| 15801 |
+
"grad_norm": 0.9179878234863281,
|
| 15802 |
+
"learning_rate": 8.788074039362699e-06,
|
| 15803 |
+
"loss": 0.055,
|
| 15804 |
+
"step": 20690
|
| 15805 |
+
},
|
| 15806 |
+
{
|
| 15807 |
+
"epoch": 9.700093720712278,
|
| 15808 |
+
"grad_norm": 1.1311330795288086,
|
| 15809 |
+
"learning_rate": 8.787488284910966e-06,
|
| 15810 |
+
"loss": 0.0555,
|
| 15811 |
+
"step": 20700
|
| 15812 |
+
},
|
| 15813 |
+
{
|
| 15814 |
+
"epoch": 9.704779756326149,
|
| 15815 |
+
"grad_norm": 1.4247910976409912,
|
| 15816 |
+
"learning_rate": 8.786902530459233e-06,
|
| 15817 |
+
"loss": 0.0522,
|
| 15818 |
+
"step": 20710
|
| 15819 |
+
},
|
| 15820 |
+
{
|
| 15821 |
+
"epoch": 9.70946579194002,
|
| 15822 |
+
"grad_norm": 2.309624195098877,
|
| 15823 |
+
"learning_rate": 8.786316776007498e-06,
|
| 15824 |
+
"loss": 0.0492,
|
| 15825 |
+
"step": 20720
|
| 15826 |
+
},
|
| 15827 |
+
{
|
| 15828 |
+
"epoch": 9.71415182755389,
|
| 15829 |
+
"grad_norm": 0.9960254430770874,
|
| 15830 |
+
"learning_rate": 8.785731021555765e-06,
|
| 15831 |
+
"loss": 0.0461,
|
| 15832 |
+
"step": 20730
|
| 15833 |
+
},
|
| 15834 |
+
{
|
| 15835 |
+
"epoch": 9.71883786316776,
|
| 15836 |
+
"grad_norm": 0.9048061966896057,
|
| 15837 |
+
"learning_rate": 8.78514526710403e-06,
|
| 15838 |
+
"loss": 0.0497,
|
| 15839 |
+
"step": 20740
|
| 15840 |
+
},
|
| 15841 |
+
{
|
| 15842 |
+
"epoch": 9.72352389878163,
|
| 15843 |
+
"grad_norm": 1.7553735971450806,
|
| 15844 |
+
"learning_rate": 8.784559512652297e-06,
|
| 15845 |
+
"loss": 0.0531,
|
| 15846 |
+
"step": 20750
|
| 15847 |
+
},
|
| 15848 |
+
{
|
| 15849 |
+
"epoch": 9.72352389878163,
|
| 15850 |
+
"eval_loss": 0.0375310480594635,
|
| 15851 |
+
"eval_pearson_cosine": 0.7885717010435052,
|
| 15852 |
+
"eval_pearson_dot": 0.6441669695807519,
|
| 15853 |
+
"eval_pearson_euclidean": 0.7356023128188269,
|
| 15854 |
+
"eval_pearson_manhattan": 0.7349906496289833,
|
| 15855 |
+
"eval_runtime": 42.7003,
|
| 15856 |
+
"eval_samples_per_second": 35.129,
|
| 15857 |
+
"eval_spearman_cosine": 0.7894128881355192,
|
| 15858 |
+
"eval_spearman_dot": 0.6634003738795025,
|
| 15859 |
+
"eval_spearman_euclidean": 0.7498228606359407,
|
| 15860 |
+
"eval_spearman_manhattan": 0.7492125285743606,
|
| 15861 |
+
"eval_steps_per_second": 35.129,
|
| 15862 |
+
"step": 20750
|
| 15863 |
+
},
|
| 15864 |
+
{
|
| 15865 |
+
"epoch": 9.728209934395501,
|
| 15866 |
+
"grad_norm": 2.050300121307373,
|
| 15867 |
+
"learning_rate": 8.783973758200564e-06,
|
| 15868 |
+
"loss": 0.0559,
|
| 15869 |
+
"step": 20760
|
| 15870 |
+
},
|
| 15871 |
+
{
|
| 15872 |
+
"epoch": 9.732895970009372,
|
| 15873 |
+
"grad_norm": 1.7900549173355103,
|
| 15874 |
+
"learning_rate": 8.783388003748829e-06,
|
| 15875 |
+
"loss": 0.0468,
|
| 15876 |
+
"step": 20770
|
| 15877 |
+
},
|
| 15878 |
+
{
|
| 15879 |
+
"epoch": 9.737582005623242,
|
| 15880 |
+
"grad_norm": 2.7999625205993652,
|
| 15881 |
+
"learning_rate": 8.782802249297096e-06,
|
| 15882 |
+
"loss": 0.0567,
|
| 15883 |
+
"step": 20780
|
| 15884 |
+
},
|
| 15885 |
+
{
|
| 15886 |
+
"epoch": 9.742268041237114,
|
| 15887 |
+
"grad_norm": 1.746066927909851,
|
| 15888 |
+
"learning_rate": 8.782216494845361e-06,
|
| 15889 |
+
"loss": 0.0462,
|
| 15890 |
+
"step": 20790
|
| 15891 |
+
},
|
| 15892 |
+
{
|
| 15893 |
+
"epoch": 9.746954076850985,
|
| 15894 |
+
"grad_norm": 1.6031302213668823,
|
| 15895 |
+
"learning_rate": 8.781630740393628e-06,
|
| 15896 |
+
"loss": 0.0569,
|
| 15897 |
+
"step": 20800
|
| 15898 |
+
},
|
| 15899 |
+
{
|
| 15900 |
+
"epoch": 9.751640112464855,
|
| 15901 |
+
"grad_norm": 0.795835554599762,
|
| 15902 |
+
"learning_rate": 8.781044985941893e-06,
|
| 15903 |
+
"loss": 0.0471,
|
| 15904 |
+
"step": 20810
|
| 15905 |
+
},
|
| 15906 |
+
{
|
| 15907 |
+
"epoch": 9.756326148078726,
|
| 15908 |
+
"grad_norm": 1.4143311977386475,
|
| 15909 |
+
"learning_rate": 8.78045923149016e-06,
|
| 15910 |
+
"loss": 0.0495,
|
| 15911 |
+
"step": 20820
|
| 15912 |
+
},
|
| 15913 |
+
{
|
| 15914 |
+
"epoch": 9.761012183692596,
|
| 15915 |
+
"grad_norm": 1.2782717943191528,
|
| 15916 |
+
"learning_rate": 8.779873477038427e-06,
|
| 15917 |
+
"loss": 0.0594,
|
| 15918 |
+
"step": 20830
|
| 15919 |
+
},
|
| 15920 |
+
{
|
| 15921 |
+
"epoch": 9.765698219306467,
|
| 15922 |
+
"grad_norm": 0.9974650144577026,
|
| 15923 |
+
"learning_rate": 8.779287722586692e-06,
|
| 15924 |
+
"loss": 0.0485,
|
| 15925 |
+
"step": 20840
|
| 15926 |
+
},
|
| 15927 |
+
{
|
| 15928 |
+
"epoch": 9.770384254920337,
|
| 15929 |
+
"grad_norm": 1.5415414571762085,
|
| 15930 |
+
"learning_rate": 8.778701968134958e-06,
|
| 15931 |
+
"loss": 0.0476,
|
| 15932 |
+
"step": 20850
|
| 15933 |
+
},
|
| 15934 |
+
{
|
| 15935 |
+
"epoch": 9.775070290534208,
|
| 15936 |
+
"grad_norm": 0.8162183165550232,
|
| 15937 |
+
"learning_rate": 8.778116213683225e-06,
|
| 15938 |
+
"loss": 0.0529,
|
| 15939 |
+
"step": 20860
|
| 15940 |
+
},
|
| 15941 |
+
{
|
| 15942 |
+
"epoch": 9.779756326148078,
|
| 15943 |
+
"grad_norm": 1.532882571220398,
|
| 15944 |
+
"learning_rate": 8.777530459231491e-06,
|
| 15945 |
+
"loss": 0.0484,
|
| 15946 |
+
"step": 20870
|
| 15947 |
+
},
|
| 15948 |
+
{
|
| 15949 |
+
"epoch": 9.784442361761949,
|
| 15950 |
+
"grad_norm": 1.609947919845581,
|
| 15951 |
+
"learning_rate": 8.776944704779757e-06,
|
| 15952 |
+
"loss": 0.0575,
|
| 15953 |
+
"step": 20880
|
| 15954 |
+
},
|
| 15955 |
+
{
|
| 15956 |
+
"epoch": 9.78912839737582,
|
| 15957 |
+
"grad_norm": 1.7726844549179077,
|
| 15958 |
+
"learning_rate": 8.776358950328024e-06,
|
| 15959 |
+
"loss": 0.055,
|
| 15960 |
+
"step": 20890
|
| 15961 |
+
},
|
| 15962 |
+
{
|
| 15963 |
+
"epoch": 9.793814432989691,
|
| 15964 |
+
"grad_norm": 0.7471759915351868,
|
| 15965 |
+
"learning_rate": 8.775773195876289e-06,
|
| 15966 |
+
"loss": 0.0439,
|
| 15967 |
+
"step": 20900
|
| 15968 |
+
},
|
| 15969 |
+
{
|
| 15970 |
+
"epoch": 9.798500468603562,
|
| 15971 |
+
"grad_norm": 1.8393468856811523,
|
| 15972 |
+
"learning_rate": 8.775187441424556e-06,
|
| 15973 |
+
"loss": 0.0576,
|
| 15974 |
+
"step": 20910
|
| 15975 |
+
},
|
| 15976 |
+
{
|
| 15977 |
+
"epoch": 9.803186504217432,
|
| 15978 |
+
"grad_norm": 1.570793867111206,
|
| 15979 |
+
"learning_rate": 8.774601686972821e-06,
|
| 15980 |
+
"loss": 0.0518,
|
| 15981 |
+
"step": 20920
|
| 15982 |
+
},
|
| 15983 |
+
{
|
| 15984 |
+
"epoch": 9.807872539831303,
|
| 15985 |
+
"grad_norm": 2.121197462081909,
|
| 15986 |
+
"learning_rate": 8.774015932521088e-06,
|
| 15987 |
+
"loss": 0.0499,
|
| 15988 |
+
"step": 20930
|
| 15989 |
+
},
|
| 15990 |
+
{
|
| 15991 |
+
"epoch": 9.812558575445173,
|
| 15992 |
+
"grad_norm": 1.4100779294967651,
|
| 15993 |
+
"learning_rate": 8.773430178069355e-06,
|
| 15994 |
+
"loss": 0.0518,
|
| 15995 |
+
"step": 20940
|
| 15996 |
+
},
|
| 15997 |
+
{
|
| 15998 |
+
"epoch": 9.817244611059044,
|
| 15999 |
+
"grad_norm": 2.057370901107788,
|
| 16000 |
+
"learning_rate": 8.77284442361762e-06,
|
| 16001 |
+
"loss": 0.0445,
|
| 16002 |
+
"step": 20950
|
| 16003 |
+
},
|
| 16004 |
+
{
|
| 16005 |
+
"epoch": 9.821930646672914,
|
| 16006 |
+
"grad_norm": 1.0159096717834473,
|
| 16007 |
+
"learning_rate": 8.772258669165887e-06,
|
| 16008 |
+
"loss": 0.0488,
|
| 16009 |
+
"step": 20960
|
| 16010 |
+
},
|
| 16011 |
+
{
|
| 16012 |
+
"epoch": 9.826616682286785,
|
| 16013 |
+
"grad_norm": 2.1321892738342285,
|
| 16014 |
+
"learning_rate": 8.771672914714152e-06,
|
| 16015 |
+
"loss": 0.0435,
|
| 16016 |
+
"step": 20970
|
| 16017 |
+
},
|
| 16018 |
+
{
|
| 16019 |
+
"epoch": 9.831302717900655,
|
| 16020 |
+
"grad_norm": 1.727754831314087,
|
| 16021 |
+
"learning_rate": 8.771087160262419e-06,
|
| 16022 |
+
"loss": 0.0506,
|
| 16023 |
+
"step": 20980
|
| 16024 |
+
},
|
| 16025 |
+
{
|
| 16026 |
+
"epoch": 9.835988753514528,
|
| 16027 |
+
"grad_norm": 1.4544596672058105,
|
| 16028 |
+
"learning_rate": 8.770501405810686e-06,
|
| 16029 |
+
"loss": 0.0538,
|
| 16030 |
+
"step": 20990
|
| 16031 |
+
},
|
| 16032 |
+
{
|
| 16033 |
+
"epoch": 9.840674789128398,
|
| 16034 |
+
"grad_norm": 1.8320542573928833,
|
| 16035 |
+
"learning_rate": 8.769915651358951e-06,
|
| 16036 |
+
"loss": 0.0464,
|
| 16037 |
+
"step": 21000
|
| 16038 |
+
},
|
| 16039 |
+
{
|
| 16040 |
+
"epoch": 9.840674789128398,
|
| 16041 |
+
"eval_loss": 0.037997569888830185,
|
| 16042 |
+
"eval_pearson_cosine": 0.7860642455644182,
|
| 16043 |
+
"eval_pearson_dot": 0.6414601204917716,
|
| 16044 |
+
"eval_pearson_euclidean": 0.7319623204333681,
|
| 16045 |
+
"eval_pearson_manhattan": 0.731351329880491,
|
| 16046 |
+
"eval_runtime": 42.4574,
|
| 16047 |
+
"eval_samples_per_second": 35.33,
|
| 16048 |
+
"eval_spearman_cosine": 0.7870558046080526,
|
| 16049 |
+
"eval_spearman_dot": 0.6600440085619812,
|
| 16050 |
+
"eval_spearman_euclidean": 0.746835302683809,
|
| 16051 |
+
"eval_spearman_manhattan": 0.7463752489757238,
|
| 16052 |
+
"eval_steps_per_second": 35.33,
|
| 16053 |
+
"step": 21000
|
| 16054 |
}
|
| 16055 |
],
|
| 16056 |
"logging_steps": 10,
|