Training in progress, step 16000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2d2a0f5d2fd2db2c00ba8019c5d26c7e05ea6254b391695cc519d7dab59b225
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b5ee984e35f64350e857f17403ecda5095a0c3d2917a731f8237c213d237bae
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2be093747f5a4a232618c3318bfffdf24560aea746cf4c11903c465c5179b6c9
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:975ed305a3fe7b4927a3b3d12f66d6b14051cd85dfe6e94defa4d7c56781b5ac
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 7.
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -11467,6 +11467,770 @@
|
|
| 11467 |
"eval_spearman_manhattan": 0.7568637419859118,
|
| 11468 |
"eval_steps_per_second": 38.084,
|
| 11469 |
"step": 15000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11470 |
}
|
| 11471 |
],
|
| 11472 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 7.497656982193065,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 16000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 11467 |
"eval_spearman_manhattan": 0.7568637419859118,
|
| 11468 |
"eval_steps_per_second": 38.084,
|
| 11469 |
"step": 15000
|
| 11470 |
+
},
|
| 11471 |
+
{
|
| 11472 |
+
"epoch": 7.033739456419869,
|
| 11473 |
+
"grad_norm": 1.905097246170044,
|
| 11474 |
+
"learning_rate": 9.120782567947517e-06,
|
| 11475 |
+
"loss": 0.058,
|
| 11476 |
+
"step": 15010
|
| 11477 |
+
},
|
| 11478 |
+
{
|
| 11479 |
+
"epoch": 7.038425492033739,
|
| 11480 |
+
"grad_norm": 1.7693982124328613,
|
| 11481 |
+
"learning_rate": 9.120196813495782e-06,
|
| 11482 |
+
"loss": 0.0661,
|
| 11483 |
+
"step": 15020
|
| 11484 |
+
},
|
| 11485 |
+
{
|
| 11486 |
+
"epoch": 7.04311152764761,
|
| 11487 |
+
"grad_norm": 2.130645990371704,
|
| 11488 |
+
"learning_rate": 9.11961105904405e-06,
|
| 11489 |
+
"loss": 0.0749,
|
| 11490 |
+
"step": 15030
|
| 11491 |
+
},
|
| 11492 |
+
{
|
| 11493 |
+
"epoch": 7.047797563261481,
|
| 11494 |
+
"grad_norm": 1.778387427330017,
|
| 11495 |
+
"learning_rate": 9.119025304592316e-06,
|
| 11496 |
+
"loss": 0.063,
|
| 11497 |
+
"step": 15040
|
| 11498 |
+
},
|
| 11499 |
+
{
|
| 11500 |
+
"epoch": 7.052483598875352,
|
| 11501 |
+
"grad_norm": 1.038841724395752,
|
| 11502 |
+
"learning_rate": 9.118439550140582e-06,
|
| 11503 |
+
"loss": 0.0717,
|
| 11504 |
+
"step": 15050
|
| 11505 |
+
},
|
| 11506 |
+
{
|
| 11507 |
+
"epoch": 7.057169634489222,
|
| 11508 |
+
"grad_norm": 1.8356302976608276,
|
| 11509 |
+
"learning_rate": 9.117853795688848e-06,
|
| 11510 |
+
"loss": 0.0607,
|
| 11511 |
+
"step": 15060
|
| 11512 |
+
},
|
| 11513 |
+
{
|
| 11514 |
+
"epoch": 7.061855670103093,
|
| 11515 |
+
"grad_norm": 1.5863852500915527,
|
| 11516 |
+
"learning_rate": 9.117268041237114e-06,
|
| 11517 |
+
"loss": 0.0609,
|
| 11518 |
+
"step": 15070
|
| 11519 |
+
},
|
| 11520 |
+
{
|
| 11521 |
+
"epoch": 7.066541705716963,
|
| 11522 |
+
"grad_norm": 1.3317904472351074,
|
| 11523 |
+
"learning_rate": 9.11668228678538e-06,
|
| 11524 |
+
"loss": 0.0576,
|
| 11525 |
+
"step": 15080
|
| 11526 |
+
},
|
| 11527 |
+
{
|
| 11528 |
+
"epoch": 7.071227741330834,
|
| 11529 |
+
"grad_norm": 2.148087978363037,
|
| 11530 |
+
"learning_rate": 9.116096532333648e-06,
|
| 11531 |
+
"loss": 0.0694,
|
| 11532 |
+
"step": 15090
|
| 11533 |
+
},
|
| 11534 |
+
{
|
| 11535 |
+
"epoch": 7.075913776944705,
|
| 11536 |
+
"grad_norm": 1.34135901927948,
|
| 11537 |
+
"learning_rate": 9.115510777881913e-06,
|
| 11538 |
+
"loss": 0.063,
|
| 11539 |
+
"step": 15100
|
| 11540 |
+
},
|
| 11541 |
+
{
|
| 11542 |
+
"epoch": 7.080599812558575,
|
| 11543 |
+
"grad_norm": 1.9813562631607056,
|
| 11544 |
+
"learning_rate": 9.11492502343018e-06,
|
| 11545 |
+
"loss": 0.0843,
|
| 11546 |
+
"step": 15110
|
| 11547 |
+
},
|
| 11548 |
+
{
|
| 11549 |
+
"epoch": 7.085285848172446,
|
| 11550 |
+
"grad_norm": 1.3236151933670044,
|
| 11551 |
+
"learning_rate": 9.114339268978445e-06,
|
| 11552 |
+
"loss": 0.0682,
|
| 11553 |
+
"step": 15120
|
| 11554 |
+
},
|
| 11555 |
+
{
|
| 11556 |
+
"epoch": 7.089971883786316,
|
| 11557 |
+
"grad_norm": 1.483312726020813,
|
| 11558 |
+
"learning_rate": 9.11375351452671e-06,
|
| 11559 |
+
"loss": 0.0742,
|
| 11560 |
+
"step": 15130
|
| 11561 |
+
},
|
| 11562 |
+
{
|
| 11563 |
+
"epoch": 7.094657919400188,
|
| 11564 |
+
"grad_norm": 1.7315001487731934,
|
| 11565 |
+
"learning_rate": 9.113167760074977e-06,
|
| 11566 |
+
"loss": 0.0544,
|
| 11567 |
+
"step": 15140
|
| 11568 |
+
},
|
| 11569 |
+
{
|
| 11570 |
+
"epoch": 7.099343955014058,
|
| 11571 |
+
"grad_norm": 2.530773162841797,
|
| 11572 |
+
"learning_rate": 9.112582005623244e-06,
|
| 11573 |
+
"loss": 0.0768,
|
| 11574 |
+
"step": 15150
|
| 11575 |
+
},
|
| 11576 |
+
{
|
| 11577 |
+
"epoch": 7.104029990627929,
|
| 11578 |
+
"grad_norm": 2.089907169342041,
|
| 11579 |
+
"learning_rate": 9.11199625117151e-06,
|
| 11580 |
+
"loss": 0.0617,
|
| 11581 |
+
"step": 15160
|
| 11582 |
+
},
|
| 11583 |
+
{
|
| 11584 |
+
"epoch": 7.108716026241799,
|
| 11585 |
+
"grad_norm": 1.7428967952728271,
|
| 11586 |
+
"learning_rate": 9.111410496719776e-06,
|
| 11587 |
+
"loss": 0.0763,
|
| 11588 |
+
"step": 15170
|
| 11589 |
+
},
|
| 11590 |
+
{
|
| 11591 |
+
"epoch": 7.11340206185567,
|
| 11592 |
+
"grad_norm": 1.5844217538833618,
|
| 11593 |
+
"learning_rate": 9.110824742268041e-06,
|
| 11594 |
+
"loss": 0.0554,
|
| 11595 |
+
"step": 15180
|
| 11596 |
+
},
|
| 11597 |
+
{
|
| 11598 |
+
"epoch": 7.118088097469541,
|
| 11599 |
+
"grad_norm": 1.881791353225708,
|
| 11600 |
+
"learning_rate": 9.110238987816308e-06,
|
| 11601 |
+
"loss": 0.0662,
|
| 11602 |
+
"step": 15190
|
| 11603 |
+
},
|
| 11604 |
+
{
|
| 11605 |
+
"epoch": 7.122774133083412,
|
| 11606 |
+
"grad_norm": 1.2586263418197632,
|
| 11607 |
+
"learning_rate": 9.109653233364575e-06,
|
| 11608 |
+
"loss": 0.0648,
|
| 11609 |
+
"step": 15200
|
| 11610 |
+
},
|
| 11611 |
+
{
|
| 11612 |
+
"epoch": 7.127460168697282,
|
| 11613 |
+
"grad_norm": 1.9210679531097412,
|
| 11614 |
+
"learning_rate": 9.10906747891284e-06,
|
| 11615 |
+
"loss": 0.0647,
|
| 11616 |
+
"step": 15210
|
| 11617 |
+
},
|
| 11618 |
+
{
|
| 11619 |
+
"epoch": 7.1321462043111525,
|
| 11620 |
+
"grad_norm": 2.3099005222320557,
|
| 11621 |
+
"learning_rate": 9.108481724461107e-06,
|
| 11622 |
+
"loss": 0.0632,
|
| 11623 |
+
"step": 15220
|
| 11624 |
+
},
|
| 11625 |
+
{
|
| 11626 |
+
"epoch": 7.136832239925023,
|
| 11627 |
+
"grad_norm": 2.189209222793579,
|
| 11628 |
+
"learning_rate": 9.107895970009372e-06,
|
| 11629 |
+
"loss": 0.0779,
|
| 11630 |
+
"step": 15230
|
| 11631 |
+
},
|
| 11632 |
+
{
|
| 11633 |
+
"epoch": 7.141518275538894,
|
| 11634 |
+
"grad_norm": 1.2473788261413574,
|
| 11635 |
+
"learning_rate": 9.10731021555764e-06,
|
| 11636 |
+
"loss": 0.0684,
|
| 11637 |
+
"step": 15240
|
| 11638 |
+
},
|
| 11639 |
+
{
|
| 11640 |
+
"epoch": 7.146204311152765,
|
| 11641 |
+
"grad_norm": 1.381177306175232,
|
| 11642 |
+
"learning_rate": 9.106724461105905e-06,
|
| 11643 |
+
"loss": 0.0698,
|
| 11644 |
+
"step": 15250
|
| 11645 |
+
},
|
| 11646 |
+
{
|
| 11647 |
+
"epoch": 7.146204311152765,
|
| 11648 |
+
"eval_loss": 0.039560701698064804,
|
| 11649 |
+
"eval_pearson_cosine": 0.7821626687438226,
|
| 11650 |
+
"eval_pearson_dot": 0.6380954414398445,
|
| 11651 |
+
"eval_pearson_euclidean": 0.7346215848669857,
|
| 11652 |
+
"eval_pearson_manhattan": 0.7341262164749853,
|
| 11653 |
+
"eval_runtime": 40.552,
|
| 11654 |
+
"eval_samples_per_second": 36.99,
|
| 11655 |
+
"eval_spearman_cosine": 0.7855493497996395,
|
| 11656 |
+
"eval_spearman_dot": 0.6551626564215037,
|
| 11657 |
+
"eval_spearman_euclidean": 0.7509105689283286,
|
| 11658 |
+
"eval_spearman_manhattan": 0.7507208072274875,
|
| 11659 |
+
"eval_steps_per_second": 36.99,
|
| 11660 |
+
"step": 15250
|
| 11661 |
+
},
|
| 11662 |
+
{
|
| 11663 |
+
"epoch": 7.150890346766635,
|
| 11664 |
+
"grad_norm": 1.887905478477478,
|
| 11665 |
+
"learning_rate": 9.106138706654172e-06,
|
| 11666 |
+
"loss": 0.0744,
|
| 11667 |
+
"step": 15260
|
| 11668 |
+
},
|
| 11669 |
+
{
|
| 11670 |
+
"epoch": 7.155576382380506,
|
| 11671 |
+
"grad_norm": 1.1103498935699463,
|
| 11672 |
+
"learning_rate": 9.105552952202438e-06,
|
| 11673 |
+
"loss": 0.0614,
|
| 11674 |
+
"step": 15270
|
| 11675 |
+
},
|
| 11676 |
+
{
|
| 11677 |
+
"epoch": 7.160262417994376,
|
| 11678 |
+
"grad_norm": 1.2665252685546875,
|
| 11679 |
+
"learning_rate": 9.104967197750704e-06,
|
| 11680 |
+
"loss": 0.0668,
|
| 11681 |
+
"step": 15280
|
| 11682 |
+
},
|
| 11683 |
+
{
|
| 11684 |
+
"epoch": 7.164948453608248,
|
| 11685 |
+
"grad_norm": 1.8980008363723755,
|
| 11686 |
+
"learning_rate": 9.104381443298969e-06,
|
| 11687 |
+
"loss": 0.0615,
|
| 11688 |
+
"step": 15290
|
| 11689 |
+
},
|
| 11690 |
+
{
|
| 11691 |
+
"epoch": 7.169634489222118,
|
| 11692 |
+
"grad_norm": 1.789542317390442,
|
| 11693 |
+
"learning_rate": 9.103795688847236e-06,
|
| 11694 |
+
"loss": 0.0857,
|
| 11695 |
+
"step": 15300
|
| 11696 |
+
},
|
| 11697 |
+
{
|
| 11698 |
+
"epoch": 7.174320524835989,
|
| 11699 |
+
"grad_norm": 1.84969162940979,
|
| 11700 |
+
"learning_rate": 9.103209934395501e-06,
|
| 11701 |
+
"loss": 0.069,
|
| 11702 |
+
"step": 15310
|
| 11703 |
+
},
|
| 11704 |
+
{
|
| 11705 |
+
"epoch": 7.179006560449859,
|
| 11706 |
+
"grad_norm": 1.0945720672607422,
|
| 11707 |
+
"learning_rate": 9.102624179943768e-06,
|
| 11708 |
+
"loss": 0.0665,
|
| 11709 |
+
"step": 15320
|
| 11710 |
+
},
|
| 11711 |
+
{
|
| 11712 |
+
"epoch": 7.18369259606373,
|
| 11713 |
+
"grad_norm": 1.3088226318359375,
|
| 11714 |
+
"learning_rate": 9.102038425492035e-06,
|
| 11715 |
+
"loss": 0.0747,
|
| 11716 |
+
"step": 15330
|
| 11717 |
+
},
|
| 11718 |
+
{
|
| 11719 |
+
"epoch": 7.188378631677601,
|
| 11720 |
+
"grad_norm": 1.0556889772415161,
|
| 11721 |
+
"learning_rate": 9.1014526710403e-06,
|
| 11722 |
+
"loss": 0.0642,
|
| 11723 |
+
"step": 15340
|
| 11724 |
+
},
|
| 11725 |
+
{
|
| 11726 |
+
"epoch": 7.1930646672914715,
|
| 11727 |
+
"grad_norm": 1.7667440176010132,
|
| 11728 |
+
"learning_rate": 9.100866916588567e-06,
|
| 11729 |
+
"loss": 0.0644,
|
| 11730 |
+
"step": 15350
|
| 11731 |
+
},
|
| 11732 |
+
{
|
| 11733 |
+
"epoch": 7.197750702905342,
|
| 11734 |
+
"grad_norm": 1.3899027109146118,
|
| 11735 |
+
"learning_rate": 9.100281162136832e-06,
|
| 11736 |
+
"loss": 0.0712,
|
| 11737 |
+
"step": 15360
|
| 11738 |
+
},
|
| 11739 |
+
{
|
| 11740 |
+
"epoch": 7.2024367385192125,
|
| 11741 |
+
"grad_norm": 1.9200291633605957,
|
| 11742 |
+
"learning_rate": 9.099695407685099e-06,
|
| 11743 |
+
"loss": 0.08,
|
| 11744 |
+
"step": 15370
|
| 11745 |
+
},
|
| 11746 |
+
{
|
| 11747 |
+
"epoch": 7.207122774133083,
|
| 11748 |
+
"grad_norm": 2.3893768787384033,
|
| 11749 |
+
"learning_rate": 9.099109653233366e-06,
|
| 11750 |
+
"loss": 0.0741,
|
| 11751 |
+
"step": 15380
|
| 11752 |
+
},
|
| 11753 |
+
{
|
| 11754 |
+
"epoch": 7.211808809746954,
|
| 11755 |
+
"grad_norm": 1.2158704996109009,
|
| 11756 |
+
"learning_rate": 9.098523898781631e-06,
|
| 11757 |
+
"loss": 0.0866,
|
| 11758 |
+
"step": 15390
|
| 11759 |
+
},
|
| 11760 |
+
{
|
| 11761 |
+
"epoch": 7.216494845360825,
|
| 11762 |
+
"grad_norm": 2.252181053161621,
|
| 11763 |
+
"learning_rate": 9.097938144329898e-06,
|
| 11764 |
+
"loss": 0.076,
|
| 11765 |
+
"step": 15400
|
| 11766 |
+
},
|
| 11767 |
+
{
|
| 11768 |
+
"epoch": 7.221180880974695,
|
| 11769 |
+
"grad_norm": 1.2606340646743774,
|
| 11770 |
+
"learning_rate": 9.097352389878163e-06,
|
| 11771 |
+
"loss": 0.0661,
|
| 11772 |
+
"step": 15410
|
| 11773 |
+
},
|
| 11774 |
+
{
|
| 11775 |
+
"epoch": 7.225866916588566,
|
| 11776 |
+
"grad_norm": 1.1483300924301147,
|
| 11777 |
+
"learning_rate": 9.096766635426429e-06,
|
| 11778 |
+
"loss": 0.0776,
|
| 11779 |
+
"step": 15420
|
| 11780 |
+
},
|
| 11781 |
+
{
|
| 11782 |
+
"epoch": 7.230552952202436,
|
| 11783 |
+
"grad_norm": 1.4554270505905151,
|
| 11784 |
+
"learning_rate": 9.096180880974697e-06,
|
| 11785 |
+
"loss": 0.0713,
|
| 11786 |
+
"step": 15430
|
| 11787 |
+
},
|
| 11788 |
+
{
|
| 11789 |
+
"epoch": 7.235238987816308,
|
| 11790 |
+
"grad_norm": 1.8985337018966675,
|
| 11791 |
+
"learning_rate": 9.095595126522962e-06,
|
| 11792 |
+
"loss": 0.0787,
|
| 11793 |
+
"step": 15440
|
| 11794 |
+
},
|
| 11795 |
+
{
|
| 11796 |
+
"epoch": 7.239925023430178,
|
| 11797 |
+
"grad_norm": 2.394465208053589,
|
| 11798 |
+
"learning_rate": 9.095009372071228e-06,
|
| 11799 |
+
"loss": 0.0767,
|
| 11800 |
+
"step": 15450
|
| 11801 |
+
},
|
| 11802 |
+
{
|
| 11803 |
+
"epoch": 7.244611059044049,
|
| 11804 |
+
"grad_norm": 1.2314172983169556,
|
| 11805 |
+
"learning_rate": 9.094423617619495e-06,
|
| 11806 |
+
"loss": 0.0647,
|
| 11807 |
+
"step": 15460
|
| 11808 |
+
},
|
| 11809 |
+
{
|
| 11810 |
+
"epoch": 7.249297094657919,
|
| 11811 |
+
"grad_norm": 2.137882947921753,
|
| 11812 |
+
"learning_rate": 9.09383786316776e-06,
|
| 11813 |
+
"loss": 0.0656,
|
| 11814 |
+
"step": 15470
|
| 11815 |
+
},
|
| 11816 |
+
{
|
| 11817 |
+
"epoch": 7.25398313027179,
|
| 11818 |
+
"grad_norm": 1.7702836990356445,
|
| 11819 |
+
"learning_rate": 9.093252108716027e-06,
|
| 11820 |
+
"loss": 0.0576,
|
| 11821 |
+
"step": 15480
|
| 11822 |
+
},
|
| 11823 |
+
{
|
| 11824 |
+
"epoch": 7.258669165885661,
|
| 11825 |
+
"grad_norm": 2.0788486003875732,
|
| 11826 |
+
"learning_rate": 9.092666354264294e-06,
|
| 11827 |
+
"loss": 0.0582,
|
| 11828 |
+
"step": 15490
|
| 11829 |
+
},
|
| 11830 |
+
{
|
| 11831 |
+
"epoch": 7.2633552014995315,
|
| 11832 |
+
"grad_norm": 1.0218828916549683,
|
| 11833 |
+
"learning_rate": 9.092080599812559e-06,
|
| 11834 |
+
"loss": 0.0699,
|
| 11835 |
+
"step": 15500
|
| 11836 |
+
},
|
| 11837 |
+
{
|
| 11838 |
+
"epoch": 7.2633552014995315,
|
| 11839 |
+
"eval_loss": 0.03923952579498291,
|
| 11840 |
+
"eval_pearson_cosine": 0.7819686811712643,
|
| 11841 |
+
"eval_pearson_dot": 0.6466329055139823,
|
| 11842 |
+
"eval_pearson_euclidean": 0.732531031261928,
|
| 11843 |
+
"eval_pearson_manhattan": 0.732247137892952,
|
| 11844 |
+
"eval_runtime": 39.7194,
|
| 11845 |
+
"eval_samples_per_second": 37.765,
|
| 11846 |
+
"eval_spearman_cosine": 0.7850868672642034,
|
| 11847 |
+
"eval_spearman_dot": 0.6628931528870909,
|
| 11848 |
+
"eval_spearman_euclidean": 0.7501575772894145,
|
| 11849 |
+
"eval_spearman_manhattan": 0.7501561306691681,
|
| 11850 |
+
"eval_steps_per_second": 37.765,
|
| 11851 |
+
"step": 15500
|
| 11852 |
+
},
|
| 11853 |
+
{
|
| 11854 |
+
"epoch": 7.268041237113402,
|
| 11855 |
+
"grad_norm": 0.7509507536888123,
|
| 11856 |
+
"learning_rate": 9.091494845360826e-06,
|
| 11857 |
+
"loss": 0.0823,
|
| 11858 |
+
"step": 15510
|
| 11859 |
+
},
|
| 11860 |
+
{
|
| 11861 |
+
"epoch": 7.2727272727272725,
|
| 11862 |
+
"grad_norm": 2.109041690826416,
|
| 11863 |
+
"learning_rate": 9.090909090909091e-06,
|
| 11864 |
+
"loss": 0.0701,
|
| 11865 |
+
"step": 15520
|
| 11866 |
+
},
|
| 11867 |
+
{
|
| 11868 |
+
"epoch": 7.277413308341144,
|
| 11869 |
+
"grad_norm": 1.6811095476150513,
|
| 11870 |
+
"learning_rate": 9.090323336457358e-06,
|
| 11871 |
+
"loss": 0.0726,
|
| 11872 |
+
"step": 15530
|
| 11873 |
+
},
|
| 11874 |
+
{
|
| 11875 |
+
"epoch": 7.282099343955014,
|
| 11876 |
+
"grad_norm": 1.9557669162750244,
|
| 11877 |
+
"learning_rate": 9.089737582005625e-06,
|
| 11878 |
+
"loss": 0.0796,
|
| 11879 |
+
"step": 15540
|
| 11880 |
+
},
|
| 11881 |
+
{
|
| 11882 |
+
"epoch": 7.286785379568885,
|
| 11883 |
+
"grad_norm": 1.6342480182647705,
|
| 11884 |
+
"learning_rate": 9.08915182755389e-06,
|
| 11885 |
+
"loss": 0.0618,
|
| 11886 |
+
"step": 15550
|
| 11887 |
+
},
|
| 11888 |
+
{
|
| 11889 |
+
"epoch": 7.291471415182755,
|
| 11890 |
+
"grad_norm": 1.3443505764007568,
|
| 11891 |
+
"learning_rate": 9.088566073102157e-06,
|
| 11892 |
+
"loss": 0.0767,
|
| 11893 |
+
"step": 15560
|
| 11894 |
+
},
|
| 11895 |
+
{
|
| 11896 |
+
"epoch": 7.296157450796626,
|
| 11897 |
+
"grad_norm": 2.0768396854400635,
|
| 11898 |
+
"learning_rate": 9.087980318650422e-06,
|
| 11899 |
+
"loss": 0.0699,
|
| 11900 |
+
"step": 15570
|
| 11901 |
+
},
|
| 11902 |
+
{
|
| 11903 |
+
"epoch": 7.300843486410496,
|
| 11904 |
+
"grad_norm": 1.989401936531067,
|
| 11905 |
+
"learning_rate": 9.087394564198687e-06,
|
| 11906 |
+
"loss": 0.0706,
|
| 11907 |
+
"step": 15580
|
| 11908 |
+
},
|
| 11909 |
+
{
|
| 11910 |
+
"epoch": 7.305529522024368,
|
| 11911 |
+
"grad_norm": 1.7831469774246216,
|
| 11912 |
+
"learning_rate": 9.086808809746956e-06,
|
| 11913 |
+
"loss": 0.0701,
|
| 11914 |
+
"step": 15590
|
| 11915 |
+
},
|
| 11916 |
+
{
|
| 11917 |
+
"epoch": 7.310215557638238,
|
| 11918 |
+
"grad_norm": 2.3312692642211914,
|
| 11919 |
+
"learning_rate": 9.086223055295221e-06,
|
| 11920 |
+
"loss": 0.0703,
|
| 11921 |
+
"step": 15600
|
| 11922 |
+
},
|
| 11923 |
+
{
|
| 11924 |
+
"epoch": 7.314901593252109,
|
| 11925 |
+
"grad_norm": 1.7669209241867065,
|
| 11926 |
+
"learning_rate": 9.085637300843487e-06,
|
| 11927 |
+
"loss": 0.0646,
|
| 11928 |
+
"step": 15610
|
| 11929 |
+
},
|
| 11930 |
+
{
|
| 11931 |
+
"epoch": 7.319587628865979,
|
| 11932 |
+
"grad_norm": 1.880066156387329,
|
| 11933 |
+
"learning_rate": 9.085051546391753e-06,
|
| 11934 |
+
"loss": 0.0798,
|
| 11935 |
+
"step": 15620
|
| 11936 |
+
},
|
| 11937 |
+
{
|
| 11938 |
+
"epoch": 7.3242736644798505,
|
| 11939 |
+
"grad_norm": 1.3240752220153809,
|
| 11940 |
+
"learning_rate": 9.084465791940019e-06,
|
| 11941 |
+
"loss": 0.0749,
|
| 11942 |
+
"step": 15630
|
| 11943 |
+
},
|
| 11944 |
+
{
|
| 11945 |
+
"epoch": 7.328959700093721,
|
| 11946 |
+
"grad_norm": 1.0103267431259155,
|
| 11947 |
+
"learning_rate": 9.083880037488286e-06,
|
| 11948 |
+
"loss": 0.0635,
|
| 11949 |
+
"step": 15640
|
| 11950 |
+
},
|
| 11951 |
+
{
|
| 11952 |
+
"epoch": 7.3336457357075915,
|
| 11953 |
+
"grad_norm": 1.4677484035491943,
|
| 11954 |
+
"learning_rate": 9.083294283036552e-06,
|
| 11955 |
+
"loss": 0.0705,
|
| 11956 |
+
"step": 15650
|
| 11957 |
+
},
|
| 11958 |
+
{
|
| 11959 |
+
"epoch": 7.338331771321462,
|
| 11960 |
+
"grad_norm": 2.090219736099243,
|
| 11961 |
+
"learning_rate": 9.082708528584818e-06,
|
| 11962 |
+
"loss": 0.0693,
|
| 11963 |
+
"step": 15660
|
| 11964 |
+
},
|
| 11965 |
+
{
|
| 11966 |
+
"epoch": 7.3430178069353325,
|
| 11967 |
+
"grad_norm": 2.349215030670166,
|
| 11968 |
+
"learning_rate": 9.082122774133085e-06,
|
| 11969 |
+
"loss": 0.0714,
|
| 11970 |
+
"step": 15670
|
| 11971 |
+
},
|
| 11972 |
+
{
|
| 11973 |
+
"epoch": 7.347703842549203,
|
| 11974 |
+
"grad_norm": 0.8705586791038513,
|
| 11975 |
+
"learning_rate": 9.08153701968135e-06,
|
| 11976 |
+
"loss": 0.0761,
|
| 11977 |
+
"step": 15680
|
| 11978 |
+
},
|
| 11979 |
+
{
|
| 11980 |
+
"epoch": 7.352389878163074,
|
| 11981 |
+
"grad_norm": 1.986405372619629,
|
| 11982 |
+
"learning_rate": 9.080951265229617e-06,
|
| 11983 |
+
"loss": 0.0579,
|
| 11984 |
+
"step": 15690
|
| 11985 |
+
},
|
| 11986 |
+
{
|
| 11987 |
+
"epoch": 7.357075913776945,
|
| 11988 |
+
"grad_norm": 2.267803430557251,
|
| 11989 |
+
"learning_rate": 9.080365510777884e-06,
|
| 11990 |
+
"loss": 0.0925,
|
| 11991 |
+
"step": 15700
|
| 11992 |
+
},
|
| 11993 |
+
{
|
| 11994 |
+
"epoch": 7.361761949390815,
|
| 11995 |
+
"grad_norm": 1.7816276550292969,
|
| 11996 |
+
"learning_rate": 9.079779756326149e-06,
|
| 11997 |
+
"loss": 0.0713,
|
| 11998 |
+
"step": 15710
|
| 11999 |
+
},
|
| 12000 |
+
{
|
| 12001 |
+
"epoch": 7.366447985004686,
|
| 12002 |
+
"grad_norm": 3.1647868156433105,
|
| 12003 |
+
"learning_rate": 9.079194001874416e-06,
|
| 12004 |
+
"loss": 0.089,
|
| 12005 |
+
"step": 15720
|
| 12006 |
+
},
|
| 12007 |
+
{
|
| 12008 |
+
"epoch": 7.371134020618557,
|
| 12009 |
+
"grad_norm": 2.082855463027954,
|
| 12010 |
+
"learning_rate": 9.078608247422681e-06,
|
| 12011 |
+
"loss": 0.0695,
|
| 12012 |
+
"step": 15730
|
| 12013 |
+
},
|
| 12014 |
+
{
|
| 12015 |
+
"epoch": 7.375820056232428,
|
| 12016 |
+
"grad_norm": 1.4253464937210083,
|
| 12017 |
+
"learning_rate": 9.078022492970946e-06,
|
| 12018 |
+
"loss": 0.0681,
|
| 12019 |
+
"step": 15740
|
| 12020 |
+
},
|
| 12021 |
+
{
|
| 12022 |
+
"epoch": 7.380506091846298,
|
| 12023 |
+
"grad_norm": 1.7833616733551025,
|
| 12024 |
+
"learning_rate": 9.077436738519213e-06,
|
| 12025 |
+
"loss": 0.0739,
|
| 12026 |
+
"step": 15750
|
| 12027 |
+
},
|
| 12028 |
+
{
|
| 12029 |
+
"epoch": 7.380506091846298,
|
| 12030 |
+
"eval_loss": 0.03890243172645569,
|
| 12031 |
+
"eval_pearson_cosine": 0.7865226942731169,
|
| 12032 |
+
"eval_pearson_dot": 0.6411769886141485,
|
| 12033 |
+
"eval_pearson_euclidean": 0.7328063007950192,
|
| 12034 |
+
"eval_pearson_manhattan": 0.7322937842561661,
|
| 12035 |
+
"eval_runtime": 39.8796,
|
| 12036 |
+
"eval_samples_per_second": 37.613,
|
| 12037 |
+
"eval_spearman_cosine": 0.7886056526857715,
|
| 12038 |
+
"eval_spearman_dot": 0.6589435896491915,
|
| 12039 |
+
"eval_spearman_euclidean": 0.7495362761356495,
|
| 12040 |
+
"eval_spearman_manhattan": 0.7491310374131812,
|
| 12041 |
+
"eval_steps_per_second": 37.613,
|
| 12042 |
+
"step": 15750
|
| 12043 |
+
},
|
| 12044 |
+
{
|
| 12045 |
+
"epoch": 7.385192127460169,
|
| 12046 |
+
"grad_norm": 1.368802785873413,
|
| 12047 |
+
"learning_rate": 9.07685098406748e-06,
|
| 12048 |
+
"loss": 0.0634,
|
| 12049 |
+
"step": 15760
|
| 12050 |
+
},
|
| 12051 |
+
{
|
| 12052 |
+
"epoch": 7.389878163074039,
|
| 12053 |
+
"grad_norm": 2.0611209869384766,
|
| 12054 |
+
"learning_rate": 9.076265229615745e-06,
|
| 12055 |
+
"loss": 0.0732,
|
| 12056 |
+
"step": 15770
|
| 12057 |
+
},
|
| 12058 |
+
{
|
| 12059 |
+
"epoch": 7.39456419868791,
|
| 12060 |
+
"grad_norm": 1.3949185609817505,
|
| 12061 |
+
"learning_rate": 9.075679475164012e-06,
|
| 12062 |
+
"loss": 0.0643,
|
| 12063 |
+
"step": 15780
|
| 12064 |
+
},
|
| 12065 |
+
{
|
| 12066 |
+
"epoch": 7.399250234301781,
|
| 12067 |
+
"grad_norm": 2.267596960067749,
|
| 12068 |
+
"learning_rate": 9.075093720712277e-06,
|
| 12069 |
+
"loss": 0.0721,
|
| 12070 |
+
"step": 15790
|
| 12071 |
+
},
|
| 12072 |
+
{
|
| 12073 |
+
"epoch": 7.4039362699156515,
|
| 12074 |
+
"grad_norm": 1.2794581651687622,
|
| 12075 |
+
"learning_rate": 9.074507966260544e-06,
|
| 12076 |
+
"loss": 0.0718,
|
| 12077 |
+
"step": 15800
|
| 12078 |
+
},
|
| 12079 |
+
{
|
| 12080 |
+
"epoch": 7.408622305529522,
|
| 12081 |
+
"grad_norm": 1.8668746948242188,
|
| 12082 |
+
"learning_rate": 9.07392221180881e-06,
|
| 12083 |
+
"loss": 0.0734,
|
| 12084 |
+
"step": 15810
|
| 12085 |
+
},
|
| 12086 |
+
{
|
| 12087 |
+
"epoch": 7.413308341143392,
|
| 12088 |
+
"grad_norm": 2.141602039337158,
|
| 12089 |
+
"learning_rate": 9.073336457357077e-06,
|
| 12090 |
+
"loss": 0.0637,
|
| 12091 |
+
"step": 15820
|
| 12092 |
+
},
|
| 12093 |
+
{
|
| 12094 |
+
"epoch": 7.417994376757264,
|
| 12095 |
+
"grad_norm": 1.9020168781280518,
|
| 12096 |
+
"learning_rate": 9.072750702905343e-06,
|
| 12097 |
+
"loss": 0.0727,
|
| 12098 |
+
"step": 15830
|
| 12099 |
+
},
|
| 12100 |
+
{
|
| 12101 |
+
"epoch": 7.422680412371134,
|
| 12102 |
+
"grad_norm": 1.7302427291870117,
|
| 12103 |
+
"learning_rate": 9.072164948453609e-06,
|
| 12104 |
+
"loss": 0.0742,
|
| 12105 |
+
"step": 15840
|
| 12106 |
+
},
|
| 12107 |
+
{
|
| 12108 |
+
"epoch": 7.427366447985005,
|
| 12109 |
+
"grad_norm": 1.6152589321136475,
|
| 12110 |
+
"learning_rate": 9.071579194001876e-06,
|
| 12111 |
+
"loss": 0.0764,
|
| 12112 |
+
"step": 15850
|
| 12113 |
+
},
|
| 12114 |
+
{
|
| 12115 |
+
"epoch": 7.432052483598875,
|
| 12116 |
+
"grad_norm": 2.491912364959717,
|
| 12117 |
+
"learning_rate": 9.07099343955014e-06,
|
| 12118 |
+
"loss": 0.0746,
|
| 12119 |
+
"step": 15860
|
| 12120 |
+
},
|
| 12121 |
+
{
|
| 12122 |
+
"epoch": 7.436738519212746,
|
| 12123 |
+
"grad_norm": 1.8737932443618774,
|
| 12124 |
+
"learning_rate": 9.070407685098408e-06,
|
| 12125 |
+
"loss": 0.0665,
|
| 12126 |
+
"step": 15870
|
| 12127 |
+
},
|
| 12128 |
+
{
|
| 12129 |
+
"epoch": 7.441424554826616,
|
| 12130 |
+
"grad_norm": 2.3536882400512695,
|
| 12131 |
+
"learning_rate": 9.069821930646675e-06,
|
| 12132 |
+
"loss": 0.0702,
|
| 12133 |
+
"step": 15880
|
| 12134 |
+
},
|
| 12135 |
+
{
|
| 12136 |
+
"epoch": 7.446110590440488,
|
| 12137 |
+
"grad_norm": 1.0954251289367676,
|
| 12138 |
+
"learning_rate": 9.06923617619494e-06,
|
| 12139 |
+
"loss": 0.0768,
|
| 12140 |
+
"step": 15890
|
| 12141 |
+
},
|
| 12142 |
+
{
|
| 12143 |
+
"epoch": 7.450796626054358,
|
| 12144 |
+
"grad_norm": 1.973325490951538,
|
| 12145 |
+
"learning_rate": 9.068650421743205e-06,
|
| 12146 |
+
"loss": 0.0666,
|
| 12147 |
+
"step": 15900
|
| 12148 |
+
},
|
| 12149 |
+
{
|
| 12150 |
+
"epoch": 7.455482661668229,
|
| 12151 |
+
"grad_norm": 0.6486696600914001,
|
| 12152 |
+
"learning_rate": 9.068064667291472e-06,
|
| 12153 |
+
"loss": 0.0813,
|
| 12154 |
+
"step": 15910
|
| 12155 |
+
},
|
| 12156 |
+
{
|
| 12157 |
+
"epoch": 7.460168697282099,
|
| 12158 |
+
"grad_norm": 2.4715213775634766,
|
| 12159 |
+
"learning_rate": 9.067478912839737e-06,
|
| 12160 |
+
"loss": 0.0698,
|
| 12161 |
+
"step": 15920
|
| 12162 |
+
},
|
| 12163 |
+
{
|
| 12164 |
+
"epoch": 7.4648547328959705,
|
| 12165 |
+
"grad_norm": 1.3833788633346558,
|
| 12166 |
+
"learning_rate": 9.066893158388004e-06,
|
| 12167 |
+
"loss": 0.0783,
|
| 12168 |
+
"step": 15930
|
| 12169 |
+
},
|
| 12170 |
+
{
|
| 12171 |
+
"epoch": 7.469540768509841,
|
| 12172 |
+
"grad_norm": 1.5679866075515747,
|
| 12173 |
+
"learning_rate": 9.066307403936271e-06,
|
| 12174 |
+
"loss": 0.0675,
|
| 12175 |
+
"step": 15940
|
| 12176 |
+
},
|
| 12177 |
+
{
|
| 12178 |
+
"epoch": 7.474226804123711,
|
| 12179 |
+
"grad_norm": 1.173086166381836,
|
| 12180 |
+
"learning_rate": 9.065721649484536e-06,
|
| 12181 |
+
"loss": 0.0705,
|
| 12182 |
+
"step": 15950
|
| 12183 |
+
},
|
| 12184 |
+
{
|
| 12185 |
+
"epoch": 7.478912839737582,
|
| 12186 |
+
"grad_norm": 2.0635769367218018,
|
| 12187 |
+
"learning_rate": 9.065135895032803e-06,
|
| 12188 |
+
"loss": 0.0717,
|
| 12189 |
+
"step": 15960
|
| 12190 |
+
},
|
| 12191 |
+
{
|
| 12192 |
+
"epoch": 7.483598875351452,
|
| 12193 |
+
"grad_norm": 2.0800647735595703,
|
| 12194 |
+
"learning_rate": 9.064550140581068e-06,
|
| 12195 |
+
"loss": 0.0712,
|
| 12196 |
+
"step": 15970
|
| 12197 |
+
},
|
| 12198 |
+
{
|
| 12199 |
+
"epoch": 7.488284910965323,
|
| 12200 |
+
"grad_norm": 1.0860838890075684,
|
| 12201 |
+
"learning_rate": 9.063964386129335e-06,
|
| 12202 |
+
"loss": 0.0637,
|
| 12203 |
+
"step": 15980
|
| 12204 |
+
},
|
| 12205 |
+
{
|
| 12206 |
+
"epoch": 7.492970946579194,
|
| 12207 |
+
"grad_norm": 2.794854164123535,
|
| 12208 |
+
"learning_rate": 9.063378631677602e-06,
|
| 12209 |
+
"loss": 0.0799,
|
| 12210 |
+
"step": 15990
|
| 12211 |
+
},
|
| 12212 |
+
{
|
| 12213 |
+
"epoch": 7.497656982193065,
|
| 12214 |
+
"grad_norm": 0.8473652005195618,
|
| 12215 |
+
"learning_rate": 9.062792877225867e-06,
|
| 12216 |
+
"loss": 0.0745,
|
| 12217 |
+
"step": 16000
|
| 12218 |
+
},
|
| 12219 |
+
{
|
| 12220 |
+
"epoch": 7.497656982193065,
|
| 12221 |
+
"eval_loss": 0.039655983448028564,
|
| 12222 |
+
"eval_pearson_cosine": 0.779382095257283,
|
| 12223 |
+
"eval_pearson_dot": 0.6379781011095105,
|
| 12224 |
+
"eval_pearson_euclidean": 0.7372894551077778,
|
| 12225 |
+
"eval_pearson_manhattan": 0.736644904985166,
|
| 12226 |
+
"eval_runtime": 40.2485,
|
| 12227 |
+
"eval_samples_per_second": 37.268,
|
| 12228 |
+
"eval_spearman_cosine": 0.7827440097255054,
|
| 12229 |
+
"eval_spearman_dot": 0.6504351353485877,
|
| 12230 |
+
"eval_spearman_euclidean": 0.752448571801891,
|
| 12231 |
+
"eval_spearman_manhattan": 0.7524283280152466,
|
| 12232 |
+
"eval_steps_per_second": 37.268,
|
| 12233 |
+
"step": 16000
|
| 12234 |
}
|
| 12235 |
],
|
| 12236 |
"logging_steps": 10,
|