Training in progress, step 20000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a99077961d0a641c5ff38bc41aeb0e96f4e0aa881e97473db5564c741bb8ca1
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b633f36fabb5fb014eb719663342186c16c8ad074853b96d787c85ceecedc06
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc17503afa3bf0eafca6b72efbe6ae8cb454a3c16da90f560f71f4af87c7a4e4
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f38d69aa9147d77e04cfc7d1c0433054c65fed5f88ad2cafb308669398f46b3f
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -14523,6 +14523,770 @@
|
|
| 14523 |
"eval_spearman_manhattan": 0.7432616809242956,
|
| 14524 |
"eval_steps_per_second": 36.716,
|
| 14525 |
"step": 19000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14526 |
}
|
| 14527 |
],
|
| 14528 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.372071227741332,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 20000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 14523 |
"eval_spearman_manhattan": 0.7432616809242956,
|
| 14524 |
"eval_steps_per_second": 36.716,
|
| 14525 |
"step": 19000
|
| 14526 |
+
},
|
| 14527 |
+
{
|
| 14528 |
+
"epoch": 8.908153701968136,
|
| 14529 |
+
"grad_norm": 1.1705414056777954,
|
| 14530 |
+
"learning_rate": 8.886480787253984e-06,
|
| 14531 |
+
"loss": 0.0549,
|
| 14532 |
+
"step": 19010
|
| 14533 |
+
},
|
| 14534 |
+
{
|
| 14535 |
+
"epoch": 8.912839737582006,
|
| 14536 |
+
"grad_norm": 2.2512776851654053,
|
| 14537 |
+
"learning_rate": 8.885895032802249e-06,
|
| 14538 |
+
"loss": 0.0715,
|
| 14539 |
+
"step": 19020
|
| 14540 |
+
},
|
| 14541 |
+
{
|
| 14542 |
+
"epoch": 8.917525773195877,
|
| 14543 |
+
"grad_norm": 1.7541801929473877,
|
| 14544 |
+
"learning_rate": 8.885309278350516e-06,
|
| 14545 |
+
"loss": 0.0657,
|
| 14546 |
+
"step": 19030
|
| 14547 |
+
},
|
| 14548 |
+
{
|
| 14549 |
+
"epoch": 8.922211808809747,
|
| 14550 |
+
"grad_norm": 1.3972922563552856,
|
| 14551 |
+
"learning_rate": 8.884723523898783e-06,
|
| 14552 |
+
"loss": 0.0516,
|
| 14553 |
+
"step": 19040
|
| 14554 |
+
},
|
| 14555 |
+
{
|
| 14556 |
+
"epoch": 8.926897844423618,
|
| 14557 |
+
"grad_norm": 0.9502004384994507,
|
| 14558 |
+
"learning_rate": 8.884137769447048e-06,
|
| 14559 |
+
"loss": 0.0664,
|
| 14560 |
+
"step": 19050
|
| 14561 |
+
},
|
| 14562 |
+
{
|
| 14563 |
+
"epoch": 8.931583880037488,
|
| 14564 |
+
"grad_norm": 2.1048943996429443,
|
| 14565 |
+
"learning_rate": 8.883552014995315e-06,
|
| 14566 |
+
"loss": 0.0668,
|
| 14567 |
+
"step": 19060
|
| 14568 |
+
},
|
| 14569 |
+
{
|
| 14570 |
+
"epoch": 8.936269915651359,
|
| 14571 |
+
"grad_norm": 2.441774368286133,
|
| 14572 |
+
"learning_rate": 8.88296626054358e-06,
|
| 14573 |
+
"loss": 0.0681,
|
| 14574 |
+
"step": 19070
|
| 14575 |
+
},
|
| 14576 |
+
{
|
| 14577 |
+
"epoch": 8.940955951265229,
|
| 14578 |
+
"grad_norm": 1.6815327405929565,
|
| 14579 |
+
"learning_rate": 8.882380506091847e-06,
|
| 14580 |
+
"loss": 0.0583,
|
| 14581 |
+
"step": 19080
|
| 14582 |
+
},
|
| 14583 |
+
{
|
| 14584 |
+
"epoch": 8.9456419868791,
|
| 14585 |
+
"grad_norm": 2.0613820552825928,
|
| 14586 |
+
"learning_rate": 8.881794751640114e-06,
|
| 14587 |
+
"loss": 0.0605,
|
| 14588 |
+
"step": 19090
|
| 14589 |
+
},
|
| 14590 |
+
{
|
| 14591 |
+
"epoch": 8.950328022492972,
|
| 14592 |
+
"grad_norm": 2.164487838745117,
|
| 14593 |
+
"learning_rate": 8.88120899718838e-06,
|
| 14594 |
+
"loss": 0.0749,
|
| 14595 |
+
"step": 19100
|
| 14596 |
+
},
|
| 14597 |
+
{
|
| 14598 |
+
"epoch": 8.955014058106842,
|
| 14599 |
+
"grad_norm": 1.838508129119873,
|
| 14600 |
+
"learning_rate": 8.880623242736646e-06,
|
| 14601 |
+
"loss": 0.0588,
|
| 14602 |
+
"step": 19110
|
| 14603 |
+
},
|
| 14604 |
+
{
|
| 14605 |
+
"epoch": 8.959700093720713,
|
| 14606 |
+
"grad_norm": 2.338103771209717,
|
| 14607 |
+
"learning_rate": 8.880037488284912e-06,
|
| 14608 |
+
"loss": 0.06,
|
| 14609 |
+
"step": 19120
|
| 14610 |
+
},
|
| 14611 |
+
{
|
| 14612 |
+
"epoch": 8.964386129334583,
|
| 14613 |
+
"grad_norm": 1.5945453643798828,
|
| 14614 |
+
"learning_rate": 8.879451733833177e-06,
|
| 14615 |
+
"loss": 0.062,
|
| 14616 |
+
"step": 19130
|
| 14617 |
+
},
|
| 14618 |
+
{
|
| 14619 |
+
"epoch": 8.969072164948454,
|
| 14620 |
+
"grad_norm": 1.4666954278945923,
|
| 14621 |
+
"learning_rate": 8.878865979381444e-06,
|
| 14622 |
+
"loss": 0.0593,
|
| 14623 |
+
"step": 19140
|
| 14624 |
+
},
|
| 14625 |
+
{
|
| 14626 |
+
"epoch": 8.973758200562324,
|
| 14627 |
+
"grad_norm": 2.2944576740264893,
|
| 14628 |
+
"learning_rate": 8.87828022492971e-06,
|
| 14629 |
+
"loss": 0.0621,
|
| 14630 |
+
"step": 19150
|
| 14631 |
+
},
|
| 14632 |
+
{
|
| 14633 |
+
"epoch": 8.978444236176195,
|
| 14634 |
+
"grad_norm": 1.8283967971801758,
|
| 14635 |
+
"learning_rate": 8.877694470477976e-06,
|
| 14636 |
+
"loss": 0.0646,
|
| 14637 |
+
"step": 19160
|
| 14638 |
+
},
|
| 14639 |
+
{
|
| 14640 |
+
"epoch": 8.983130271790065,
|
| 14641 |
+
"grad_norm": 1.3863099813461304,
|
| 14642 |
+
"learning_rate": 8.877108716026243e-06,
|
| 14643 |
+
"loss": 0.0524,
|
| 14644 |
+
"step": 19170
|
| 14645 |
+
},
|
| 14646 |
+
{
|
| 14647 |
+
"epoch": 8.987816307403936,
|
| 14648 |
+
"grad_norm": 2.2185399532318115,
|
| 14649 |
+
"learning_rate": 8.876522961574508e-06,
|
| 14650 |
+
"loss": 0.0741,
|
| 14651 |
+
"step": 19180
|
| 14652 |
+
},
|
| 14653 |
+
{
|
| 14654 |
+
"epoch": 8.992502343017806,
|
| 14655 |
+
"grad_norm": 1.0174760818481445,
|
| 14656 |
+
"learning_rate": 8.875937207122775e-06,
|
| 14657 |
+
"loss": 0.0613,
|
| 14658 |
+
"step": 19190
|
| 14659 |
+
},
|
| 14660 |
+
{
|
| 14661 |
+
"epoch": 8.997188378631678,
|
| 14662 |
+
"grad_norm": 2.3824729919433594,
|
| 14663 |
+
"learning_rate": 8.87535145267104e-06,
|
| 14664 |
+
"loss": 0.0588,
|
| 14665 |
+
"step": 19200
|
| 14666 |
+
},
|
| 14667 |
+
{
|
| 14668 |
+
"epoch": 9.001874414245549,
|
| 14669 |
+
"grad_norm": 0.5327388048171997,
|
| 14670 |
+
"learning_rate": 8.874765698219307e-06,
|
| 14671 |
+
"loss": 0.0531,
|
| 14672 |
+
"step": 19210
|
| 14673 |
+
},
|
| 14674 |
+
{
|
| 14675 |
+
"epoch": 9.00656044985942,
|
| 14676 |
+
"grad_norm": 1.4109519720077515,
|
| 14677 |
+
"learning_rate": 8.874179943767574e-06,
|
| 14678 |
+
"loss": 0.0435,
|
| 14679 |
+
"step": 19220
|
| 14680 |
+
},
|
| 14681 |
+
{
|
| 14682 |
+
"epoch": 9.01124648547329,
|
| 14683 |
+
"grad_norm": 1.2190899848937988,
|
| 14684 |
+
"learning_rate": 8.873594189315839e-06,
|
| 14685 |
+
"loss": 0.0499,
|
| 14686 |
+
"step": 19230
|
| 14687 |
+
},
|
| 14688 |
+
{
|
| 14689 |
+
"epoch": 9.01593252108716,
|
| 14690 |
+
"grad_norm": 1.7541508674621582,
|
| 14691 |
+
"learning_rate": 8.873008434864106e-06,
|
| 14692 |
+
"loss": 0.046,
|
| 14693 |
+
"step": 19240
|
| 14694 |
+
},
|
| 14695 |
+
{
|
| 14696 |
+
"epoch": 9.02061855670103,
|
| 14697 |
+
"grad_norm": 0.7862921953201294,
|
| 14698 |
+
"learning_rate": 8.872422680412371e-06,
|
| 14699 |
+
"loss": 0.0436,
|
| 14700 |
+
"step": 19250
|
| 14701 |
+
},
|
| 14702 |
+
{
|
| 14703 |
+
"epoch": 9.02061855670103,
|
| 14704 |
+
"eval_loss": 0.03772435337305069,
|
| 14705 |
+
"eval_pearson_cosine": 0.7855273741960076,
|
| 14706 |
+
"eval_pearson_dot": 0.6489285063428554,
|
| 14707 |
+
"eval_pearson_euclidean": 0.7292628651619779,
|
| 14708 |
+
"eval_pearson_manhattan": 0.728854161244513,
|
| 14709 |
+
"eval_runtime": 39.9971,
|
| 14710 |
+
"eval_samples_per_second": 37.503,
|
| 14711 |
+
"eval_spearman_cosine": 0.7855635384218336,
|
| 14712 |
+
"eval_spearman_dot": 0.6695729920402427,
|
| 14713 |
+
"eval_spearman_euclidean": 0.7464551903137364,
|
| 14714 |
+
"eval_spearman_manhattan": 0.7461578348935499,
|
| 14715 |
+
"eval_steps_per_second": 37.503,
|
| 14716 |
+
"step": 19250
|
| 14717 |
+
},
|
| 14718 |
+
{
|
| 14719 |
+
"epoch": 9.025304592314901,
|
| 14720 |
+
"grad_norm": 1.5160027742385864,
|
| 14721 |
+
"learning_rate": 8.871836925960638e-06,
|
| 14722 |
+
"loss": 0.0402,
|
| 14723 |
+
"step": 19260
|
| 14724 |
+
},
|
| 14725 |
+
{
|
| 14726 |
+
"epoch": 9.029990627928772,
|
| 14727 |
+
"grad_norm": 0.9218592047691345,
|
| 14728 |
+
"learning_rate": 8.871251171508905e-06,
|
| 14729 |
+
"loss": 0.0548,
|
| 14730 |
+
"step": 19270
|
| 14731 |
+
},
|
| 14732 |
+
{
|
| 14733 |
+
"epoch": 9.034676663542642,
|
| 14734 |
+
"grad_norm": 1.078574299812317,
|
| 14735 |
+
"learning_rate": 8.87066541705717e-06,
|
| 14736 |
+
"loss": 0.0501,
|
| 14737 |
+
"step": 19280
|
| 14738 |
+
},
|
| 14739 |
+
{
|
| 14740 |
+
"epoch": 9.039362699156513,
|
| 14741 |
+
"grad_norm": 1.2724040746688843,
|
| 14742 |
+
"learning_rate": 8.870079662605436e-06,
|
| 14743 |
+
"loss": 0.0455,
|
| 14744 |
+
"step": 19290
|
| 14745 |
+
},
|
| 14746 |
+
{
|
| 14747 |
+
"epoch": 9.044048734770385,
|
| 14748 |
+
"grad_norm": 1.7886642217636108,
|
| 14749 |
+
"learning_rate": 8.869493908153702e-06,
|
| 14750 |
+
"loss": 0.0483,
|
| 14751 |
+
"step": 19300
|
| 14752 |
+
},
|
| 14753 |
+
{
|
| 14754 |
+
"epoch": 9.048734770384256,
|
| 14755 |
+
"grad_norm": 2.028181552886963,
|
| 14756 |
+
"learning_rate": 8.868908153701968e-06,
|
| 14757 |
+
"loss": 0.0453,
|
| 14758 |
+
"step": 19310
|
| 14759 |
+
},
|
| 14760 |
+
{
|
| 14761 |
+
"epoch": 9.053420805998126,
|
| 14762 |
+
"grad_norm": 1.9034656286239624,
|
| 14763 |
+
"learning_rate": 8.868322399250235e-06,
|
| 14764 |
+
"loss": 0.049,
|
| 14765 |
+
"step": 19320
|
| 14766 |
+
},
|
| 14767 |
+
{
|
| 14768 |
+
"epoch": 9.058106841611997,
|
| 14769 |
+
"grad_norm": 1.34951651096344,
|
| 14770 |
+
"learning_rate": 8.867736644798502e-06,
|
| 14771 |
+
"loss": 0.0486,
|
| 14772 |
+
"step": 19330
|
| 14773 |
+
},
|
| 14774 |
+
{
|
| 14775 |
+
"epoch": 9.062792877225867,
|
| 14776 |
+
"grad_norm": 2.447568655014038,
|
| 14777 |
+
"learning_rate": 8.867150890346767e-06,
|
| 14778 |
+
"loss": 0.0464,
|
| 14779 |
+
"step": 19340
|
| 14780 |
+
},
|
| 14781 |
+
{
|
| 14782 |
+
"epoch": 9.067478912839738,
|
| 14783 |
+
"grad_norm": 0.8183003664016724,
|
| 14784 |
+
"learning_rate": 8.866565135895034e-06,
|
| 14785 |
+
"loss": 0.0469,
|
| 14786 |
+
"step": 19350
|
| 14787 |
+
},
|
| 14788 |
+
{
|
| 14789 |
+
"epoch": 9.072164948453608,
|
| 14790 |
+
"grad_norm": 1.3520687818527222,
|
| 14791 |
+
"learning_rate": 8.865979381443299e-06,
|
| 14792 |
+
"loss": 0.052,
|
| 14793 |
+
"step": 19360
|
| 14794 |
+
},
|
| 14795 |
+
{
|
| 14796 |
+
"epoch": 9.076850984067478,
|
| 14797 |
+
"grad_norm": 1.827717900276184,
|
| 14798 |
+
"learning_rate": 8.865393626991566e-06,
|
| 14799 |
+
"loss": 0.052,
|
| 14800 |
+
"step": 19370
|
| 14801 |
+
},
|
| 14802 |
+
{
|
| 14803 |
+
"epoch": 9.081537019681349,
|
| 14804 |
+
"grad_norm": 1.5364701747894287,
|
| 14805 |
+
"learning_rate": 8.864807872539833e-06,
|
| 14806 |
+
"loss": 0.0522,
|
| 14807 |
+
"step": 19380
|
| 14808 |
+
},
|
| 14809 |
+
{
|
| 14810 |
+
"epoch": 9.08622305529522,
|
| 14811 |
+
"grad_norm": 1.0967464447021484,
|
| 14812 |
+
"learning_rate": 8.864222118088098e-06,
|
| 14813 |
+
"loss": 0.0546,
|
| 14814 |
+
"step": 19390
|
| 14815 |
+
},
|
| 14816 |
+
{
|
| 14817 |
+
"epoch": 9.090909090909092,
|
| 14818 |
+
"grad_norm": 1.141569972038269,
|
| 14819 |
+
"learning_rate": 8.863636363636365e-06,
|
| 14820 |
+
"loss": 0.054,
|
| 14821 |
+
"step": 19400
|
| 14822 |
+
},
|
| 14823 |
+
{
|
| 14824 |
+
"epoch": 9.095595126522962,
|
| 14825 |
+
"grad_norm": 1.025898814201355,
|
| 14826 |
+
"learning_rate": 8.86305060918463e-06,
|
| 14827 |
+
"loss": 0.0549,
|
| 14828 |
+
"step": 19410
|
| 14829 |
+
},
|
| 14830 |
+
{
|
| 14831 |
+
"epoch": 9.100281162136833,
|
| 14832 |
+
"grad_norm": 0.9598554968833923,
|
| 14833 |
+
"learning_rate": 8.862464854732897e-06,
|
| 14834 |
+
"loss": 0.0562,
|
| 14835 |
+
"step": 19420
|
| 14836 |
+
},
|
| 14837 |
+
{
|
| 14838 |
+
"epoch": 9.104967197750703,
|
| 14839 |
+
"grad_norm": 1.6386889219284058,
|
| 14840 |
+
"learning_rate": 8.861879100281164e-06,
|
| 14841 |
+
"loss": 0.0447,
|
| 14842 |
+
"step": 19430
|
| 14843 |
+
},
|
| 14844 |
+
{
|
| 14845 |
+
"epoch": 9.109653233364574,
|
| 14846 |
+
"grad_norm": 1.3437844514846802,
|
| 14847 |
+
"learning_rate": 8.861293345829429e-06,
|
| 14848 |
+
"loss": 0.0545,
|
| 14849 |
+
"step": 19440
|
| 14850 |
+
},
|
| 14851 |
+
{
|
| 14852 |
+
"epoch": 9.114339268978444,
|
| 14853 |
+
"grad_norm": 0.9290686249732971,
|
| 14854 |
+
"learning_rate": 8.860707591377694e-06,
|
| 14855 |
+
"loss": 0.0472,
|
| 14856 |
+
"step": 19450
|
| 14857 |
+
},
|
| 14858 |
+
{
|
| 14859 |
+
"epoch": 9.119025304592315,
|
| 14860 |
+
"grad_norm": 1.5257052183151245,
|
| 14861 |
+
"learning_rate": 8.860121836925961e-06,
|
| 14862 |
+
"loss": 0.0499,
|
| 14863 |
+
"step": 19460
|
| 14864 |
+
},
|
| 14865 |
+
{
|
| 14866 |
+
"epoch": 9.123711340206185,
|
| 14867 |
+
"grad_norm": 1.8884594440460205,
|
| 14868 |
+
"learning_rate": 8.859536082474226e-06,
|
| 14869 |
+
"loss": 0.0607,
|
| 14870 |
+
"step": 19470
|
| 14871 |
+
},
|
| 14872 |
+
{
|
| 14873 |
+
"epoch": 9.128397375820056,
|
| 14874 |
+
"grad_norm": 1.6822651624679565,
|
| 14875 |
+
"learning_rate": 8.858950328022493e-06,
|
| 14876 |
+
"loss": 0.0504,
|
| 14877 |
+
"step": 19480
|
| 14878 |
+
},
|
| 14879 |
+
{
|
| 14880 |
+
"epoch": 9.133083411433926,
|
| 14881 |
+
"grad_norm": 1.626015067100525,
|
| 14882 |
+
"learning_rate": 8.85836457357076e-06,
|
| 14883 |
+
"loss": 0.0516,
|
| 14884 |
+
"step": 19490
|
| 14885 |
+
},
|
| 14886 |
+
{
|
| 14887 |
+
"epoch": 9.137769447047798,
|
| 14888 |
+
"grad_norm": 1.1759178638458252,
|
| 14889 |
+
"learning_rate": 8.857778819119026e-06,
|
| 14890 |
+
"loss": 0.047,
|
| 14891 |
+
"step": 19500
|
| 14892 |
+
},
|
| 14893 |
+
{
|
| 14894 |
+
"epoch": 9.137769447047798,
|
| 14895 |
+
"eval_loss": 0.03765318915247917,
|
| 14896 |
+
"eval_pearson_cosine": 0.7869684109175026,
|
| 14897 |
+
"eval_pearson_dot": 0.6458746869453549,
|
| 14898 |
+
"eval_pearson_euclidean": 0.7253508328002916,
|
| 14899 |
+
"eval_pearson_manhattan": 0.7249120216278655,
|
| 14900 |
+
"eval_runtime": 39.8108,
|
| 14901 |
+
"eval_samples_per_second": 37.678,
|
| 14902 |
+
"eval_spearman_cosine": 0.7881589626771033,
|
| 14903 |
+
"eval_spearman_dot": 0.6693627499015223,
|
| 14904 |
+
"eval_spearman_euclidean": 0.7413480639045013,
|
| 14905 |
+
"eval_spearman_manhattan": 0.7414303112939764,
|
| 14906 |
+
"eval_steps_per_second": 37.678,
|
| 14907 |
+
"step": 19500
|
| 14908 |
+
},
|
| 14909 |
+
{
|
| 14910 |
+
"epoch": 9.142455482661669,
|
| 14911 |
+
"grad_norm": 1.134598731994629,
|
| 14912 |
+
"learning_rate": 8.857193064667292e-06,
|
| 14913 |
+
"loss": 0.0531,
|
| 14914 |
+
"step": 19510
|
| 14915 |
+
},
|
| 14916 |
+
{
|
| 14917 |
+
"epoch": 9.14714151827554,
|
| 14918 |
+
"grad_norm": 1.447082757949829,
|
| 14919 |
+
"learning_rate": 8.856607310215558e-06,
|
| 14920 |
+
"loss": 0.0531,
|
| 14921 |
+
"step": 19520
|
| 14922 |
+
},
|
| 14923 |
+
{
|
| 14924 |
+
"epoch": 9.15182755388941,
|
| 14925 |
+
"grad_norm": 1.004354476928711,
|
| 14926 |
+
"learning_rate": 8.856021555763825e-06,
|
| 14927 |
+
"loss": 0.0511,
|
| 14928 |
+
"step": 19530
|
| 14929 |
+
},
|
| 14930 |
+
{
|
| 14931 |
+
"epoch": 9.15651358950328,
|
| 14932 |
+
"grad_norm": 1.6353479623794556,
|
| 14933 |
+
"learning_rate": 8.855435801312092e-06,
|
| 14934 |
+
"loss": 0.0467,
|
| 14935 |
+
"step": 19540
|
| 14936 |
+
},
|
| 14937 |
+
{
|
| 14938 |
+
"epoch": 9.16119962511715,
|
| 14939 |
+
"grad_norm": 1.8899836540222168,
|
| 14940 |
+
"learning_rate": 8.854850046860357e-06,
|
| 14941 |
+
"loss": 0.0582,
|
| 14942 |
+
"step": 19550
|
| 14943 |
+
},
|
| 14944 |
+
{
|
| 14945 |
+
"epoch": 9.165885660731021,
|
| 14946 |
+
"grad_norm": 1.306091070175171,
|
| 14947 |
+
"learning_rate": 8.854264292408624e-06,
|
| 14948 |
+
"loss": 0.0571,
|
| 14949 |
+
"step": 19560
|
| 14950 |
+
},
|
| 14951 |
+
{
|
| 14952 |
+
"epoch": 9.170571696344892,
|
| 14953 |
+
"grad_norm": 1.7783139944076538,
|
| 14954 |
+
"learning_rate": 8.853678537956889e-06,
|
| 14955 |
+
"loss": 0.0543,
|
| 14956 |
+
"step": 19570
|
| 14957 |
+
},
|
| 14958 |
+
{
|
| 14959 |
+
"epoch": 9.175257731958762,
|
| 14960 |
+
"grad_norm": 1.1551589965820312,
|
| 14961 |
+
"learning_rate": 8.853092783505156e-06,
|
| 14962 |
+
"loss": 0.0626,
|
| 14963 |
+
"step": 19580
|
| 14964 |
+
},
|
| 14965 |
+
{
|
| 14966 |
+
"epoch": 9.179943767572633,
|
| 14967 |
+
"grad_norm": 0.8448215126991272,
|
| 14968 |
+
"learning_rate": 8.852507029053423e-06,
|
| 14969 |
+
"loss": 0.0509,
|
| 14970 |
+
"step": 19590
|
| 14971 |
+
},
|
| 14972 |
+
{
|
| 14973 |
+
"epoch": 9.184629803186505,
|
| 14974 |
+
"grad_norm": 1.3088339567184448,
|
| 14975 |
+
"learning_rate": 8.851921274601688e-06,
|
| 14976 |
+
"loss": 0.0532,
|
| 14977 |
+
"step": 19600
|
| 14978 |
+
},
|
| 14979 |
+
{
|
| 14980 |
+
"epoch": 9.189315838800376,
|
| 14981 |
+
"grad_norm": 1.2790261507034302,
|
| 14982 |
+
"learning_rate": 8.851335520149953e-06,
|
| 14983 |
+
"loss": 0.0366,
|
| 14984 |
+
"step": 19610
|
| 14985 |
+
},
|
| 14986 |
+
{
|
| 14987 |
+
"epoch": 9.194001874414246,
|
| 14988 |
+
"grad_norm": 1.4637041091918945,
|
| 14989 |
+
"learning_rate": 8.85074976569822e-06,
|
| 14990 |
+
"loss": 0.0476,
|
| 14991 |
+
"step": 19620
|
| 14992 |
+
},
|
| 14993 |
+
{
|
| 14994 |
+
"epoch": 9.198687910028116,
|
| 14995 |
+
"grad_norm": 1.1702561378479004,
|
| 14996 |
+
"learning_rate": 8.850164011246485e-06,
|
| 14997 |
+
"loss": 0.0539,
|
| 14998 |
+
"step": 19630
|
| 14999 |
+
},
|
| 15000 |
+
{
|
| 15001 |
+
"epoch": 9.203373945641987,
|
| 15002 |
+
"grad_norm": 1.4241745471954346,
|
| 15003 |
+
"learning_rate": 8.849578256794752e-06,
|
| 15004 |
+
"loss": 0.0488,
|
| 15005 |
+
"step": 19640
|
| 15006 |
+
},
|
| 15007 |
+
{
|
| 15008 |
+
"epoch": 9.208059981255857,
|
| 15009 |
+
"grad_norm": 1.3767116069793701,
|
| 15010 |
+
"learning_rate": 8.848992502343019e-06,
|
| 15011 |
+
"loss": 0.0501,
|
| 15012 |
+
"step": 19650
|
| 15013 |
+
},
|
| 15014 |
+
{
|
| 15015 |
+
"epoch": 9.212746016869728,
|
| 15016 |
+
"grad_norm": 0.946832001209259,
|
| 15017 |
+
"learning_rate": 8.848406747891284e-06,
|
| 15018 |
+
"loss": 0.0509,
|
| 15019 |
+
"step": 19660
|
| 15020 |
+
},
|
| 15021 |
+
{
|
| 15022 |
+
"epoch": 9.217432052483598,
|
| 15023 |
+
"grad_norm": 2.132277011871338,
|
| 15024 |
+
"learning_rate": 8.847820993439551e-06,
|
| 15025 |
+
"loss": 0.0517,
|
| 15026 |
+
"step": 19670
|
| 15027 |
+
},
|
| 15028 |
+
{
|
| 15029 |
+
"epoch": 9.222118088097469,
|
| 15030 |
+
"grad_norm": 3.003037929534912,
|
| 15031 |
+
"learning_rate": 8.847235238987816e-06,
|
| 15032 |
+
"loss": 0.0601,
|
| 15033 |
+
"step": 19680
|
| 15034 |
+
},
|
| 15035 |
+
{
|
| 15036 |
+
"epoch": 9.22680412371134,
|
| 15037 |
+
"grad_norm": 0.8297474384307861,
|
| 15038 |
+
"learning_rate": 8.846649484536083e-06,
|
| 15039 |
+
"loss": 0.0381,
|
| 15040 |
+
"step": 19690
|
| 15041 |
+
},
|
| 15042 |
+
{
|
| 15043 |
+
"epoch": 9.231490159325212,
|
| 15044 |
+
"grad_norm": 0.8142613768577576,
|
| 15045 |
+
"learning_rate": 8.846063730084349e-06,
|
| 15046 |
+
"loss": 0.0528,
|
| 15047 |
+
"step": 19700
|
| 15048 |
+
},
|
| 15049 |
+
{
|
| 15050 |
+
"epoch": 9.236176194939082,
|
| 15051 |
+
"grad_norm": 1.9133763313293457,
|
| 15052 |
+
"learning_rate": 8.845477975632616e-06,
|
| 15053 |
+
"loss": 0.0536,
|
| 15054 |
+
"step": 19710
|
| 15055 |
+
},
|
| 15056 |
+
{
|
| 15057 |
+
"epoch": 9.240862230552953,
|
| 15058 |
+
"grad_norm": 1.1931358575820923,
|
| 15059 |
+
"learning_rate": 8.844892221180882e-06,
|
| 15060 |
+
"loss": 0.0558,
|
| 15061 |
+
"step": 19720
|
| 15062 |
+
},
|
| 15063 |
+
{
|
| 15064 |
+
"epoch": 9.245548266166823,
|
| 15065 |
+
"grad_norm": 2.3464787006378174,
|
| 15066 |
+
"learning_rate": 8.844306466729148e-06,
|
| 15067 |
+
"loss": 0.0505,
|
| 15068 |
+
"step": 19730
|
| 15069 |
+
},
|
| 15070 |
+
{
|
| 15071 |
+
"epoch": 9.250234301780694,
|
| 15072 |
+
"grad_norm": 1.3109287023544312,
|
| 15073 |
+
"learning_rate": 8.843720712277415e-06,
|
| 15074 |
+
"loss": 0.0582,
|
| 15075 |
+
"step": 19740
|
| 15076 |
+
},
|
| 15077 |
+
{
|
| 15078 |
+
"epoch": 9.254920337394564,
|
| 15079 |
+
"grad_norm": 1.866816520690918,
|
| 15080 |
+
"learning_rate": 8.84313495782568e-06,
|
| 15081 |
+
"loss": 0.0482,
|
| 15082 |
+
"step": 19750
|
| 15083 |
+
},
|
| 15084 |
+
{
|
| 15085 |
+
"epoch": 9.254920337394564,
|
| 15086 |
+
"eval_loss": 0.03766845539212227,
|
| 15087 |
+
"eval_pearson_cosine": 0.786280047827276,
|
| 15088 |
+
"eval_pearson_dot": 0.6498320134943469,
|
| 15089 |
+
"eval_pearson_euclidean": 0.7306029375409793,
|
| 15090 |
+
"eval_pearson_manhattan": 0.7296493603800656,
|
| 15091 |
+
"eval_runtime": 40.2507,
|
| 15092 |
+
"eval_samples_per_second": 37.266,
|
| 15093 |
+
"eval_spearman_cosine": 0.7871053277749581,
|
| 15094 |
+
"eval_spearman_dot": 0.6689992229589644,
|
| 15095 |
+
"eval_spearman_euclidean": 0.7449412319412662,
|
| 15096 |
+
"eval_spearman_manhattan": 0.7442196282250385,
|
| 15097 |
+
"eval_steps_per_second": 37.266,
|
| 15098 |
+
"step": 19750
|
| 15099 |
+
},
|
| 15100 |
+
{
|
| 15101 |
+
"epoch": 9.259606373008435,
|
| 15102 |
+
"grad_norm": 1.386021614074707,
|
| 15103 |
+
"learning_rate": 8.842549203373947e-06,
|
| 15104 |
+
"loss": 0.0512,
|
| 15105 |
+
"step": 19760
|
| 15106 |
+
},
|
| 15107 |
+
{
|
| 15108 |
+
"epoch": 9.264292408622305,
|
| 15109 |
+
"grad_norm": 1.7170544862747192,
|
| 15110 |
+
"learning_rate": 8.841963448922212e-06,
|
| 15111 |
+
"loss": 0.0567,
|
| 15112 |
+
"step": 19770
|
| 15113 |
+
},
|
| 15114 |
+
{
|
| 15115 |
+
"epoch": 9.268978444236176,
|
| 15116 |
+
"grad_norm": 2.6358816623687744,
|
| 15117 |
+
"learning_rate": 8.841377694470479e-06,
|
| 15118 |
+
"loss": 0.0611,
|
| 15119 |
+
"step": 19780
|
| 15120 |
+
},
|
| 15121 |
+
{
|
| 15122 |
+
"epoch": 9.273664479850046,
|
| 15123 |
+
"grad_norm": 1.0627405643463135,
|
| 15124 |
+
"learning_rate": 8.840791940018744e-06,
|
| 15125 |
+
"loss": 0.0516,
|
| 15126 |
+
"step": 19790
|
| 15127 |
+
},
|
| 15128 |
+
{
|
| 15129 |
+
"epoch": 9.278350515463918,
|
| 15130 |
+
"grad_norm": 2.116197109222412,
|
| 15131 |
+
"learning_rate": 8.840206185567011e-06,
|
| 15132 |
+
"loss": 0.0442,
|
| 15133 |
+
"step": 19800
|
| 15134 |
+
},
|
| 15135 |
+
{
|
| 15136 |
+
"epoch": 9.283036551077789,
|
| 15137 |
+
"grad_norm": 1.0570743083953857,
|
| 15138 |
+
"learning_rate": 8.839620431115276e-06,
|
| 15139 |
+
"loss": 0.0517,
|
| 15140 |
+
"step": 19810
|
| 15141 |
+
},
|
| 15142 |
+
{
|
| 15143 |
+
"epoch": 9.28772258669166,
|
| 15144 |
+
"grad_norm": 0.9444879293441772,
|
| 15145 |
+
"learning_rate": 8.839034676663543e-06,
|
| 15146 |
+
"loss": 0.0427,
|
| 15147 |
+
"step": 19820
|
| 15148 |
+
},
|
| 15149 |
+
{
|
| 15150 |
+
"epoch": 9.29240862230553,
|
| 15151 |
+
"grad_norm": 0.820633590221405,
|
| 15152 |
+
"learning_rate": 8.83844892221181e-06,
|
| 15153 |
+
"loss": 0.0505,
|
| 15154 |
+
"step": 19830
|
| 15155 |
+
},
|
| 15156 |
+
{
|
| 15157 |
+
"epoch": 9.2970946579194,
|
| 15158 |
+
"grad_norm": 0.9164274334907532,
|
| 15159 |
+
"learning_rate": 8.837863167760075e-06,
|
| 15160 |
+
"loss": 0.0539,
|
| 15161 |
+
"step": 19840
|
| 15162 |
+
},
|
| 15163 |
+
{
|
| 15164 |
+
"epoch": 9.30178069353327,
|
| 15165 |
+
"grad_norm": 1.6659798622131348,
|
| 15166 |
+
"learning_rate": 8.837277413308342e-06,
|
| 15167 |
+
"loss": 0.055,
|
| 15168 |
+
"step": 19850
|
| 15169 |
+
},
|
| 15170 |
+
{
|
| 15171 |
+
"epoch": 9.306466729147141,
|
| 15172 |
+
"grad_norm": 1.224489450454712,
|
| 15173 |
+
"learning_rate": 8.836691658856607e-06,
|
| 15174 |
+
"loss": 0.0487,
|
| 15175 |
+
"step": 19860
|
| 15176 |
+
},
|
| 15177 |
+
{
|
| 15178 |
+
"epoch": 9.311152764761012,
|
| 15179 |
+
"grad_norm": 1.6015446186065674,
|
| 15180 |
+
"learning_rate": 8.836105904404874e-06,
|
| 15181 |
+
"loss": 0.0622,
|
| 15182 |
+
"step": 19870
|
| 15183 |
+
},
|
| 15184 |
+
{
|
| 15185 |
+
"epoch": 9.315838800374882,
|
| 15186 |
+
"grad_norm": 2.066589593887329,
|
| 15187 |
+
"learning_rate": 8.835520149953141e-06,
|
| 15188 |
+
"loss": 0.0562,
|
| 15189 |
+
"step": 19880
|
| 15190 |
+
},
|
| 15191 |
+
{
|
| 15192 |
+
"epoch": 9.320524835988753,
|
| 15193 |
+
"grad_norm": 1.8341182470321655,
|
| 15194 |
+
"learning_rate": 8.834934395501406e-06,
|
| 15195 |
+
"loss": 0.0414,
|
| 15196 |
+
"step": 19890
|
| 15197 |
+
},
|
| 15198 |
+
{
|
| 15199 |
+
"epoch": 9.325210871602625,
|
| 15200 |
+
"grad_norm": 2.1060688495635986,
|
| 15201 |
+
"learning_rate": 8.834348641049673e-06,
|
| 15202 |
+
"loss": 0.0423,
|
| 15203 |
+
"step": 19900
|
| 15204 |
+
},
|
| 15205 |
+
{
|
| 15206 |
+
"epoch": 9.329896907216495,
|
| 15207 |
+
"grad_norm": 2.0976791381835938,
|
| 15208 |
+
"learning_rate": 8.833762886597939e-06,
|
| 15209 |
+
"loss": 0.0562,
|
| 15210 |
+
"step": 19910
|
| 15211 |
+
},
|
| 15212 |
+
{
|
| 15213 |
+
"epoch": 9.334582942830366,
|
| 15214 |
+
"grad_norm": 1.7656900882720947,
|
| 15215 |
+
"learning_rate": 8.833177132146204e-06,
|
| 15216 |
+
"loss": 0.0454,
|
| 15217 |
+
"step": 19920
|
| 15218 |
+
},
|
| 15219 |
+
{
|
| 15220 |
+
"epoch": 9.339268978444236,
|
| 15221 |
+
"grad_norm": 0.9391831755638123,
|
| 15222 |
+
"learning_rate": 8.832591377694472e-06,
|
| 15223 |
+
"loss": 0.0471,
|
| 15224 |
+
"step": 19930
|
| 15225 |
+
},
|
| 15226 |
+
{
|
| 15227 |
+
"epoch": 9.343955014058107,
|
| 15228 |
+
"grad_norm": 1.8361108303070068,
|
| 15229 |
+
"learning_rate": 8.832005623242738e-06,
|
| 15230 |
+
"loss": 0.0521,
|
| 15231 |
+
"step": 19940
|
| 15232 |
+
},
|
| 15233 |
+
{
|
| 15234 |
+
"epoch": 9.348641049671977,
|
| 15235 |
+
"grad_norm": 1.4012130498886108,
|
| 15236 |
+
"learning_rate": 8.831419868791003e-06,
|
| 15237 |
+
"loss": 0.0476,
|
| 15238 |
+
"step": 19950
|
| 15239 |
+
},
|
| 15240 |
+
{
|
| 15241 |
+
"epoch": 9.353327085285848,
|
| 15242 |
+
"grad_norm": 1.4812968969345093,
|
| 15243 |
+
"learning_rate": 8.83083411433927e-06,
|
| 15244 |
+
"loss": 0.0356,
|
| 15245 |
+
"step": 19960
|
| 15246 |
+
},
|
| 15247 |
+
{
|
| 15248 |
+
"epoch": 9.358013120899718,
|
| 15249 |
+
"grad_norm": 1.4447283744812012,
|
| 15250 |
+
"learning_rate": 8.830248359887535e-06,
|
| 15251 |
+
"loss": 0.046,
|
| 15252 |
+
"step": 19970
|
| 15253 |
+
},
|
| 15254 |
+
{
|
| 15255 |
+
"epoch": 9.362699156513589,
|
| 15256 |
+
"grad_norm": 1.9198623895645142,
|
| 15257 |
+
"learning_rate": 8.829662605435802e-06,
|
| 15258 |
+
"loss": 0.0546,
|
| 15259 |
+
"step": 19980
|
| 15260 |
+
},
|
| 15261 |
+
{
|
| 15262 |
+
"epoch": 9.36738519212746,
|
| 15263 |
+
"grad_norm": 0.8466697335243225,
|
| 15264 |
+
"learning_rate": 8.829076850984069e-06,
|
| 15265 |
+
"loss": 0.0481,
|
| 15266 |
+
"step": 19990
|
| 15267 |
+
},
|
| 15268 |
+
{
|
| 15269 |
+
"epoch": 9.372071227741332,
|
| 15270 |
+
"grad_norm": 1.5158565044403076,
|
| 15271 |
+
"learning_rate": 8.828491096532334e-06,
|
| 15272 |
+
"loss": 0.0529,
|
| 15273 |
+
"step": 20000
|
| 15274 |
+
},
|
| 15275 |
+
{
|
| 15276 |
+
"epoch": 9.372071227741332,
|
| 15277 |
+
"eval_loss": 0.0377335324883461,
|
| 15278 |
+
"eval_pearson_cosine": 0.7872657190030239,
|
| 15279 |
+
"eval_pearson_dot": 0.6489881022917316,
|
| 15280 |
+
"eval_pearson_euclidean": 0.7290286852364005,
|
| 15281 |
+
"eval_pearson_manhattan": 0.7285143498985862,
|
| 15282 |
+
"eval_runtime": 39.7128,
|
| 15283 |
+
"eval_samples_per_second": 37.771,
|
| 15284 |
+
"eval_spearman_cosine": 0.7888105939241997,
|
| 15285 |
+
"eval_spearman_dot": 0.6689738777456538,
|
| 15286 |
+
"eval_spearman_euclidean": 0.7426040363283044,
|
| 15287 |
+
"eval_spearman_manhattan": 0.742345267890976,
|
| 15288 |
+
"eval_steps_per_second": 37.771,
|
| 15289 |
+
"step": 20000
|
| 15290 |
}
|
| 15291 |
],
|
| 15292 |
"logging_steps": 10,
|