Training in progress, step 15000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 613004648
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b233efdfedfad8f103bdeed119d35dcd099eba59c85ffbbf10546b2a64d3674e
|
| 3 |
size 613004648
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1226096954
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6cd415bf9e90b88d0f924f465a8f839a232fb228bedc13d8305dea0013598c0
|
| 3 |
size 1226096954
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35a846db1f952a5c57e0f96eff0e1f51f9bc69325d1c9533532221ee67d3d2cc
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ae2d4f55a59e8b5160b468e406d78254547d58c1b3eaf0e1797452533ce3e19
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 250,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -10703,6 +10703,770 @@
|
|
| 10703 |
"eval_spearman_manhattan": 0.757795808702236,
|
| 10704 |
"eval_steps_per_second": 37.345,
|
| 10705 |
"step": 14000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10706 |
}
|
| 10707 |
],
|
| 10708 |
"logging_steps": 10,
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 7.029053420805998,
|
| 5 |
"eval_steps": 250,
|
| 6 |
+
"global_step": 15000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 10703 |
"eval_spearman_manhattan": 0.757795808702236,
|
| 10704 |
"eval_steps_per_second": 37.345,
|
| 10705 |
"step": 14000
|
| 10706 |
+
},
|
| 10707 |
+
{
|
| 10708 |
+
"epoch": 6.565135895032802,
|
| 10709 |
+
"grad_norm": 1.1771986484527588,
|
| 10710 |
+
"learning_rate": 9.1793580131209e-06,
|
| 10711 |
+
"loss": 0.0723,
|
| 10712 |
+
"step": 14010
|
| 10713 |
+
},
|
| 10714 |
+
{
|
| 10715 |
+
"epoch": 6.569821930646673,
|
| 10716 |
+
"grad_norm": 3.105875253677368,
|
| 10717 |
+
"learning_rate": 9.178772258669167e-06,
|
| 10718 |
+
"loss": 0.0831,
|
| 10719 |
+
"step": 14020
|
| 10720 |
+
},
|
| 10721 |
+
{
|
| 10722 |
+
"epoch": 6.574507966260543,
|
| 10723 |
+
"grad_norm": 2.368614673614502,
|
| 10724 |
+
"learning_rate": 9.178186504217432e-06,
|
| 10725 |
+
"loss": 0.0896,
|
| 10726 |
+
"step": 14030
|
| 10727 |
+
},
|
| 10728 |
+
{
|
| 10729 |
+
"epoch": 6.579194001874415,
|
| 10730 |
+
"grad_norm": 1.0225666761398315,
|
| 10731 |
+
"learning_rate": 9.1776007497657e-06,
|
| 10732 |
+
"loss": 0.0773,
|
| 10733 |
+
"step": 14040
|
| 10734 |
+
},
|
| 10735 |
+
{
|
| 10736 |
+
"epoch": 6.583880037488285,
|
| 10737 |
+
"grad_norm": 1.7535622119903564,
|
| 10738 |
+
"learning_rate": 9.177014995313966e-06,
|
| 10739 |
+
"loss": 0.0966,
|
| 10740 |
+
"step": 14050
|
| 10741 |
+
},
|
| 10742 |
+
{
|
| 10743 |
+
"epoch": 6.588566073102156,
|
| 10744 |
+
"grad_norm": 1.9074852466583252,
|
| 10745 |
+
"learning_rate": 9.176429240862231e-06,
|
| 10746 |
+
"loss": 0.099,
|
| 10747 |
+
"step": 14060
|
| 10748 |
+
},
|
| 10749 |
+
{
|
| 10750 |
+
"epoch": 6.593252108716026,
|
| 10751 |
+
"grad_norm": 1.9208943843841553,
|
| 10752 |
+
"learning_rate": 9.175843486410497e-06,
|
| 10753 |
+
"loss": 0.0829,
|
| 10754 |
+
"step": 14070
|
| 10755 |
+
},
|
| 10756 |
+
{
|
| 10757 |
+
"epoch": 6.597938144329897,
|
| 10758 |
+
"grad_norm": 1.6164686679840088,
|
| 10759 |
+
"learning_rate": 9.175257731958764e-06,
|
| 10760 |
+
"loss": 0.0791,
|
| 10761 |
+
"step": 14080
|
| 10762 |
+
},
|
| 10763 |
+
{
|
| 10764 |
+
"epoch": 6.602624179943768,
|
| 10765 |
+
"grad_norm": 2.0227229595184326,
|
| 10766 |
+
"learning_rate": 9.17467197750703e-06,
|
| 10767 |
+
"loss": 0.087,
|
| 10768 |
+
"step": 14090
|
| 10769 |
+
},
|
| 10770 |
+
{
|
| 10771 |
+
"epoch": 6.607310215557638,
|
| 10772 |
+
"grad_norm": 2.0741658210754395,
|
| 10773 |
+
"learning_rate": 9.174086223055296e-06,
|
| 10774 |
+
"loss": 0.0832,
|
| 10775 |
+
"step": 14100
|
| 10776 |
+
},
|
| 10777 |
+
{
|
| 10778 |
+
"epoch": 6.611996251171509,
|
| 10779 |
+
"grad_norm": 2.0126988887786865,
|
| 10780 |
+
"learning_rate": 9.173500468603563e-06,
|
| 10781 |
+
"loss": 0.0959,
|
| 10782 |
+
"step": 14110
|
| 10783 |
+
},
|
| 10784 |
+
{
|
| 10785 |
+
"epoch": 6.616682286785379,
|
| 10786 |
+
"grad_norm": 2.8039979934692383,
|
| 10787 |
+
"learning_rate": 9.172914714151828e-06,
|
| 10788 |
+
"loss": 0.0876,
|
| 10789 |
+
"step": 14120
|
| 10790 |
+
},
|
| 10791 |
+
{
|
| 10792 |
+
"epoch": 6.62136832239925,
|
| 10793 |
+
"grad_norm": 2.453516721725464,
|
| 10794 |
+
"learning_rate": 9.172328959700095e-06,
|
| 10795 |
+
"loss": 0.0946,
|
| 10796 |
+
"step": 14130
|
| 10797 |
+
},
|
| 10798 |
+
{
|
| 10799 |
+
"epoch": 6.626054358013121,
|
| 10800 |
+
"grad_norm": 1.891790509223938,
|
| 10801 |
+
"learning_rate": 9.17174320524836e-06,
|
| 10802 |
+
"loss": 0.0995,
|
| 10803 |
+
"step": 14140
|
| 10804 |
+
},
|
| 10805 |
+
{
|
| 10806 |
+
"epoch": 6.630740393626992,
|
| 10807 |
+
"grad_norm": 2.185473680496216,
|
| 10808 |
+
"learning_rate": 9.171157450796627e-06,
|
| 10809 |
+
"loss": 0.0766,
|
| 10810 |
+
"step": 14150
|
| 10811 |
+
},
|
| 10812 |
+
{
|
| 10813 |
+
"epoch": 6.635426429240862,
|
| 10814 |
+
"grad_norm": 1.8330421447753906,
|
| 10815 |
+
"learning_rate": 9.170571696344894e-06,
|
| 10816 |
+
"loss": 0.0944,
|
| 10817 |
+
"step": 14160
|
| 10818 |
+
},
|
| 10819 |
+
{
|
| 10820 |
+
"epoch": 6.640112464854733,
|
| 10821 |
+
"grad_norm": 1.9126405715942383,
|
| 10822 |
+
"learning_rate": 9.169985941893159e-06,
|
| 10823 |
+
"loss": 0.0903,
|
| 10824 |
+
"step": 14170
|
| 10825 |
+
},
|
| 10826 |
+
{
|
| 10827 |
+
"epoch": 6.644798500468603,
|
| 10828 |
+
"grad_norm": 2.1286425590515137,
|
| 10829 |
+
"learning_rate": 9.169400187441426e-06,
|
| 10830 |
+
"loss": 0.0909,
|
| 10831 |
+
"step": 14180
|
| 10832 |
+
},
|
| 10833 |
+
{
|
| 10834 |
+
"epoch": 6.649484536082475,
|
| 10835 |
+
"grad_norm": 2.539160966873169,
|
| 10836 |
+
"learning_rate": 9.168814432989691e-06,
|
| 10837 |
+
"loss": 0.0842,
|
| 10838 |
+
"step": 14190
|
| 10839 |
+
},
|
| 10840 |
+
{
|
| 10841 |
+
"epoch": 6.654170571696345,
|
| 10842 |
+
"grad_norm": 2.383507490158081,
|
| 10843 |
+
"learning_rate": 9.168228678537958e-06,
|
| 10844 |
+
"loss": 0.0884,
|
| 10845 |
+
"step": 14200
|
| 10846 |
+
},
|
| 10847 |
+
{
|
| 10848 |
+
"epoch": 6.658856607310216,
|
| 10849 |
+
"grad_norm": 1.336970567703247,
|
| 10850 |
+
"learning_rate": 9.167642924086225e-06,
|
| 10851 |
+
"loss": 0.0832,
|
| 10852 |
+
"step": 14210
|
| 10853 |
+
},
|
| 10854 |
+
{
|
| 10855 |
+
"epoch": 6.663542642924086,
|
| 10856 |
+
"grad_norm": 1.9437108039855957,
|
| 10857 |
+
"learning_rate": 9.16705716963449e-06,
|
| 10858 |
+
"loss": 0.0939,
|
| 10859 |
+
"step": 14220
|
| 10860 |
+
},
|
| 10861 |
+
{
|
| 10862 |
+
"epoch": 6.6682286785379565,
|
| 10863 |
+
"grad_norm": 1.757957100868225,
|
| 10864 |
+
"learning_rate": 9.166471415182755e-06,
|
| 10865 |
+
"loss": 0.0789,
|
| 10866 |
+
"step": 14230
|
| 10867 |
+
},
|
| 10868 |
+
{
|
| 10869 |
+
"epoch": 6.672914714151828,
|
| 10870 |
+
"grad_norm": 1.8997693061828613,
|
| 10871 |
+
"learning_rate": 9.165885660731022e-06,
|
| 10872 |
+
"loss": 0.0871,
|
| 10873 |
+
"step": 14240
|
| 10874 |
+
},
|
| 10875 |
+
{
|
| 10876 |
+
"epoch": 6.677600749765698,
|
| 10877 |
+
"grad_norm": 2.3691928386688232,
|
| 10878 |
+
"learning_rate": 9.165299906279288e-06,
|
| 10879 |
+
"loss": 0.0919,
|
| 10880 |
+
"step": 14250
|
| 10881 |
+
},
|
| 10882 |
+
{
|
| 10883 |
+
"epoch": 6.677600749765698,
|
| 10884 |
+
"eval_loss": 0.04086451604962349,
|
| 10885 |
+
"eval_pearson_cosine": 0.7820280058953486,
|
| 10886 |
+
"eval_pearson_dot": 0.6341032311060246,
|
| 10887 |
+
"eval_pearson_euclidean": 0.7402547311501273,
|
| 10888 |
+
"eval_pearson_manhattan": 0.7402377003069951,
|
| 10889 |
+
"eval_runtime": 39.165,
|
| 10890 |
+
"eval_samples_per_second": 38.3,
|
| 10891 |
+
"eval_spearman_cosine": 0.7858084087028067,
|
| 10892 |
+
"eval_spearman_dot": 0.6459085165502623,
|
| 10893 |
+
"eval_spearman_euclidean": 0.7544028045089441,
|
| 10894 |
+
"eval_spearman_manhattan": 0.7545435768510885,
|
| 10895 |
+
"eval_steps_per_second": 38.3,
|
| 10896 |
+
"step": 14250
|
| 10897 |
+
},
|
| 10898 |
+
{
|
| 10899 |
+
"epoch": 6.682286785379569,
|
| 10900 |
+
"grad_norm": 1.7762689590454102,
|
| 10901 |
+
"learning_rate": 9.164714151827554e-06,
|
| 10902 |
+
"loss": 0.0917,
|
| 10903 |
+
"step": 14260
|
| 10904 |
+
},
|
| 10905 |
+
{
|
| 10906 |
+
"epoch": 6.686972820993439,
|
| 10907 |
+
"grad_norm": 2.296797037124634,
|
| 10908 |
+
"learning_rate": 9.164128397375821e-06,
|
| 10909 |
+
"loss": 0.0843,
|
| 10910 |
+
"step": 14270
|
| 10911 |
+
},
|
| 10912 |
+
{
|
| 10913 |
+
"epoch": 6.69165885660731,
|
| 10914 |
+
"grad_norm": 2.3844399452209473,
|
| 10915 |
+
"learning_rate": 9.163542642924087e-06,
|
| 10916 |
+
"loss": 0.0987,
|
| 10917 |
+
"step": 14280
|
| 10918 |
+
},
|
| 10919 |
+
{
|
| 10920 |
+
"epoch": 6.696344892221181,
|
| 10921 |
+
"grad_norm": 1.65080988407135,
|
| 10922 |
+
"learning_rate": 9.162956888472354e-06,
|
| 10923 |
+
"loss": 0.0644,
|
| 10924 |
+
"step": 14290
|
| 10925 |
+
},
|
| 10926 |
+
{
|
| 10927 |
+
"epoch": 6.701030927835052,
|
| 10928 |
+
"grad_norm": 2.429184913635254,
|
| 10929 |
+
"learning_rate": 9.162371134020619e-06,
|
| 10930 |
+
"loss": 0.0913,
|
| 10931 |
+
"step": 14300
|
| 10932 |
+
},
|
| 10933 |
+
{
|
| 10934 |
+
"epoch": 6.705716963448922,
|
| 10935 |
+
"grad_norm": 2.7205028533935547,
|
| 10936 |
+
"learning_rate": 9.161785379568886e-06,
|
| 10937 |
+
"loss": 0.0955,
|
| 10938 |
+
"step": 14310
|
| 10939 |
+
},
|
| 10940 |
+
{
|
| 10941 |
+
"epoch": 6.710402999062793,
|
| 10942 |
+
"grad_norm": 1.9049710035324097,
|
| 10943 |
+
"learning_rate": 9.161199625117153e-06,
|
| 10944 |
+
"loss": 0.0943,
|
| 10945 |
+
"step": 14320
|
| 10946 |
+
},
|
| 10947 |
+
{
|
| 10948 |
+
"epoch": 6.715089034676663,
|
| 10949 |
+
"grad_norm": 1.4774081707000732,
|
| 10950 |
+
"learning_rate": 9.160613870665418e-06,
|
| 10951 |
+
"loss": 0.0909,
|
| 10952 |
+
"step": 14330
|
| 10953 |
+
},
|
| 10954 |
+
{
|
| 10955 |
+
"epoch": 6.719775070290535,
|
| 10956 |
+
"grad_norm": 2.102787971496582,
|
| 10957 |
+
"learning_rate": 9.160028116213685e-06,
|
| 10958 |
+
"loss": 0.0757,
|
| 10959 |
+
"step": 14340
|
| 10960 |
+
},
|
| 10961 |
+
{
|
| 10962 |
+
"epoch": 6.724461105904405,
|
| 10963 |
+
"grad_norm": 1.7760144472122192,
|
| 10964 |
+
"learning_rate": 9.15944236176195e-06,
|
| 10965 |
+
"loss": 0.0949,
|
| 10966 |
+
"step": 14350
|
| 10967 |
+
},
|
| 10968 |
+
{
|
| 10969 |
+
"epoch": 6.7291471415182755,
|
| 10970 |
+
"grad_norm": 2.6216742992401123,
|
| 10971 |
+
"learning_rate": 9.158856607310215e-06,
|
| 10972 |
+
"loss": 0.0997,
|
| 10973 |
+
"step": 14360
|
| 10974 |
+
},
|
| 10975 |
+
{
|
| 10976 |
+
"epoch": 6.733833177132146,
|
| 10977 |
+
"grad_norm": 1.8759592771530151,
|
| 10978 |
+
"learning_rate": 9.158270852858484e-06,
|
| 10979 |
+
"loss": 0.0898,
|
| 10980 |
+
"step": 14370
|
| 10981 |
+
},
|
| 10982 |
+
{
|
| 10983 |
+
"epoch": 6.7385192127460165,
|
| 10984 |
+
"grad_norm": 1.7543883323669434,
|
| 10985 |
+
"learning_rate": 9.157685098406749e-06,
|
| 10986 |
+
"loss": 0.0913,
|
| 10987 |
+
"step": 14380
|
| 10988 |
+
},
|
| 10989 |
+
{
|
| 10990 |
+
"epoch": 6.743205248359888,
|
| 10991 |
+
"grad_norm": 2.2106900215148926,
|
| 10992 |
+
"learning_rate": 9.157099343955014e-06,
|
| 10993 |
+
"loss": 0.0996,
|
| 10994 |
+
"step": 14390
|
| 10995 |
+
},
|
| 10996 |
+
{
|
| 10997 |
+
"epoch": 6.747891283973758,
|
| 10998 |
+
"grad_norm": 2.408613443374634,
|
| 10999 |
+
"learning_rate": 9.156513589503281e-06,
|
| 11000 |
+
"loss": 0.0988,
|
| 11001 |
+
"step": 14400
|
| 11002 |
+
},
|
| 11003 |
+
{
|
| 11004 |
+
"epoch": 6.752577319587629,
|
| 11005 |
+
"grad_norm": 3.430854082107544,
|
| 11006 |
+
"learning_rate": 9.155927835051546e-06,
|
| 11007 |
+
"loss": 0.0998,
|
| 11008 |
+
"step": 14410
|
| 11009 |
+
},
|
| 11010 |
+
{
|
| 11011 |
+
"epoch": 6.757263355201499,
|
| 11012 |
+
"grad_norm": 2.024101495742798,
|
| 11013 |
+
"learning_rate": 9.155342080599813e-06,
|
| 11014 |
+
"loss": 0.0784,
|
| 11015 |
+
"step": 14420
|
| 11016 |
+
},
|
| 11017 |
+
{
|
| 11018 |
+
"epoch": 6.76194939081537,
|
| 11019 |
+
"grad_norm": 1.9962611198425293,
|
| 11020 |
+
"learning_rate": 9.15475632614808e-06,
|
| 11021 |
+
"loss": 0.0778,
|
| 11022 |
+
"step": 14430
|
| 11023 |
+
},
|
| 11024 |
+
{
|
| 11025 |
+
"epoch": 6.766635426429241,
|
| 11026 |
+
"grad_norm": 2.7750437259674072,
|
| 11027 |
+
"learning_rate": 9.154170571696345e-06,
|
| 11028 |
+
"loss": 0.0787,
|
| 11029 |
+
"step": 14440
|
| 11030 |
+
},
|
| 11031 |
+
{
|
| 11032 |
+
"epoch": 6.771321462043112,
|
| 11033 |
+
"grad_norm": 2.2293503284454346,
|
| 11034 |
+
"learning_rate": 9.153584817244612e-06,
|
| 11035 |
+
"loss": 0.0866,
|
| 11036 |
+
"step": 14450
|
| 11037 |
+
},
|
| 11038 |
+
{
|
| 11039 |
+
"epoch": 6.776007497656982,
|
| 11040 |
+
"grad_norm": 2.104199171066284,
|
| 11041 |
+
"learning_rate": 9.152999062792878e-06,
|
| 11042 |
+
"loss": 0.0886,
|
| 11043 |
+
"step": 14460
|
| 11044 |
+
},
|
| 11045 |
+
{
|
| 11046 |
+
"epoch": 6.780693533270853,
|
| 11047 |
+
"grad_norm": 2.1825919151306152,
|
| 11048 |
+
"learning_rate": 9.152413308341144e-06,
|
| 11049 |
+
"loss": 0.0881,
|
| 11050 |
+
"step": 14470
|
| 11051 |
+
},
|
| 11052 |
+
{
|
| 11053 |
+
"epoch": 6.785379568884723,
|
| 11054 |
+
"grad_norm": 2.0743556022644043,
|
| 11055 |
+
"learning_rate": 9.151827553889411e-06,
|
| 11056 |
+
"loss": 0.0816,
|
| 11057 |
+
"step": 14480
|
| 11058 |
+
},
|
| 11059 |
+
{
|
| 11060 |
+
"epoch": 6.7900656044985945,
|
| 11061 |
+
"grad_norm": 1.4492499828338623,
|
| 11062 |
+
"learning_rate": 9.151241799437677e-06,
|
| 11063 |
+
"loss": 0.0798,
|
| 11064 |
+
"step": 14490
|
| 11065 |
+
},
|
| 11066 |
+
{
|
| 11067 |
+
"epoch": 6.794751640112465,
|
| 11068 |
+
"grad_norm": 2.3385043144226074,
|
| 11069 |
+
"learning_rate": 9.150656044985944e-06,
|
| 11070 |
+
"loss": 0.0784,
|
| 11071 |
+
"step": 14500
|
| 11072 |
+
},
|
| 11073 |
+
{
|
| 11074 |
+
"epoch": 6.794751640112465,
|
| 11075 |
+
"eval_loss": 0.040780164301395416,
|
| 11076 |
+
"eval_pearson_cosine": 0.7793833022968641,
|
| 11077 |
+
"eval_pearson_dot": 0.6305632848645555,
|
| 11078 |
+
"eval_pearson_euclidean": 0.7311736352551961,
|
| 11079 |
+
"eval_pearson_manhattan": 0.7308022032341484,
|
| 11080 |
+
"eval_runtime": 40.6522,
|
| 11081 |
+
"eval_samples_per_second": 36.898,
|
| 11082 |
+
"eval_spearman_cosine": 0.7838836973690695,
|
| 11083 |
+
"eval_spearman_dot": 0.6427126264330253,
|
| 11084 |
+
"eval_spearman_euclidean": 0.7493899504087712,
|
| 11085 |
+
"eval_spearman_manhattan": 0.7495131408878735,
|
| 11086 |
+
"eval_steps_per_second": 36.898,
|
| 11087 |
+
"step": 14500
|
| 11088 |
+
},
|
| 11089 |
+
{
|
| 11090 |
+
"epoch": 6.7994376757263355,
|
| 11091 |
+
"grad_norm": 2.441380023956299,
|
| 11092 |
+
"learning_rate": 9.150070290534209e-06,
|
| 11093 |
+
"loss": 0.0866,
|
| 11094 |
+
"step": 14510
|
| 11095 |
+
},
|
| 11096 |
+
{
|
| 11097 |
+
"epoch": 6.804123711340206,
|
| 11098 |
+
"grad_norm": 2.3444080352783203,
|
| 11099 |
+
"learning_rate": 9.149484536082474e-06,
|
| 11100 |
+
"loss": 0.1104,
|
| 11101 |
+
"step": 14520
|
| 11102 |
+
},
|
| 11103 |
+
{
|
| 11104 |
+
"epoch": 6.8088097469540765,
|
| 11105 |
+
"grad_norm": 2.7428460121154785,
|
| 11106 |
+
"learning_rate": 9.148898781630741e-06,
|
| 11107 |
+
"loss": 0.1116,
|
| 11108 |
+
"step": 14530
|
| 11109 |
+
},
|
| 11110 |
+
{
|
| 11111 |
+
"epoch": 6.813495782567948,
|
| 11112 |
+
"grad_norm": 2.756432294845581,
|
| 11113 |
+
"learning_rate": 9.148313027179008e-06,
|
| 11114 |
+
"loss": 0.0942,
|
| 11115 |
+
"step": 14540
|
| 11116 |
+
},
|
| 11117 |
+
{
|
| 11118 |
+
"epoch": 6.818181818181818,
|
| 11119 |
+
"grad_norm": 2.891023635864258,
|
| 11120 |
+
"learning_rate": 9.147727272727273e-06,
|
| 11121 |
+
"loss": 0.1101,
|
| 11122 |
+
"step": 14550
|
| 11123 |
+
},
|
| 11124 |
+
{
|
| 11125 |
+
"epoch": 6.822867853795689,
|
| 11126 |
+
"grad_norm": 2.115098476409912,
|
| 11127 |
+
"learning_rate": 9.14714151827554e-06,
|
| 11128 |
+
"loss": 0.0896,
|
| 11129 |
+
"step": 14560
|
| 11130 |
+
},
|
| 11131 |
+
{
|
| 11132 |
+
"epoch": 6.827553889409559,
|
| 11133 |
+
"grad_norm": 1.161385178565979,
|
| 11134 |
+
"learning_rate": 9.146555763823805e-06,
|
| 11135 |
+
"loss": 0.0881,
|
| 11136 |
+
"step": 14570
|
| 11137 |
+
},
|
| 11138 |
+
{
|
| 11139 |
+
"epoch": 6.83223992502343,
|
| 11140 |
+
"grad_norm": 2.266988754272461,
|
| 11141 |
+
"learning_rate": 9.145970009372072e-06,
|
| 11142 |
+
"loss": 0.0904,
|
| 11143 |
+
"step": 14580
|
| 11144 |
+
},
|
| 11145 |
+
{
|
| 11146 |
+
"epoch": 6.836925960637301,
|
| 11147 |
+
"grad_norm": 2.111978769302368,
|
| 11148 |
+
"learning_rate": 9.145384254920339e-06,
|
| 11149 |
+
"loss": 0.0864,
|
| 11150 |
+
"step": 14590
|
| 11151 |
+
},
|
| 11152 |
+
{
|
| 11153 |
+
"epoch": 6.841611996251172,
|
| 11154 |
+
"grad_norm": 2.025771141052246,
|
| 11155 |
+
"learning_rate": 9.144798500468604e-06,
|
| 11156 |
+
"loss": 0.0947,
|
| 11157 |
+
"step": 14600
|
| 11158 |
+
},
|
| 11159 |
+
{
|
| 11160 |
+
"epoch": 6.846298031865042,
|
| 11161 |
+
"grad_norm": 1.6989368200302124,
|
| 11162 |
+
"learning_rate": 9.144212746016871e-06,
|
| 11163 |
+
"loss": 0.0906,
|
| 11164 |
+
"step": 14610
|
| 11165 |
+
},
|
| 11166 |
+
{
|
| 11167 |
+
"epoch": 6.850984067478913,
|
| 11168 |
+
"grad_norm": 2.4871666431427,
|
| 11169 |
+
"learning_rate": 9.143626991565136e-06,
|
| 11170 |
+
"loss": 0.0969,
|
| 11171 |
+
"step": 14620
|
| 11172 |
+
},
|
| 11173 |
+
{
|
| 11174 |
+
"epoch": 6.855670103092783,
|
| 11175 |
+
"grad_norm": 2.155759572982788,
|
| 11176 |
+
"learning_rate": 9.143041237113403e-06,
|
| 11177 |
+
"loss": 0.0978,
|
| 11178 |
+
"step": 14630
|
| 11179 |
+
},
|
| 11180 |
+
{
|
| 11181 |
+
"epoch": 6.8603561387066545,
|
| 11182 |
+
"grad_norm": 3.0879483222961426,
|
| 11183 |
+
"learning_rate": 9.142455482661668e-06,
|
| 11184 |
+
"loss": 0.0928,
|
| 11185 |
+
"step": 14640
|
| 11186 |
+
},
|
| 11187 |
+
{
|
| 11188 |
+
"epoch": 6.865042174320525,
|
| 11189 |
+
"grad_norm": 1.6940726041793823,
|
| 11190 |
+
"learning_rate": 9.141869728209935e-06,
|
| 11191 |
+
"loss": 0.0879,
|
| 11192 |
+
"step": 14650
|
| 11193 |
+
},
|
| 11194 |
+
{
|
| 11195 |
+
"epoch": 6.8697282099343955,
|
| 11196 |
+
"grad_norm": 2.233914613723755,
|
| 11197 |
+
"learning_rate": 9.141283973758202e-06,
|
| 11198 |
+
"loss": 0.0894,
|
| 11199 |
+
"step": 14660
|
| 11200 |
+
},
|
| 11201 |
+
{
|
| 11202 |
+
"epoch": 6.874414245548266,
|
| 11203 |
+
"grad_norm": 2.0306718349456787,
|
| 11204 |
+
"learning_rate": 9.140698219306468e-06,
|
| 11205 |
+
"loss": 0.0844,
|
| 11206 |
+
"step": 14670
|
| 11207 |
+
},
|
| 11208 |
+
{
|
| 11209 |
+
"epoch": 6.8791002811621365,
|
| 11210 |
+
"grad_norm": 1.8942639827728271,
|
| 11211 |
+
"learning_rate": 9.140112464854733e-06,
|
| 11212 |
+
"loss": 0.0817,
|
| 11213 |
+
"step": 14680
|
| 11214 |
+
},
|
| 11215 |
+
{
|
| 11216 |
+
"epoch": 6.883786316776008,
|
| 11217 |
+
"grad_norm": 2.03265643119812,
|
| 11218 |
+
"learning_rate": 9.139526710403e-06,
|
| 11219 |
+
"loss": 0.0852,
|
| 11220 |
+
"step": 14690
|
| 11221 |
+
},
|
| 11222 |
+
{
|
| 11223 |
+
"epoch": 6.888472352389878,
|
| 11224 |
+
"grad_norm": 1.7808202505111694,
|
| 11225 |
+
"learning_rate": 9.138940955951267e-06,
|
| 11226 |
+
"loss": 0.0896,
|
| 11227 |
+
"step": 14700
|
| 11228 |
+
},
|
| 11229 |
+
{
|
| 11230 |
+
"epoch": 6.893158388003749,
|
| 11231 |
+
"grad_norm": 2.668078660964966,
|
| 11232 |
+
"learning_rate": 9.138355201499532e-06,
|
| 11233 |
+
"loss": 0.0973,
|
| 11234 |
+
"step": 14710
|
| 11235 |
+
},
|
| 11236 |
+
{
|
| 11237 |
+
"epoch": 6.897844423617619,
|
| 11238 |
+
"grad_norm": 1.402289867401123,
|
| 11239 |
+
"learning_rate": 9.137769447047799e-06,
|
| 11240 |
+
"loss": 0.1026,
|
| 11241 |
+
"step": 14720
|
| 11242 |
+
},
|
| 11243 |
+
{
|
| 11244 |
+
"epoch": 6.90253045923149,
|
| 11245 |
+
"grad_norm": 2.591413974761963,
|
| 11246 |
+
"learning_rate": 9.137183692596064e-06,
|
| 11247 |
+
"loss": 0.1042,
|
| 11248 |
+
"step": 14730
|
| 11249 |
+
},
|
| 11250 |
+
{
|
| 11251 |
+
"epoch": 6.907216494845361,
|
| 11252 |
+
"grad_norm": 2.172842264175415,
|
| 11253 |
+
"learning_rate": 9.136597938144331e-06,
|
| 11254 |
+
"loss": 0.0896,
|
| 11255 |
+
"step": 14740
|
| 11256 |
+
},
|
| 11257 |
+
{
|
| 11258 |
+
"epoch": 6.911902530459232,
|
| 11259 |
+
"grad_norm": 2.8410751819610596,
|
| 11260 |
+
"learning_rate": 9.136012183692596e-06,
|
| 11261 |
+
"loss": 0.0821,
|
| 11262 |
+
"step": 14750
|
| 11263 |
+
},
|
| 11264 |
+
{
|
| 11265 |
+
"epoch": 6.911902530459232,
|
| 11266 |
+
"eval_loss": 0.04055028408765793,
|
| 11267 |
+
"eval_pearson_cosine": 0.7788665525758081,
|
| 11268 |
+
"eval_pearson_dot": 0.6376645263249117,
|
| 11269 |
+
"eval_pearson_euclidean": 0.727043862062235,
|
| 11270 |
+
"eval_pearson_manhattan": 0.7265471618369332,
|
| 11271 |
+
"eval_runtime": 40.6024,
|
| 11272 |
+
"eval_samples_per_second": 36.944,
|
| 11273 |
+
"eval_spearman_cosine": 0.7822270840908377,
|
| 11274 |
+
"eval_spearman_dot": 0.6567383479059669,
|
| 11275 |
+
"eval_spearman_euclidean": 0.744619349149987,
|
| 11276 |
+
"eval_spearman_manhattan": 0.7446135946974944,
|
| 11277 |
+
"eval_steps_per_second": 36.944,
|
| 11278 |
+
"step": 14750
|
| 11279 |
+
},
|
| 11280 |
+
{
|
| 11281 |
+
"epoch": 6.916588566073102,
|
| 11282 |
+
"grad_norm": 2.586047649383545,
|
| 11283 |
+
"learning_rate": 9.135426429240863e-06,
|
| 11284 |
+
"loss": 0.0965,
|
| 11285 |
+
"step": 14760
|
| 11286 |
+
},
|
| 11287 |
+
{
|
| 11288 |
+
"epoch": 6.921274601686973,
|
| 11289 |
+
"grad_norm": 1.9667673110961914,
|
| 11290 |
+
"learning_rate": 9.13484067478913e-06,
|
| 11291 |
+
"loss": 0.0913,
|
| 11292 |
+
"step": 14770
|
| 11293 |
+
},
|
| 11294 |
+
{
|
| 11295 |
+
"epoch": 6.925960637300843,
|
| 11296 |
+
"grad_norm": 2.6598875522613525,
|
| 11297 |
+
"learning_rate": 9.134254920337395e-06,
|
| 11298 |
+
"loss": 0.0863,
|
| 11299 |
+
"step": 14780
|
| 11300 |
+
},
|
| 11301 |
+
{
|
| 11302 |
+
"epoch": 6.9306466729147145,
|
| 11303 |
+
"grad_norm": 1.5291812419891357,
|
| 11304 |
+
"learning_rate": 9.133669165885662e-06,
|
| 11305 |
+
"loss": 0.0699,
|
| 11306 |
+
"step": 14790
|
| 11307 |
+
},
|
| 11308 |
+
{
|
| 11309 |
+
"epoch": 6.935332708528585,
|
| 11310 |
+
"grad_norm": 1.8771485090255737,
|
| 11311 |
+
"learning_rate": 9.133083411433927e-06,
|
| 11312 |
+
"loss": 0.0836,
|
| 11313 |
+
"step": 14800
|
| 11314 |
+
},
|
| 11315 |
+
{
|
| 11316 |
+
"epoch": 6.9400187441424555,
|
| 11317 |
+
"grad_norm": 1.7967191934585571,
|
| 11318 |
+
"learning_rate": 9.132497656982192e-06,
|
| 11319 |
+
"loss": 0.0833,
|
| 11320 |
+
"step": 14810
|
| 11321 |
+
},
|
| 11322 |
+
{
|
| 11323 |
+
"epoch": 6.944704779756326,
|
| 11324 |
+
"grad_norm": 1.3904474973678589,
|
| 11325 |
+
"learning_rate": 9.131911902530461e-06,
|
| 11326 |
+
"loss": 0.1147,
|
| 11327 |
+
"step": 14820
|
| 11328 |
+
},
|
| 11329 |
+
{
|
| 11330 |
+
"epoch": 6.949390815370196,
|
| 11331 |
+
"grad_norm": 2.315178871154785,
|
| 11332 |
+
"learning_rate": 9.131326148078726e-06,
|
| 11333 |
+
"loss": 0.1054,
|
| 11334 |
+
"step": 14830
|
| 11335 |
+
},
|
| 11336 |
+
{
|
| 11337 |
+
"epoch": 6.954076850984068,
|
| 11338 |
+
"grad_norm": 2.0457489490509033,
|
| 11339 |
+
"learning_rate": 9.130740393626992e-06,
|
| 11340 |
+
"loss": 0.0833,
|
| 11341 |
+
"step": 14840
|
| 11342 |
+
},
|
| 11343 |
+
{
|
| 11344 |
+
"epoch": 6.958762886597938,
|
| 11345 |
+
"grad_norm": 1.5070949792861938,
|
| 11346 |
+
"learning_rate": 9.130154639175258e-06,
|
| 11347 |
+
"loss": 0.0737,
|
| 11348 |
+
"step": 14850
|
| 11349 |
+
},
|
| 11350 |
+
{
|
| 11351 |
+
"epoch": 6.963448922211809,
|
| 11352 |
+
"grad_norm": 2.301478862762451,
|
| 11353 |
+
"learning_rate": 9.129568884723524e-06,
|
| 11354 |
+
"loss": 0.0779,
|
| 11355 |
+
"step": 14860
|
| 11356 |
+
},
|
| 11357 |
+
{
|
| 11358 |
+
"epoch": 6.968134957825679,
|
| 11359 |
+
"grad_norm": 1.9525973796844482,
|
| 11360 |
+
"learning_rate": 9.12898313027179e-06,
|
| 11361 |
+
"loss": 0.0829,
|
| 11362 |
+
"step": 14870
|
| 11363 |
+
},
|
| 11364 |
+
{
|
| 11365 |
+
"epoch": 6.97282099343955,
|
| 11366 |
+
"grad_norm": 2.3320276737213135,
|
| 11367 |
+
"learning_rate": 9.128397375820058e-06,
|
| 11368 |
+
"loss": 0.09,
|
| 11369 |
+
"step": 14880
|
| 11370 |
+
},
|
| 11371 |
+
{
|
| 11372 |
+
"epoch": 6.977507029053421,
|
| 11373 |
+
"grad_norm": 2.358041286468506,
|
| 11374 |
+
"learning_rate": 9.127811621368323e-06,
|
| 11375 |
+
"loss": 0.0848,
|
| 11376 |
+
"step": 14890
|
| 11377 |
+
},
|
| 11378 |
+
{
|
| 11379 |
+
"epoch": 6.982193064667292,
|
| 11380 |
+
"grad_norm": 2.0248255729675293,
|
| 11381 |
+
"learning_rate": 9.12722586691659e-06,
|
| 11382 |
+
"loss": 0.1023,
|
| 11383 |
+
"step": 14900
|
| 11384 |
+
},
|
| 11385 |
+
{
|
| 11386 |
+
"epoch": 6.986879100281162,
|
| 11387 |
+
"grad_norm": 2.4078421592712402,
|
| 11388 |
+
"learning_rate": 9.126640112464855e-06,
|
| 11389 |
+
"loss": 0.0886,
|
| 11390 |
+
"step": 14910
|
| 11391 |
+
},
|
| 11392 |
+
{
|
| 11393 |
+
"epoch": 6.991565135895033,
|
| 11394 |
+
"grad_norm": 2.0203652381896973,
|
| 11395 |
+
"learning_rate": 9.126054358013122e-06,
|
| 11396 |
+
"loss": 0.0989,
|
| 11397 |
+
"step": 14920
|
| 11398 |
+
},
|
| 11399 |
+
{
|
| 11400 |
+
"epoch": 6.996251171508904,
|
| 11401 |
+
"grad_norm": 2.0305638313293457,
|
| 11402 |
+
"learning_rate": 9.125468603561389e-06,
|
| 11403 |
+
"loss": 0.0854,
|
| 11404 |
+
"step": 14930
|
| 11405 |
+
},
|
| 11406 |
+
{
|
| 11407 |
+
"epoch": 7.0009372071227745,
|
| 11408 |
+
"grad_norm": 2.3170547485351562,
|
| 11409 |
+
"learning_rate": 9.124882849109654e-06,
|
| 11410 |
+
"loss": 0.0888,
|
| 11411 |
+
"step": 14940
|
| 11412 |
+
},
|
| 11413 |
+
{
|
| 11414 |
+
"epoch": 7.005623242736645,
|
| 11415 |
+
"grad_norm": 1.3576775789260864,
|
| 11416 |
+
"learning_rate": 9.124297094657921e-06,
|
| 11417 |
+
"loss": 0.0756,
|
| 11418 |
+
"step": 14950
|
| 11419 |
+
},
|
| 11420 |
+
{
|
| 11421 |
+
"epoch": 7.010309278350515,
|
| 11422 |
+
"grad_norm": 2.177962303161621,
|
| 11423 |
+
"learning_rate": 9.123711340206186e-06,
|
| 11424 |
+
"loss": 0.0679,
|
| 11425 |
+
"step": 14960
|
| 11426 |
+
},
|
| 11427 |
+
{
|
| 11428 |
+
"epoch": 7.014995313964386,
|
| 11429 |
+
"grad_norm": 1.6852316856384277,
|
| 11430 |
+
"learning_rate": 9.123125585754451e-06,
|
| 11431 |
+
"loss": 0.0635,
|
| 11432 |
+
"step": 14970
|
| 11433 |
+
},
|
| 11434 |
+
{
|
| 11435 |
+
"epoch": 7.019681349578256,
|
| 11436 |
+
"grad_norm": 2.245973825454712,
|
| 11437 |
+
"learning_rate": 9.12253983130272e-06,
|
| 11438 |
+
"loss": 0.081,
|
| 11439 |
+
"step": 14980
|
| 11440 |
+
},
|
| 11441 |
+
{
|
| 11442 |
+
"epoch": 7.024367385192128,
|
| 11443 |
+
"grad_norm": 1.2212837934494019,
|
| 11444 |
+
"learning_rate": 9.121954076850985e-06,
|
| 11445 |
+
"loss": 0.0684,
|
| 11446 |
+
"step": 14990
|
| 11447 |
+
},
|
| 11448 |
+
{
|
| 11449 |
+
"epoch": 7.029053420805998,
|
| 11450 |
+
"grad_norm": 2.061438798904419,
|
| 11451 |
+
"learning_rate": 9.12136832239925e-06,
|
| 11452 |
+
"loss": 0.0792,
|
| 11453 |
+
"step": 15000
|
| 11454 |
+
},
|
| 11455 |
+
{
|
| 11456 |
+
"epoch": 7.029053420805998,
|
| 11457 |
+
"eval_loss": 0.04008892923593521,
|
| 11458 |
+
"eval_pearson_cosine": 0.7799962553837254,
|
| 11459 |
+
"eval_pearson_dot": 0.6338093099514381,
|
| 11460 |
+
"eval_pearson_euclidean": 0.740542330523235,
|
| 11461 |
+
"eval_pearson_manhattan": 0.7397823209884535,
|
| 11462 |
+
"eval_runtime": 39.3867,
|
| 11463 |
+
"eval_samples_per_second": 38.084,
|
| 11464 |
+
"eval_spearman_cosine": 0.7833374621050089,
|
| 11465 |
+
"eval_spearman_dot": 0.646744349870265,
|
| 11466 |
+
"eval_spearman_euclidean": 0.7572153670081455,
|
| 11467 |
+
"eval_spearman_manhattan": 0.7568637419859118,
|
| 11468 |
+
"eval_steps_per_second": 38.084,
|
| 11469 |
+
"step": 15000
|
| 11470 |
}
|
| 11471 |
],
|
| 11472 |
"logging_steps": 10,
|