Training in progress, step 8000, checkpoint

- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +766 -2
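
The files above are the standard layout the Hugging Face `Trainer` writes for an intermediate checkpoint: model weights, optimizer and LR-scheduler state, one RNG state per process (eight here, presumably an 8-process run), and the `trainer_state.json` log. A minimal sketch of resuming from this directory, assuming the original model and datasets are rebuilt by hypothetical `build_model()` / `load_datasets()` helpers (not part of this repo), could look like:

from transformers import Trainer, TrainingArguments

model = build_model()                # hypothetical helper standing in for the original model setup
train_ds, eval_ds = load_datasets()  # hypothetical helper standing in for the original data setup

args = TrainingArguments(
    output_dir="output",
    eval_steps=250,    # matches "eval_steps" in trainer_state.json
    logging_steps=10,  # matches "logging_steps" in trainer_state.json
)
trainer = Trainer(model=model, args=args, train_dataset=train_ds, eval_dataset=eval_ds)

# Restores the weights plus optimizer.pt, scheduler.pt, and rng_state_*.pth,
# then continues training from step 8000.
trainer.train(resume_from_checkpoint="last-checkpoint")
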
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7f6fca18b3d6839cfa4f9b00cec6f979a279d6161ccf0e227ea2f0e6664d6d3e
 size 738367848

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:138d6cf3c8fe05fea07df883537101df6a3d38e7d05cbcc03796a983de350576
 size 1476823354

last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:28cdaddb959868042b846248e699766aefc2fadab97732661ad902989f1034df
 size 15984

last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f01a643a1ae2b83dd1c19bc6b73325f7e12cc5322058a11111e293dc5b31ae9d
 size 15984

last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1a90f4546ff0a4d9c836b2695bc4b1ddad6eb64e578565dd4c83c3a0c3672df7
 size 15984

last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:649b5fa0e92e74982a79e3759794b1cfec60cf9441738902668d54e2ffe1767b
 size 15984

last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d9e8b7d006141b3943e31b1b95143c70d5c410839f60e8892c3ebb5474fa5b82
 size 15984

last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ab8e9d82889b9d58c21adc3199b61dc25e089ed0456cd04a5834b8213920db8d
 size 15984

last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d61bbe5a4669c770dea677fdd22d95a5f9a1874c146a203a6de6b923066699e2
 size 15984

last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:208e36b51f1fe5107b8000b99406d4ff1bd7e95578591bc1f581b4593f80e4c6
 size 15984

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:81e0e2c967dab9f9c48f59c1d3cd0a40f676964ec54c91035ecabb3e1c2f4b45
 size 1000

last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 7.497656982193065,
   "eval_steps": 250,
-  "global_step":
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5355,6 +5355,770 @@
       "eval_spearman_manhattan": 0.7397995971405482,
       "eval_steps_per_second": 8.263,
       "step": 7000
+    },
+    {
+      "epoch": 6.569821930646673,
+      "grad_norm": 1.1269482374191284,
+      "learning_rate": 9.948674017777388e-06,
+      "loss": 0.0395,
+      "step": 7010
+    },
+    {
+      "epoch": 6.579194001874415,
+      "grad_norm": 0.8978859782218933,
+      "learning_rate": 9.948600799543118e-06,
+      "loss": 0.0438,
+      "step": 7020
+    },
+    {
+      "epoch": 6.588566073102156,
+      "grad_norm": 1.3999450206756592,
+      "learning_rate": 9.94852758130885e-06,
+      "loss": 0.0466,
+      "step": 7030
+    },
+    {
+      "epoch": 6.597938144329897,
+      "grad_norm": 0.985998272895813,
+      "learning_rate": 9.948454363074582e-06,
+      "loss": 0.0474,
+      "step": 7040
+    },
+    {
+      "epoch": 6.607310215557638,
+      "grad_norm": 0.7843828797340393,
+      "learning_rate": 9.948381144840312e-06,
+      "loss": 0.0417,
+      "step": 7050
+    },
+    {
+      "epoch": 6.616682286785379,
+      "grad_norm": 1.64656400680542,
+      "learning_rate": 9.948307926606043e-06,
+      "loss": 0.045,
+      "step": 7060
+    },
+    {
+      "epoch": 6.626054358013121,
+      "grad_norm": 0.6348075866699219,
+      "learning_rate": 9.948234708371774e-06,
+      "loss": 0.0501,
+      "step": 7070
+    },
+    {
+      "epoch": 6.635426429240862,
+      "grad_norm": 1.8781590461730957,
+      "learning_rate": 9.948161490137505e-06,
+      "loss": 0.0445,
+      "step": 7080
+    },
+    {
+      "epoch": 6.644798500468603,
+      "grad_norm": 1.0441402196884155,
+      "learning_rate": 9.948088271903235e-06,
+      "loss": 0.0457,
+      "step": 7090
+    },
+    {
+      "epoch": 6.654170571696345,
+      "grad_norm": 1.2460689544677734,
+      "learning_rate": 9.948015053668966e-06,
+      "loss": 0.0471,
+      "step": 7100
+    },
+    {
+      "epoch": 6.663542642924086,
+      "grad_norm": 0.993414580821991,
+      "learning_rate": 9.947941835434698e-06,
+      "loss": 0.0423,
+      "step": 7110
+    },
+    {
+      "epoch": 6.672914714151828,
+      "grad_norm": 1.2848552465438843,
+      "learning_rate": 9.947868617200428e-06,
+      "loss": 0.0414,
+      "step": 7120
+    },
+    {
+      "epoch": 6.682286785379569,
+      "grad_norm": 1.2903103828430176,
+      "learning_rate": 9.947795398966158e-06,
+      "loss": 0.0402,
+      "step": 7130
+    },
+    {
+      "epoch": 6.69165885660731,
+      "grad_norm": 1.2319235801696777,
+      "learning_rate": 9.94772218073189e-06,
+      "loss": 0.0504,
+      "step": 7140
+    },
+    {
+      "epoch": 6.701030927835052,
+      "grad_norm": 0.8465273976325989,
+      "learning_rate": 9.947648962497621e-06,
+      "loss": 0.0409,
+      "step": 7150
+    },
+    {
+      "epoch": 6.710402999062793,
+      "grad_norm": 1.186928153038025,
+      "learning_rate": 9.947575744263352e-06,
+      "loss": 0.0458,
+      "step": 7160
+    },
+    {
+      "epoch": 6.719775070290535,
+      "grad_norm": 1.3528752326965332,
+      "learning_rate": 9.947502526029083e-06,
+      "loss": 0.0433,
+      "step": 7170
+    },
+    {
+      "epoch": 6.7291471415182755,
+      "grad_norm": 0.8908892273902893,
+      "learning_rate": 9.947429307794814e-06,
+      "loss": 0.0456,
+      "step": 7180
+    },
+    {
+      "epoch": 6.7385192127460165,
+      "grad_norm": 1.1235069036483765,
+      "learning_rate": 9.947356089560544e-06,
+      "loss": 0.0481,
+      "step": 7190
+    },
+    {
+      "epoch": 6.747891283973758,
+      "grad_norm": 1.6809895038604736,
+      "learning_rate": 9.947282871326275e-06,
+      "loss": 0.0454,
+      "step": 7200
+    },
+    {
+      "epoch": 6.757263355201499,
+      "grad_norm": 0.8632039427757263,
+      "learning_rate": 9.947209653092008e-06,
+      "loss": 0.0481,
+      "step": 7210
+    },
+    {
+      "epoch": 6.766635426429241,
+      "grad_norm": 1.2185996770858765,
+      "learning_rate": 9.947136434857738e-06,
+      "loss": 0.0383,
+      "step": 7220
+    },
+    {
+      "epoch": 6.776007497656982,
+      "grad_norm": 0.6979696154594421,
+      "learning_rate": 9.947063216623467e-06,
+      "loss": 0.0435,
+      "step": 7230
+    },
+    {
+      "epoch": 6.785379568884723,
+      "grad_norm": 1.459441065788269,
+      "learning_rate": 9.9469899983892e-06,
+      "loss": 0.0449,
+      "step": 7240
+    },
+    {
+      "epoch": 6.794751640112465,
+      "grad_norm": 1.0957977771759033,
+      "learning_rate": 9.94691678015493e-06,
+      "loss": 0.032,
+      "step": 7250
+    },
+    {
+      "epoch": 6.794751640112465,
+      "eval_loss": 0.03765299916267395,
+      "eval_pearson_cosine": 0.7692482471466064,
+      "eval_pearson_dot": 0.722366452217102,
+      "eval_pearson_euclidean": 0.7316011190414429,
+      "eval_pearson_manhattan": 0.7333144545555115,
+      "eval_runtime": 22.5438,
+      "eval_samples_per_second": 66.537,
+      "eval_spearman_cosine": 0.7695046405395065,
+      "eval_spearman_dot": 0.7242050912795406,
+      "eval_spearman_euclidean": 0.7356828429817377,
+      "eval_spearman_manhattan": 0.737487116385034,
+      "eval_steps_per_second": 8.339,
+      "step": 7250
+    },
+    {
+      "epoch": 6.804123711340206,
+      "grad_norm": 1.377066731452942,
+      "learning_rate": 9.946843561920661e-06,
+      "loss": 0.0529,
+      "step": 7260
+    },
+    {
+      "epoch": 6.813495782567948,
+      "grad_norm": 0.714728057384491,
+      "learning_rate": 9.946770343686392e-06,
+      "loss": 0.0432,
+      "step": 7270
+    },
+    {
+      "epoch": 6.822867853795689,
+      "grad_norm": 1.4324384927749634,
+      "learning_rate": 9.946697125452125e-06,
+      "loss": 0.046,
+      "step": 7280
+    },
+    {
+      "epoch": 6.83223992502343,
+      "grad_norm": 1.2564704418182373,
+      "learning_rate": 9.946623907217854e-06,
+      "loss": 0.046,
+      "step": 7290
+    },
+    {
+      "epoch": 6.841611996251172,
+      "grad_norm": 0.8522197008132935,
+      "learning_rate": 9.946550688983584e-06,
+      "loss": 0.0393,
+      "step": 7300
+    },
+    {
+      "epoch": 6.850984067478913,
+      "grad_norm": 0.8751912117004395,
+      "learning_rate": 9.946477470749317e-06,
+      "loss": 0.0426,
+      "step": 7310
+    },
+    {
+      "epoch": 6.8603561387066545,
+      "grad_norm": 0.8960391879081726,
+      "learning_rate": 9.946404252515048e-06,
+      "loss": 0.0445,
+      "step": 7320
+    },
+    {
+      "epoch": 6.8697282099343955,
+      "grad_norm": 1.092128872871399,
+      "learning_rate": 9.946331034280778e-06,
+      "loss": 0.0459,
+      "step": 7330
+    },
+    {
+      "epoch": 6.8791002811621365,
+      "grad_norm": 1.1840777397155762,
+      "learning_rate": 9.946257816046509e-06,
+      "loss": 0.0387,
+      "step": 7340
+    },
+    {
+      "epoch": 6.888472352389878,
+      "grad_norm": 1.0283764600753784,
+      "learning_rate": 9.94618459781224e-06,
+      "loss": 0.0577,
+      "step": 7350
+    },
+    {
+      "epoch": 6.897844423617619,
+      "grad_norm": 0.749761164188385,
+      "learning_rate": 9.94611137957797e-06,
+      "loss": 0.0414,
+      "step": 7360
+    },
+    {
+      "epoch": 6.907216494845361,
+      "grad_norm": 0.8442000150680542,
+      "learning_rate": 9.946038161343701e-06,
+      "loss": 0.046,
+      "step": 7370
+    },
+    {
+      "epoch": 6.916588566073102,
+      "grad_norm": 1.2296583652496338,
+      "learning_rate": 9.945964943109432e-06,
+      "loss": 0.0412,
+      "step": 7380
+    },
+    {
+      "epoch": 6.925960637300843,
+      "grad_norm": 0.6515626311302185,
+      "learning_rate": 9.945891724875165e-06,
+      "loss": 0.0481,
+      "step": 7390
+    },
+    {
+      "epoch": 6.935332708528585,
+      "grad_norm": 1.8992091417312622,
+      "learning_rate": 9.945818506640895e-06,
+      "loss": 0.0431,
+      "step": 7400
+    },
+    {
+      "epoch": 6.944704779756326,
+      "grad_norm": 1.1663875579833984,
+      "learning_rate": 9.945745288406624e-06,
+      "loss": 0.0459,
+      "step": 7410
+    },
+    {
+      "epoch": 6.954076850984068,
+      "grad_norm": 0.6695976853370667,
+      "learning_rate": 9.945672070172357e-06,
+      "loss": 0.0448,
+      "step": 7420
+    },
+    {
+      "epoch": 6.963448922211809,
+      "grad_norm": 1.158563494682312,
+      "learning_rate": 9.945598851938088e-06,
+      "loss": 0.0398,
+      "step": 7430
+    },
+    {
+      "epoch": 6.97282099343955,
+      "grad_norm": 1.2068713903427124,
+      "learning_rate": 9.945525633703818e-06,
+      "loss": 0.0443,
+      "step": 7440
+    },
+    {
+      "epoch": 6.982193064667292,
+      "grad_norm": 0.9688456654548645,
+      "learning_rate": 9.945452415469549e-06,
+      "loss": 0.0452,
+      "step": 7450
+    },
+    {
+      "epoch": 6.991565135895033,
+      "grad_norm": 1.5483156442642212,
+      "learning_rate": 9.94537919723528e-06,
+      "loss": 0.0498,
+      "step": 7460
+    },
+    {
+      "epoch": 7.0009372071227745,
+      "grad_norm": 1.18287193775177,
+      "learning_rate": 9.94530597900101e-06,
+      "loss": 0.0445,
+      "step": 7470
+    },
+    {
+      "epoch": 7.010309278350515,
+      "grad_norm": 0.7765620946884155,
+      "learning_rate": 9.945232760766741e-06,
+      "loss": 0.0346,
+      "step": 7480
+    },
+    {
+      "epoch": 7.019681349578256,
+      "grad_norm": 0.948760986328125,
+      "learning_rate": 9.945159542532474e-06,
+      "loss": 0.0348,
+      "step": 7490
+    },
+    {
+      "epoch": 7.029053420805998,
+      "grad_norm": 0.9965664744377136,
+      "learning_rate": 9.945086324298205e-06,
+      "loss": 0.0342,
+      "step": 7500
+    },
+    {
+      "epoch": 7.029053420805998,
+      "eval_loss": 0.03782695531845093,
+      "eval_pearson_cosine": 0.768491804599762,
+      "eval_pearson_dot": 0.7183945775032043,
+      "eval_pearson_euclidean": 0.7320147752761841,
+      "eval_pearson_manhattan": 0.7333334684371948,
+      "eval_runtime": 21.6515,
+      "eval_samples_per_second": 69.279,
+      "eval_spearman_cosine": 0.7677979499645443,
+      "eval_spearman_dot": 0.7186610110098233,
+      "eval_spearman_euclidean": 0.7364530110375347,
+      "eval_spearman_manhattan": 0.737620665225201,
+      "eval_steps_per_second": 8.683,
+      "step": 7500
+    },
+    {
+      "epoch": 7.038425492033739,
+      "grad_norm": 0.8594346046447754,
+      "learning_rate": 9.945013106063935e-06,
+      "loss": 0.0318,
+      "step": 7510
+    },
+    {
+      "epoch": 7.047797563261481,
+      "grad_norm": 1.62812340259552,
+      "learning_rate": 9.944939887829666e-06,
+      "loss": 0.0414,
+      "step": 7520
+    },
+    {
+      "epoch": 7.057169634489222,
+      "grad_norm": 1.1017098426818848,
+      "learning_rate": 9.944866669595397e-06,
+      "loss": 0.0327,
+      "step": 7530
+    },
+    {
+      "epoch": 7.066541705716963,
+      "grad_norm": 0.8536505699157715,
+      "learning_rate": 9.944793451361128e-06,
+      "loss": 0.0286,
+      "step": 7540
+    },
+    {
+      "epoch": 7.075913776944705,
+      "grad_norm": 1.0389901399612427,
+      "learning_rate": 9.944720233126858e-06,
+      "loss": 0.0365,
+      "step": 7550
+    },
+    {
+      "epoch": 7.085285848172446,
+      "grad_norm": 1.0682491064071655,
+      "learning_rate": 9.94464701489259e-06,
+      "loss": 0.034,
+      "step": 7560
+    },
+    {
+      "epoch": 7.094657919400188,
+      "grad_norm": 0.8786489963531494,
+      "learning_rate": 9.944573796658321e-06,
+      "loss": 0.0373,
+      "step": 7570
+    },
+    {
+      "epoch": 7.104029990627929,
+      "grad_norm": 1.3642008304595947,
+      "learning_rate": 9.94450057842405e-06,
+      "loss": 0.0314,
+      "step": 7580
+    },
+    {
+      "epoch": 7.11340206185567,
+      "grad_norm": 0.7243325114250183,
+      "learning_rate": 9.944427360189783e-06,
+      "loss": 0.0299,
+      "step": 7590
+    },
+    {
+      "epoch": 7.122774133083412,
+      "grad_norm": 0.6696385145187378,
+      "learning_rate": 9.944354141955514e-06,
+      "loss": 0.0311,
+      "step": 7600
+    },
+    {
+      "epoch": 7.1321462043111525,
+      "grad_norm": 1.03152334690094,
+      "learning_rate": 9.944280923721244e-06,
+      "loss": 0.0355,
+      "step": 7610
+    },
+    {
+      "epoch": 7.141518275538894,
+      "grad_norm": 0.8586616516113281,
+      "learning_rate": 9.944207705486975e-06,
+      "loss": 0.0394,
+      "step": 7620
+    },
+    {
+      "epoch": 7.150890346766635,
+      "grad_norm": 0.9514285922050476,
+      "learning_rate": 9.944134487252706e-06,
+      "loss": 0.035,
+      "step": 7630
+    },
+    {
+      "epoch": 7.160262417994376,
+      "grad_norm": 0.8053460717201233,
+      "learning_rate": 9.944061269018437e-06,
+      "loss": 0.0312,
+      "step": 7640
+    },
+    {
+      "epoch": 7.169634489222118,
+      "grad_norm": 1.0056674480438232,
+      "learning_rate": 9.943988050784167e-06,
+      "loss": 0.0371,
+      "step": 7650
+    },
+    {
+      "epoch": 7.179006560449859,
+      "grad_norm": 0.7738359570503235,
+      "learning_rate": 9.943914832549898e-06,
+      "loss": 0.0302,
+      "step": 7660
+    },
+    {
+      "epoch": 7.188378631677601,
+      "grad_norm": 1.039197325706482,
+      "learning_rate": 9.94384161431563e-06,
+      "loss": 0.0316,
+      "step": 7670
+    },
+    {
+      "epoch": 7.197750702905342,
+      "grad_norm": 1.578165888786316,
+      "learning_rate": 9.943768396081361e-06,
+      "loss": 0.0388,
+      "step": 7680
+    },
+    {
+      "epoch": 7.207122774133083,
+      "grad_norm": 1.1753205060958862,
+      "learning_rate": 9.943695177847092e-06,
+      "loss": 0.0387,
+      "step": 7690
+    },
+    {
+      "epoch": 7.216494845360825,
+      "grad_norm": 1.295299768447876,
+      "learning_rate": 9.943621959612823e-06,
+      "loss": 0.0417,
+      "step": 7700
+    },
+    {
+      "epoch": 7.225866916588566,
+      "grad_norm": 0.9477363228797913,
+      "learning_rate": 9.943548741378554e-06,
+      "loss": 0.0305,
+      "step": 7710
+    },
+    {
+      "epoch": 7.235238987816308,
+      "grad_norm": 1.0547223091125488,
+      "learning_rate": 9.943475523144284e-06,
+      "loss": 0.0314,
+      "step": 7720
+    },
+    {
+      "epoch": 7.244611059044049,
+      "grad_norm": 1.4873117208480835,
+      "learning_rate": 9.943402304910015e-06,
+      "loss": 0.0302,
+      "step": 7730
+    },
+    {
+      "epoch": 7.25398313027179,
+      "grad_norm": 0.9882778525352478,
+      "learning_rate": 9.943329086675748e-06,
+      "loss": 0.0328,
+      "step": 7740
+    },
+    {
+      "epoch": 7.2633552014995315,
+      "grad_norm": 1.3187719583511353,
+      "learning_rate": 9.943255868441477e-06,
+      "loss": 0.0341,
+      "step": 7750
+    },
+    {
+      "epoch": 7.2633552014995315,
+      "eval_loss": 0.03773624449968338,
+      "eval_pearson_cosine": 0.7699387073516846,
+      "eval_pearson_dot": 0.7237234115600586,
+      "eval_pearson_euclidean": 0.7316513061523438,
+      "eval_pearson_manhattan": 0.7335678339004517,
+      "eval_runtime": 22.1612,
+      "eval_samples_per_second": 67.686,
+      "eval_spearman_cosine": 0.7694615753118931,
+      "eval_spearman_dot": 0.7243788947148158,
+      "eval_spearman_euclidean": 0.7361849268567764,
+      "eval_spearman_manhattan": 0.7377945356892571,
+      "eval_steps_per_second": 8.483,
+      "step": 7750
+    },
+    {
+      "epoch": 7.2727272727272725,
+      "grad_norm": 1.0984870195388794,
+      "learning_rate": 9.943182650207207e-06,
+      "loss": 0.0329,
+      "step": 7760
+    },
+    {
+      "epoch": 7.282099343955014,
+      "grad_norm": 0.7666100263595581,
+      "learning_rate": 9.94310943197294e-06,
+      "loss": 0.0358,
+      "step": 7770
+    },
+    {
+      "epoch": 7.291471415182755,
+      "grad_norm": 0.9941838383674622,
+      "learning_rate": 9.94303621373867e-06,
+      "loss": 0.0351,
+      "step": 7780
+    },
+    {
+      "epoch": 7.300843486410496,
+      "grad_norm": 1.3012335300445557,
+      "learning_rate": 9.942962995504401e-06,
+      "loss": 0.0296,
+      "step": 7790
+    },
+    {
+      "epoch": 7.310215557638238,
+      "grad_norm": 1.1914719343185425,
+      "learning_rate": 9.942889777270132e-06,
+      "loss": 0.0333,
+      "step": 7800
+    },
+    {
+      "epoch": 7.319587628865979,
+      "grad_norm": 1.1405929327011108,
+      "learning_rate": 9.942816559035863e-06,
+      "loss": 0.0408,
+      "step": 7810
+    },
+    {
+      "epoch": 7.328959700093721,
+      "grad_norm": 0.665600061416626,
+      "learning_rate": 9.942743340801594e-06,
+      "loss": 0.0314,
+      "step": 7820
+    },
+    {
+      "epoch": 7.338331771321462,
+      "grad_norm": 1.2029966115951538,
+      "learning_rate": 9.942670122567324e-06,
+      "loss": 0.041,
+      "step": 7830
+    },
+    {
+      "epoch": 7.347703842549203,
+      "grad_norm": 0.44810751080513,
+      "learning_rate": 9.942596904333057e-06,
+      "loss": 0.0317,
+      "step": 7840
+    },
+    {
+      "epoch": 7.357075913776945,
+      "grad_norm": 1.565082311630249,
+      "learning_rate": 9.942523686098788e-06,
+      "loss": 0.035,
+      "step": 7850
+    },
+    {
+      "epoch": 7.366447985004686,
+      "grad_norm": 1.6850316524505615,
+      "learning_rate": 9.942450467864517e-06,
+      "loss": 0.0365,
+      "step": 7860
+    },
+    {
+      "epoch": 7.375820056232428,
+      "grad_norm": 1.0027261972427368,
+      "learning_rate": 9.942377249630249e-06,
+      "loss": 0.0309,
+      "step": 7870
+    },
+    {
+      "epoch": 7.385192127460169,
+      "grad_norm": 0.51674485206604,
+      "learning_rate": 9.94230403139598e-06,
+      "loss": 0.0321,
+      "step": 7880
+    },
+    {
+      "epoch": 7.39456419868791,
+      "grad_norm": 1.0429599285125732,
+      "learning_rate": 9.94223081316171e-06,
+      "loss": 0.033,
+      "step": 7890
+    },
+    {
+      "epoch": 7.4039362699156515,
+      "grad_norm": 0.618232250213623,
+      "learning_rate": 9.942157594927441e-06,
+      "loss": 0.0353,
+      "step": 7900
+    },
+    {
+      "epoch": 7.413308341143392,
+      "grad_norm": 0.9780518412590027,
+      "learning_rate": 9.942084376693174e-06,
+      "loss": 0.0354,
+      "step": 7910
+    },
+    {
+      "epoch": 7.422680412371134,
+      "grad_norm": 1.214362621307373,
+      "learning_rate": 9.942011158458903e-06,
+      "loss": 0.0338,
+      "step": 7920
+    },
+    {
+      "epoch": 7.432052483598875,
+      "grad_norm": 1.202986240386963,
+      "learning_rate": 9.941937940224634e-06,
+      "loss": 0.0387,
+      "step": 7930
+    },
+    {
+      "epoch": 7.441424554826616,
+      "grad_norm": 1.4128488302230835,
+      "learning_rate": 9.941864721990366e-06,
+      "loss": 0.0315,
+      "step": 7940
+    },
+    {
+      "epoch": 7.450796626054358,
+      "grad_norm": 0.7198026180267334,
+      "learning_rate": 9.941791503756097e-06,
+      "loss": 0.0338,
+      "step": 7950
+    },
+    {
+      "epoch": 7.460168697282099,
+      "grad_norm": 1.1124250888824463,
+      "learning_rate": 9.941718285521828e-06,
+      "loss": 0.0352,
+      "step": 7960
+    },
+    {
+      "epoch": 7.469540768509841,
+      "grad_norm": 1.0420817136764526,
+      "learning_rate": 9.941645067287558e-06,
+      "loss": 0.0338,
+      "step": 7970
+    },
+    {
+      "epoch": 7.478912839737582,
+      "grad_norm": 0.9638373255729675,
+      "learning_rate": 9.941571849053289e-06,
+      "loss": 0.0356,
+      "step": 7980
+    },
+    {
+      "epoch": 7.488284910965323,
+      "grad_norm": 0.8584896922111511,
+      "learning_rate": 9.94149863081902e-06,
+      "loss": 0.0353,
+      "step": 7990
+    },
+    {
+      "epoch": 7.497656982193065,
+      "grad_norm": 0.7161556482315063,
+      "learning_rate": 9.94142541258475e-06,
+      "loss": 0.0329,
+      "step": 8000
+    },
+    {
+      "epoch": 7.497656982193065,
+      "eval_loss": 0.03753030672669411,
+      "eval_pearson_cosine": 0.7705868482589722,
+      "eval_pearson_dot": 0.7248358726501465,
+      "eval_pearson_euclidean": 0.734631359577179,
+      "eval_pearson_manhattan": 0.7363988161087036,
+      "eval_runtime": 22.3628,
+      "eval_samples_per_second": 67.076,
+      "eval_spearman_cosine": 0.769708288306187,
+      "eval_spearman_dot": 0.7249767839130733,
+      "eval_spearman_euclidean": 0.7394619718544255,
+      "eval_spearman_manhattan": 0.7409361299302836,
+      "eval_steps_per_second": 8.407,
+      "step": 8000
     }
   ],
   "logging_steps": 10,
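
The appended history can be read back out of the checkpoint without re-running anything; a small sketch, assuming the entries sit under the Trainer's usual "log_history" key in trainer_state.json:

import json

# Load the checkpoint's training log.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["epoch"], state["global_step"])  # 7.497656982193065, 8000

# The eval records are the entries carrying eval_* metrics (steps 7250, 7500, 7750, 8000 above).
evals = [e for e in state["log_history"] if "eval_loss" in e]
for e in evals:
    print(e["step"], e["eval_loss"], e["eval_spearman_cosine"])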