Training in progress, step 10000, checkpoint
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1777 -3
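For reference, the per-step records added to last-checkpoint/trainer_state.json in this commit can be inspected directly once the checkpoint is downloaded. A minimal sketch, assuming the standard Hugging Face Trainer layout in which those records sit under the top-level "log_history" key (the path and key names here are illustrative, not part of this commit):

```python
import json

# Hypothetical local path; adjust to wherever the checkpoint was downloaded.
STATE_FILE = "last-checkpoint/trainer_state.json"

with open(STATE_FILE) as f:
    state = json.load(f)

# "log_history" is assumed to hold the per-step entries shown in the diff below
# ("loss"/"step" for training records, "eval_loss" for evaluation records).
history = state.get("log_history", [])
train_points = [(r["step"], r["loss"]) for r in history if "loss" in r]
eval_points = [(r["step"], r["eval_loss"]) for r in history if "eval_loss" in r]

print(f"global_step: {state['global_step']}, epoch: {state['epoch']:.4f}")
print("last train loss points:", train_points[-3:])
print("eval loss points:", eval_points)
```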
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dbfded01e29c2f16226927197c7b53cb17e6b0e25f4e77f11587c6e4e8cecdca
 size 737632172
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:51c9c2501538e245b7dc88214c50a178f28922e91dc78a7635e1dfef030205c3
 size 1475354682
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1a2fbcd26bac3ea7dc02fc9ede5b8a1914ca51611473722a11a969e1f26ac0ee
 size 15984
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:66d97b511d2fdb8061e5bf72c139923941c148260fac1caedd654028da6986c1
 size 15984
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3839473129eb8c438ab312370daa55eb10a0790f33d38fc5eaa24859b54b0d1f
 size 15984
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5088a0d34c7015afe60457fbb3f0a4740839369017a42ea4b3250322c2d63ceb
 size 15984
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f9cac0eb25286b75549fa2030810940adf357064a83facaf5c58ebe37190b6ac
 size 15984
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f0a57d29811122d52bd53f81af680412b91dde1cd2a12fa885d8a54388be8e2d
 size 15984
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2c90ab29b255eaf920ecc1cba0b586e426f8e2db67b44a65576693f84178a04f
 size 15984
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d4efbfa3cfb1bb8fb9c3380e65959a8b4eaf3bceb0507a26ffba1a3e4636ddb1
 size 15984
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b579900d94a8c528190bb9fc0315439f3c057f344b31a3968eaa60ed56b9c9f5
 size 1000
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.44380093309146185,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5313,6 +5313,1780 @@
   "learning_rate": 9.98699802855441e-06,
   "loss": 11.8236,
   "step": 7500
 },
 ],
 "logging_steps": 10,
@@ -5332,7 +7106,7 @@
   "attributes": {}
   }
 },
-  "total_flos":
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
| 5316 |
+
},
|
| 5317 |
+
{
|
| 5318 |
+
"epoch": 0.3332945007516878,
|
| 5319 |
+
"grad_norm": 76.12377166748047,
|
| 5320 |
+
"learning_rate": 9.986980692592483e-06,
|
| 5321 |
+
"loss": 11.2932,
|
| 5322 |
+
"step": 7510
|
| 5323 |
+
},
|
| 5324 |
+
{
|
| 5325 |
+
"epoch": 0.3337383016847793,
|
| 5326 |
+
"grad_norm": 94.77151489257812,
|
| 5327 |
+
"learning_rate": 9.986963356630556e-06,
|
| 5328 |
+
"loss": 12.1823,
|
| 5329 |
+
"step": 7520
|
| 5330 |
+
},
|
| 5331 |
+
{
|
| 5332 |
+
"epoch": 0.33418210261787074,
|
| 5333 |
+
"grad_norm": 100.01810455322266,
|
| 5334 |
+
"learning_rate": 9.986946020668629e-06,
|
| 5335 |
+
"loss": 12.6788,
|
| 5336 |
+
"step": 7530
|
| 5337 |
+
},
|
| 5338 |
+
{
|
| 5339 |
+
"epoch": 0.3346259035509622,
|
| 5340 |
+
"grad_norm": 132.56448364257812,
|
| 5341 |
+
"learning_rate": 9.9869286847067e-06,
|
| 5342 |
+
"loss": 12.149,
|
| 5343 |
+
"step": 7540
|
| 5344 |
+
},
|
| 5345 |
+
{
|
| 5346 |
+
"epoch": 0.33506970448405365,
|
| 5347 |
+
"grad_norm": 113.610107421875,
|
| 5348 |
+
"learning_rate": 9.986911348744773e-06,
|
| 5349 |
+
"loss": 11.8811,
|
| 5350 |
+
"step": 7550
|
| 5351 |
+
},
|
| 5352 |
+
{
|
| 5353 |
+
"epoch": 0.33551350541714514,
|
| 5354 |
+
"grad_norm": 94.93478393554688,
|
| 5355 |
+
"learning_rate": 9.986894012782846e-06,
|
| 5356 |
+
"loss": 11.5749,
|
| 5357 |
+
"step": 7560
|
| 5358 |
+
},
|
| 5359 |
+
{
|
| 5360 |
+
"epoch": 0.33595730635023663,
|
| 5361 |
+
"grad_norm": 92.85311126708984,
|
| 5362 |
+
"learning_rate": 9.986876676820918e-06,
|
| 5363 |
+
"loss": 11.7571,
|
| 5364 |
+
"step": 7570
|
| 5365 |
+
},
|
| 5366 |
+
{
|
| 5367 |
+
"epoch": 0.33640110728332806,
|
| 5368 |
+
"grad_norm": 79.2991943359375,
|
| 5369 |
+
"learning_rate": 9.986859340858991e-06,
|
| 5370 |
+
"loss": 11.3534,
|
| 5371 |
+
"step": 7580
|
| 5372 |
+
},
|
| 5373 |
+
{
|
| 5374 |
+
"epoch": 0.33684490821641955,
|
| 5375 |
+
"grad_norm": 103.73065185546875,
|
| 5376 |
+
"learning_rate": 9.986842004897064e-06,
|
| 5377 |
+
"loss": 11.6249,
|
| 5378 |
+
"step": 7590
|
| 5379 |
+
},
|
| 5380 |
+
{
|
| 5381 |
+
"epoch": 0.337288709149511,
|
| 5382 |
+
"grad_norm": 79.18226623535156,
|
| 5383 |
+
"learning_rate": 9.986824668935135e-06,
|
| 5384 |
+
"loss": 11.634,
|
| 5385 |
+
"step": 7600
|
| 5386 |
+
},
|
| 5387 |
+
{
|
| 5388 |
+
"epoch": 0.33773251008260247,
|
| 5389 |
+
"grad_norm": 100.51226806640625,
|
| 5390 |
+
"learning_rate": 9.986807332973208e-06,
|
| 5391 |
+
"loss": 11.2835,
|
| 5392 |
+
"step": 7610
|
| 5393 |
+
},
|
| 5394 |
+
{
|
| 5395 |
+
"epoch": 0.3381763110156939,
|
| 5396 |
+
"grad_norm": 92.74190521240234,
|
| 5397 |
+
"learning_rate": 9.986789997011281e-06,
|
| 5398 |
+
"loss": 11.3273,
|
| 5399 |
+
"step": 7620
|
| 5400 |
+
},
|
| 5401 |
+
{
|
| 5402 |
+
"epoch": 0.3386201119487854,
|
| 5403 |
+
"grad_norm": 89.3246841430664,
|
| 5404 |
+
"learning_rate": 9.986772661049353e-06,
|
| 5405 |
+
"loss": 12.6279,
|
| 5406 |
+
"step": 7630
|
| 5407 |
+
},
|
| 5408 |
+
{
|
| 5409 |
+
"epoch": 0.3390639128818768,
|
| 5410 |
+
"grad_norm": 117.41140747070312,
|
| 5411 |
+
"learning_rate": 9.986755325087426e-06,
|
| 5412 |
+
"loss": 11.9215,
|
| 5413 |
+
"step": 7640
|
| 5414 |
+
},
|
| 5415 |
+
{
|
| 5416 |
+
"epoch": 0.3395077138149683,
|
| 5417 |
+
"grad_norm": 105.69800567626953,
|
| 5418 |
+
"learning_rate": 9.986737989125499e-06,
|
| 5419 |
+
"loss": 11.4488,
|
| 5420 |
+
"step": 7650
|
| 5421 |
+
},
|
| 5422 |
+
{
|
| 5423 |
+
"epoch": 0.33995151474805974,
|
| 5424 |
+
"grad_norm": 99.95671844482422,
|
| 5425 |
+
"learning_rate": 9.98672065316357e-06,
|
| 5426 |
+
"loss": 11.5145,
|
| 5427 |
+
"step": 7660
|
| 5428 |
+
},
|
| 5429 |
+
{
|
| 5430 |
+
"epoch": 0.3403953156811512,
|
| 5431 |
+
"grad_norm": 85.2020263671875,
|
| 5432 |
+
"learning_rate": 9.986703317201643e-06,
|
| 5433 |
+
"loss": 11.0807,
|
| 5434 |
+
"step": 7670
|
| 5435 |
+
},
|
| 5436 |
+
{
|
| 5437 |
+
"epoch": 0.34083911661424265,
|
| 5438 |
+
"grad_norm": 92.34159088134766,
|
| 5439 |
+
"learning_rate": 9.986685981239717e-06,
|
| 5440 |
+
"loss": 11.5631,
|
| 5441 |
+
"step": 7680
|
| 5442 |
+
},
|
| 5443 |
+
{
|
| 5444 |
+
"epoch": 0.34128291754733414,
|
| 5445 |
+
"grad_norm": 89.49488067626953,
|
| 5446 |
+
"learning_rate": 9.986668645277788e-06,
|
| 5447 |
+
"loss": 11.5445,
|
| 5448 |
+
"step": 7690
|
| 5449 |
+
},
|
| 5450 |
+
{
|
| 5451 |
+
"epoch": 0.34172671848042563,
|
| 5452 |
+
"grad_norm": 96.70406341552734,
|
| 5453 |
+
"learning_rate": 9.986651309315861e-06,
|
| 5454 |
+
"loss": 11.5824,
|
| 5455 |
+
"step": 7700
|
| 5456 |
+
},
|
| 5457 |
+
{
|
| 5458 |
+
"epoch": 0.34217051941351706,
|
| 5459 |
+
"grad_norm": 87.1051254272461,
|
| 5460 |
+
"learning_rate": 9.986633973353934e-06,
|
| 5461 |
+
"loss": 11.9654,
|
| 5462 |
+
"step": 7710
|
| 5463 |
+
},
|
| 5464 |
+
{
|
| 5465 |
+
"epoch": 0.34261432034660855,
|
| 5466 |
+
"grad_norm": 94.70158386230469,
|
| 5467 |
+
"learning_rate": 9.986616637392005e-06,
|
| 5468 |
+
"loss": 10.8494,
|
| 5469 |
+
"step": 7720
|
| 5470 |
+
},
|
| 5471 |
+
{
|
| 5472 |
+
"epoch": 0.3430581212797,
|
| 5473 |
+
"grad_norm": 110.57974243164062,
|
| 5474 |
+
"learning_rate": 9.986599301430079e-06,
|
| 5475 |
+
"loss": 11.244,
|
| 5476 |
+
"step": 7730
|
| 5477 |
+
},
|
| 5478 |
+
{
|
| 5479 |
+
"epoch": 0.34350192221279147,
|
| 5480 |
+
"grad_norm": 87.20235443115234,
|
| 5481 |
+
"learning_rate": 9.986581965468152e-06,
|
| 5482 |
+
"loss": 11.3683,
|
| 5483 |
+
"step": 7740
|
| 5484 |
+
},
|
| 5485 |
+
{
|
| 5486 |
+
"epoch": 0.3439457231458829,
|
| 5487 |
+
"grad_norm": 89.35726165771484,
|
| 5488 |
+
"learning_rate": 9.986564629506225e-06,
|
| 5489 |
+
"loss": 11.1392,
|
| 5490 |
+
"step": 7750
|
| 5491 |
+
},
|
| 5492 |
+
{
|
| 5493 |
+
"epoch": 0.3443895240789744,
|
| 5494 |
+
"grad_norm": 97.75891876220703,
|
| 5495 |
+
"learning_rate": 9.986547293544296e-06,
|
| 5496 |
+
"loss": 11.761,
|
| 5497 |
+
"step": 7760
|
| 5498 |
+
},
|
| 5499 |
+
{
|
| 5500 |
+
"epoch": 0.3448333250120658,
|
| 5501 |
+
"grad_norm": 84.02690887451172,
|
| 5502 |
+
"learning_rate": 9.98652995758237e-06,
|
| 5503 |
+
"loss": 11.2544,
|
| 5504 |
+
"step": 7770
|
| 5505 |
+
},
|
| 5506 |
+
{
|
| 5507 |
+
"epoch": 0.3452771259451573,
|
| 5508 |
+
"grad_norm": 89.77345275878906,
|
| 5509 |
+
"learning_rate": 9.986512621620442e-06,
|
| 5510 |
+
"loss": 12.2648,
|
| 5511 |
+
"step": 7780
|
| 5512 |
+
},
|
| 5513 |
+
{
|
| 5514 |
+
"epoch": 0.34572092687824874,
|
| 5515 |
+
"grad_norm": 96.23056030273438,
|
| 5516 |
+
"learning_rate": 9.986495285658514e-06,
|
| 5517 |
+
"loss": 11.7363,
|
| 5518 |
+
"step": 7790
|
| 5519 |
+
},
|
| 5520 |
+
{
|
| 5521 |
+
"epoch": 0.3461647278113402,
|
| 5522 |
+
"grad_norm": 83.2893295288086,
|
| 5523 |
+
"learning_rate": 9.986477949696587e-06,
|
| 5524 |
+
"loss": 11.6393,
|
| 5525 |
+
"step": 7800
|
| 5526 |
+
},
|
| 5527 |
+
{
|
| 5528 |
+
"epoch": 0.3466085287444317,
|
| 5529 |
+
"grad_norm": 88.02169036865234,
|
| 5530 |
+
"learning_rate": 9.98646061373466e-06,
|
| 5531 |
+
"loss": 11.9822,
|
| 5532 |
+
"step": 7810
|
| 5533 |
+
},
|
| 5534 |
+
{
|
| 5535 |
+
"epoch": 0.34705232967752314,
|
| 5536 |
+
"grad_norm": 90.59603881835938,
|
| 5537 |
+
"learning_rate": 9.986443277772731e-06,
|
| 5538 |
+
"loss": 11.1564,
|
| 5539 |
+
"step": 7820
|
| 5540 |
+
},
|
| 5541 |
+
{
|
| 5542 |
+
"epoch": 0.34749613061061463,
|
| 5543 |
+
"grad_norm": 80.67443084716797,
|
| 5544 |
+
"learning_rate": 9.986425941810804e-06,
|
| 5545 |
+
"loss": 11.7602,
|
| 5546 |
+
"step": 7830
|
| 5547 |
+
},
|
| 5548 |
+
{
|
| 5549 |
+
"epoch": 0.34793993154370606,
|
| 5550 |
+
"grad_norm": 110.11127471923828,
|
| 5551 |
+
"learning_rate": 9.986408605848877e-06,
|
| 5552 |
+
"loss": 11.5633,
|
| 5553 |
+
"step": 7840
|
| 5554 |
+
},
|
| 5555 |
+
{
|
| 5556 |
+
"epoch": 0.34838373247679755,
|
| 5557 |
+
"grad_norm": 93.8271255493164,
|
| 5558 |
+
"learning_rate": 9.986391269886949e-06,
|
| 5559 |
+
"loss": 11.7892,
|
| 5560 |
+
"step": 7850
|
| 5561 |
+
},
|
| 5562 |
+
{
|
| 5563 |
+
"epoch": 0.348827533409889,
|
| 5564 |
+
"grad_norm": 108.33939361572266,
|
| 5565 |
+
"learning_rate": 9.986373933925022e-06,
|
| 5566 |
+
"loss": 11.5551,
|
| 5567 |
+
"step": 7860
|
| 5568 |
+
},
|
| 5569 |
+
{
|
| 5570 |
+
"epoch": 0.34927133434298047,
|
| 5571 |
+
"grad_norm": 92.38509368896484,
|
| 5572 |
+
"learning_rate": 9.986356597963095e-06,
|
| 5573 |
+
"loss": 11.3955,
|
| 5574 |
+
"step": 7870
|
| 5575 |
+
},
|
| 5576 |
+
{
|
| 5577 |
+
"epoch": 0.3497151352760719,
|
| 5578 |
+
"grad_norm": 83.159423828125,
|
| 5579 |
+
"learning_rate": 9.986339262001166e-06,
|
| 5580 |
+
"loss": 11.7543,
|
| 5581 |
+
"step": 7880
|
| 5582 |
+
},
|
| 5583 |
+
{
|
| 5584 |
+
"epoch": 0.3501589362091634,
|
| 5585 |
+
"grad_norm": 90.84649658203125,
|
| 5586 |
+
"learning_rate": 9.98632192603924e-06,
|
| 5587 |
+
"loss": 11.3032,
|
| 5588 |
+
"step": 7890
|
| 5589 |
+
},
|
| 5590 |
+
{
|
| 5591 |
+
"epoch": 0.3506027371422548,
|
| 5592 |
+
"grad_norm": 98.68833923339844,
|
| 5593 |
+
"learning_rate": 9.986304590077312e-06,
|
| 5594 |
+
"loss": 11.6455,
|
| 5595 |
+
"step": 7900
|
| 5596 |
+
},
|
| 5597 |
+
{
|
| 5598 |
+
"epoch": 0.3510465380753463,
|
| 5599 |
+
"grad_norm": 96.10926055908203,
|
| 5600 |
+
"learning_rate": 9.986287254115384e-06,
|
| 5601 |
+
"loss": 11.6115,
|
| 5602 |
+
"step": 7910
|
| 5603 |
+
},
|
| 5604 |
+
{
|
| 5605 |
+
"epoch": 0.3514903390084378,
|
| 5606 |
+
"grad_norm": 108.88908386230469,
|
| 5607 |
+
"learning_rate": 9.986269918153457e-06,
|
| 5608 |
+
"loss": 11.0329,
|
| 5609 |
+
"step": 7920
|
| 5610 |
+
},
|
| 5611 |
+
{
|
| 5612 |
+
"epoch": 0.3519341399415292,
|
| 5613 |
+
"grad_norm": 85.24256896972656,
|
| 5614 |
+
"learning_rate": 9.98625258219153e-06,
|
| 5615 |
+
"loss": 11.3518,
|
| 5616 |
+
"step": 7930
|
| 5617 |
+
},
|
| 5618 |
+
{
|
| 5619 |
+
"epoch": 0.3523779408746207,
|
| 5620 |
+
"grad_norm": 100.0363540649414,
|
| 5621 |
+
"learning_rate": 9.986235246229601e-06,
|
| 5622 |
+
"loss": 11.4287,
|
| 5623 |
+
"step": 7940
|
| 5624 |
+
},
|
| 5625 |
+
{
|
| 5626 |
+
"epoch": 0.35282174180771214,
|
| 5627 |
+
"grad_norm": 101.91362762451172,
|
| 5628 |
+
"learning_rate": 9.986217910267674e-06,
|
| 5629 |
+
"loss": 10.8322,
|
| 5630 |
+
"step": 7950
|
| 5631 |
+
},
|
| 5632 |
+
{
|
| 5633 |
+
"epoch": 0.35326554274080363,
|
| 5634 |
+
"grad_norm": 78.89401245117188,
|
| 5635 |
+
"learning_rate": 9.986200574305748e-06,
|
| 5636 |
+
"loss": 11.2172,
|
| 5637 |
+
"step": 7960
|
| 5638 |
+
},
|
| 5639 |
+
{
|
| 5640 |
+
"epoch": 0.35370934367389506,
|
| 5641 |
+
"grad_norm": 99.1776123046875,
|
| 5642 |
+
"learning_rate": 9.98618323834382e-06,
|
| 5643 |
+
"loss": 11.6107,
|
| 5644 |
+
"step": 7970
|
| 5645 |
+
},
|
| 5646 |
+
{
|
| 5647 |
+
"epoch": 0.35415314460698655,
|
| 5648 |
+
"grad_norm": 88.57538604736328,
|
| 5649 |
+
"learning_rate": 9.986165902381892e-06,
|
| 5650 |
+
"loss": 11.4944,
|
| 5651 |
+
"step": 7980
|
| 5652 |
+
},
|
| 5653 |
+
{
|
| 5654 |
+
"epoch": 0.354596945540078,
|
| 5655 |
+
"grad_norm": 91.77239227294922,
|
| 5656 |
+
"learning_rate": 9.986148566419965e-06,
|
| 5657 |
+
"loss": 11.4736,
|
| 5658 |
+
"step": 7990
|
| 5659 |
+
},
|
| 5660 |
+
{
|
| 5661 |
+
"epoch": 0.35504074647316947,
|
| 5662 |
+
"grad_norm": 118.60701751708984,
|
| 5663 |
+
"learning_rate": 9.986131230458038e-06,
|
| 5664 |
+
"loss": 11.3651,
|
| 5665 |
+
"step": 8000
|
| 5666 |
+
},
|
| 5667 |
+
{
|
| 5668 |
+
"epoch": 0.35504074647316947,
|
| 5669 |
+
"eval_loss": 0.35784557461738586,
|
| 5670 |
+
"eval_runtime": 673.1581,
|
| 5671 |
+
"eval_samples_per_second": 1804.021,
|
| 5672 |
+
"eval_steps_per_second": 56.376,
|
| 5673 |
+
"step": 8000
|
| 5674 |
+
},
|
| 5675 |
+
{
|
| 5676 |
+
"epoch": 0.3554845474062609,
|
| 5677 |
+
"grad_norm": 78.45508575439453,
|
| 5678 |
+
"learning_rate": 9.98611389449611e-06,
|
| 5679 |
+
"loss": 11.5346,
|
| 5680 |
+
"step": 8010
|
| 5681 |
+
},
|
| 5682 |
+
{
|
| 5683 |
+
"epoch": 0.3559283483393524,
|
| 5684 |
+
"grad_norm": 93.72156524658203,
|
| 5685 |
+
"learning_rate": 9.986096558534183e-06,
|
| 5686 |
+
"loss": 11.7047,
|
| 5687 |
+
"step": 8020
|
| 5688 |
+
},
|
| 5689 |
+
{
|
| 5690 |
+
"epoch": 0.35637214927244387,
|
| 5691 |
+
"grad_norm": 108.60897064208984,
|
| 5692 |
+
"learning_rate": 9.986079222572256e-06,
|
| 5693 |
+
"loss": 11.5851,
|
| 5694 |
+
"step": 8030
|
| 5695 |
+
},
|
| 5696 |
+
{
|
| 5697 |
+
"epoch": 0.3568159502055353,
|
| 5698 |
+
"grad_norm": 98.00389862060547,
|
| 5699 |
+
"learning_rate": 9.986061886610327e-06,
|
| 5700 |
+
"loss": 11.8359,
|
| 5701 |
+
"step": 8040
|
| 5702 |
+
},
|
| 5703 |
+
{
|
| 5704 |
+
"epoch": 0.3572597511386268,
|
| 5705 |
+
"grad_norm": 96.44226837158203,
|
| 5706 |
+
"learning_rate": 9.9860445506484e-06,
|
| 5707 |
+
"loss": 11.6475,
|
| 5708 |
+
"step": 8050
|
| 5709 |
+
},
|
| 5710 |
+
{
|
| 5711 |
+
"epoch": 0.3577035520717182,
|
| 5712 |
+
"grad_norm": 80.3302993774414,
|
| 5713 |
+
"learning_rate": 9.986027214686473e-06,
|
| 5714 |
+
"loss": 11.3945,
|
| 5715 |
+
"step": 8060
|
| 5716 |
+
},
|
| 5717 |
+
{
|
| 5718 |
+
"epoch": 0.3581473530048097,
|
| 5719 |
+
"grad_norm": 96.11526489257812,
|
| 5720 |
+
"learning_rate": 9.986009878724545e-06,
|
| 5721 |
+
"loss": 11.0479,
|
| 5722 |
+
"step": 8070
|
| 5723 |
+
},
|
| 5724 |
+
{
|
| 5725 |
+
"epoch": 0.35859115393790114,
|
| 5726 |
+
"grad_norm": 97.33485412597656,
|
| 5727 |
+
"learning_rate": 9.985992542762618e-06,
|
| 5728 |
+
"loss": 11.8224,
|
| 5729 |
+
"step": 8080
|
| 5730 |
+
},
|
| 5731 |
+
{
|
| 5732 |
+
"epoch": 0.35903495487099263,
|
| 5733 |
+
"grad_norm": 82.29984283447266,
|
| 5734 |
+
"learning_rate": 9.98597520680069e-06,
|
| 5735 |
+
"loss": 11.5318,
|
| 5736 |
+
"step": 8090
|
| 5737 |
+
},
|
| 5738 |
+
{
|
| 5739 |
+
"epoch": 0.35947875580408406,
|
| 5740 |
+
"grad_norm": 93.84577941894531,
|
| 5741 |
+
"learning_rate": 9.985957870838762e-06,
|
| 5742 |
+
"loss": 11.4744,
|
| 5743 |
+
"step": 8100
|
| 5744 |
+
},
|
| 5745 |
+
{
|
| 5746 |
+
"epoch": 0.35992255673717555,
|
| 5747 |
+
"grad_norm": 84.21646881103516,
|
| 5748 |
+
"learning_rate": 9.985940534876835e-06,
|
| 5749 |
+
"loss": 10.9926,
|
| 5750 |
+
"step": 8110
|
| 5751 |
+
},
|
| 5752 |
+
{
|
| 5753 |
+
"epoch": 0.360366357670267,
|
| 5754 |
+
"grad_norm": 83.08773040771484,
|
| 5755 |
+
"learning_rate": 9.985923198914908e-06,
|
| 5756 |
+
"loss": 10.9091,
|
| 5757 |
+
"step": 8120
|
| 5758 |
+
},
|
| 5759 |
+
{
|
| 5760 |
+
"epoch": 0.36081015860335847,
|
| 5761 |
+
"grad_norm": 79.4489974975586,
|
| 5762 |
+
"learning_rate": 9.98590586295298e-06,
|
| 5763 |
+
"loss": 10.9493,
|
| 5764 |
+
"step": 8130
|
| 5765 |
+
},
|
| 5766 |
+
{
|
| 5767 |
+
"epoch": 0.3612539595364499,
|
| 5768 |
+
"grad_norm": 85.71393585205078,
|
| 5769 |
+
"learning_rate": 9.985888526991053e-06,
|
| 5770 |
+
"loss": 11.4266,
|
| 5771 |
+
"step": 8140
|
| 5772 |
+
},
|
| 5773 |
+
{
|
| 5774 |
+
"epoch": 0.3616977604695414,
|
| 5775 |
+
"grad_norm": 81.63021087646484,
|
| 5776 |
+
"learning_rate": 9.985871191029126e-06,
|
| 5777 |
+
"loss": 11.6757,
|
| 5778 |
+
"step": 8150
|
| 5779 |
+
},
|
| 5780 |
+
{
|
| 5781 |
+
"epoch": 0.36214156140263287,
|
| 5782 |
+
"grad_norm": 91.55906677246094,
|
| 5783 |
+
"learning_rate": 9.985853855067197e-06,
|
| 5784 |
+
"loss": 11.6191,
|
| 5785 |
+
"step": 8160
|
| 5786 |
+
},
|
| 5787 |
+
{
|
| 5788 |
+
"epoch": 0.3625853623357243,
|
| 5789 |
+
"grad_norm": 80.61488342285156,
|
| 5790 |
+
"learning_rate": 9.98583651910527e-06,
|
| 5791 |
+
"loss": 11.5607,
|
| 5792 |
+
"step": 8170
|
| 5793 |
+
},
|
| 5794 |
+
{
|
| 5795 |
+
"epoch": 0.3630291632688158,
|
| 5796 |
+
"grad_norm": 100.6302261352539,
|
| 5797 |
+
"learning_rate": 9.985819183143343e-06,
|
| 5798 |
+
"loss": 11.6338,
|
| 5799 |
+
"step": 8180
|
| 5800 |
+
},
|
| 5801 |
+
{
|
| 5802 |
+
"epoch": 0.3634729642019072,
|
| 5803 |
+
"grad_norm": 98.94048309326172,
|
| 5804 |
+
"learning_rate": 9.985801847181416e-06,
|
| 5805 |
+
"loss": 11.8984,
|
| 5806 |
+
"step": 8190
|
| 5807 |
+
},
|
| 5808 |
+
{
|
| 5809 |
+
"epoch": 0.3639167651349987,
|
| 5810 |
+
"grad_norm": 94.3434066772461,
|
| 5811 |
+
"learning_rate": 9.985784511219488e-06,
|
| 5812 |
+
"loss": 11.4629,
|
| 5813 |
+
"step": 8200
|
| 5814 |
+
},
|
| 5815 |
+
{
|
| 5816 |
+
"epoch": 0.36436056606809014,
|
| 5817 |
+
"grad_norm": 117.29963684082031,
|
| 5818 |
+
"learning_rate": 9.985767175257561e-06,
|
| 5819 |
+
"loss": 11.5717,
|
| 5820 |
+
"step": 8210
|
| 5821 |
+
},
|
| 5822 |
+
{
|
| 5823 |
+
"epoch": 0.36480436700118163,
|
| 5824 |
+
"grad_norm": 96.46138763427734,
|
| 5825 |
+
"learning_rate": 9.985749839295634e-06,
|
| 5826 |
+
"loss": 11.2935,
|
| 5827 |
+
"step": 8220
|
| 5828 |
+
},
|
| 5829 |
+
{
|
| 5830 |
+
"epoch": 0.36524816793427306,
|
| 5831 |
+
"grad_norm": 88.48851776123047,
|
| 5832 |
+
"learning_rate": 9.985732503333705e-06,
|
| 5833 |
+
"loss": 11.4625,
|
| 5834 |
+
"step": 8230
|
| 5835 |
+
},
|
| 5836 |
+
{
|
| 5837 |
+
"epoch": 0.36569196886736455,
|
| 5838 |
+
"grad_norm": 90.18971252441406,
|
| 5839 |
+
"learning_rate": 9.985715167371778e-06,
|
| 5840 |
+
"loss": 11.2449,
|
| 5841 |
+
"step": 8240
|
| 5842 |
+
},
|
| 5843 |
+
{
|
| 5844 |
+
"epoch": 0.366135769800456,
|
| 5845 |
+
"grad_norm": 87.7426986694336,
|
| 5846 |
+
"learning_rate": 9.985697831409852e-06,
|
| 5847 |
+
"loss": 11.3782,
|
| 5848 |
+
"step": 8250
|
| 5849 |
+
},
|
| 5850 |
+
{
|
| 5851 |
+
"epoch": 0.36657957073354747,
|
| 5852 |
+
"grad_norm": 97.00252532958984,
|
| 5853 |
+
"learning_rate": 9.985680495447923e-06,
|
| 5854 |
+
"loss": 11.3337,
|
| 5855 |
+
"step": 8260
|
| 5856 |
+
},
|
| 5857 |
+
{
|
| 5858 |
+
"epoch": 0.36702337166663895,
|
| 5859 |
+
"grad_norm": 109.61273193359375,
|
| 5860 |
+
"learning_rate": 9.985663159485996e-06,
|
| 5861 |
+
"loss": 11.0625,
|
| 5862 |
+
"step": 8270
|
| 5863 |
+
},
|
| 5864 |
+
{
|
| 5865 |
+
"epoch": 0.3674671725997304,
|
| 5866 |
+
"grad_norm": 86.43873596191406,
|
| 5867 |
+
"learning_rate": 9.985645823524069e-06,
|
| 5868 |
+
"loss": 10.7205,
|
| 5869 |
+
"step": 8280
|
| 5870 |
+
},
|
| 5871 |
+
{
|
| 5872 |
+
"epoch": 0.36791097353282187,
|
| 5873 |
+
"grad_norm": 110.65450286865234,
|
| 5874 |
+
"learning_rate": 9.98562848756214e-06,
|
| 5875 |
+
"loss": 11.3779,
|
| 5876 |
+
"step": 8290
|
| 5877 |
+
},
|
| 5878 |
+
{
|
| 5879 |
+
"epoch": 0.3683547744659133,
|
| 5880 |
+
"grad_norm": 97.357421875,
|
| 5881 |
+
"learning_rate": 9.985611151600214e-06,
|
| 5882 |
+
"loss": 11.049,
|
| 5883 |
+
"step": 8300
|
| 5884 |
+
},
|
| 5885 |
+
{
|
| 5886 |
+
"epoch": 0.3687985753990048,
|
| 5887 |
+
"grad_norm": 72.67398834228516,
|
| 5888 |
+
"learning_rate": 9.985593815638287e-06,
|
| 5889 |
+
"loss": 10.9327,
|
| 5890 |
+
"step": 8310
|
| 5891 |
+
},
|
| 5892 |
+
{
|
| 5893 |
+
"epoch": 0.3692423763320962,
|
| 5894 |
+
"grad_norm": 80.5442886352539,
|
| 5895 |
+
"learning_rate": 9.985576479676358e-06,
|
| 5896 |
+
"loss": 11.3067,
|
| 5897 |
+
"step": 8320
|
| 5898 |
+
},
|
| 5899 |
+
{
|
| 5900 |
+
"epoch": 0.3696861772651877,
|
| 5901 |
+
"grad_norm": 104.49150848388672,
|
| 5902 |
+
"learning_rate": 9.985559143714431e-06,
|
| 5903 |
+
"loss": 11.7007,
|
| 5904 |
+
"step": 8330
|
| 5905 |
+
},
|
| 5906 |
+
{
|
| 5907 |
+
"epoch": 0.37012997819827914,
|
| 5908 |
+
"grad_norm": 93.68840789794922,
|
| 5909 |
+
"learning_rate": 9.985541807752504e-06,
|
| 5910 |
+
"loss": 12.1018,
|
| 5911 |
+
"step": 8340
|
| 5912 |
+
},
|
| 5913 |
+
{
|
| 5914 |
+
"epoch": 0.37057377913137063,
|
| 5915 |
+
"grad_norm": 92.62474060058594,
|
| 5916 |
+
"learning_rate": 9.985524471790576e-06,
|
| 5917 |
+
"loss": 11.2158,
|
| 5918 |
+
"step": 8350
|
| 5919 |
+
},
|
| 5920 |
+
{
|
| 5921 |
+
"epoch": 0.37101758006446206,
|
| 5922 |
+
"grad_norm": 94.18134307861328,
|
| 5923 |
+
"learning_rate": 9.985507135828649e-06,
|
| 5924 |
+
"loss": 11.551,
|
| 5925 |
+
"step": 8360
|
| 5926 |
+
},
|
| 5927 |
+
{
|
| 5928 |
+
"epoch": 0.37146138099755355,
|
| 5929 |
+
"grad_norm": 97.85765838623047,
|
| 5930 |
+
"learning_rate": 9.985489799866722e-06,
|
| 5931 |
+
"loss": 11.6042,
|
| 5932 |
+
"step": 8370
|
| 5933 |
+
},
|
| 5934 |
+
{
|
| 5935 |
+
"epoch": 0.37190518193064503,
|
| 5936 |
+
"grad_norm": 88.52871704101562,
|
| 5937 |
+
"learning_rate": 9.985472463904795e-06,
|
| 5938 |
+
"loss": 11.8907,
|
| 5939 |
+
"step": 8380
|
| 5940 |
+
},
|
| 5941 |
+
{
|
| 5942 |
+
"epoch": 0.37234898286373647,
|
| 5943 |
+
"grad_norm": 95.93720245361328,
|
| 5944 |
+
"learning_rate": 9.985455127942866e-06,
|
| 5945 |
+
"loss": 11.1994,
|
| 5946 |
+
"step": 8390
|
| 5947 |
+
},
|
| 5948 |
+
{
|
| 5949 |
+
"epoch": 0.37279278379682795,
|
| 5950 |
+
"grad_norm": 73.47252655029297,
|
| 5951 |
+
"learning_rate": 9.98543779198094e-06,
|
| 5952 |
+
"loss": 11.1229,
|
| 5953 |
+
"step": 8400
|
| 5954 |
+
},
|
| 5955 |
+
{
|
| 5956 |
+
"epoch": 0.3732365847299194,
|
| 5957 |
+
"grad_norm": 87.63044738769531,
|
| 5958 |
+
"learning_rate": 9.985420456019012e-06,
|
| 5959 |
+
"loss": 11.2802,
|
| 5960 |
+
"step": 8410
|
| 5961 |
+
},
|
| 5962 |
+
{
|
| 5963 |
+
"epoch": 0.37368038566301087,
|
| 5964 |
+
"grad_norm": 85.62527465820312,
|
| 5965 |
+
"learning_rate": 9.985403120057084e-06,
|
| 5966 |
+
"loss": 11.4917,
|
| 5967 |
+
"step": 8420
|
| 5968 |
+
},
|
| 5969 |
+
{
|
| 5970 |
+
"epoch": 0.3741241865961023,
|
| 5971 |
+
"grad_norm": 84.97439575195312,
|
| 5972 |
+
"learning_rate": 9.985385784095157e-06,
|
| 5973 |
+
"loss": 11.4111,
|
| 5974 |
+
"step": 8430
|
| 5975 |
+
},
|
| 5976 |
+
{
|
| 5977 |
+
"epoch": 0.3745679875291938,
|
| 5978 |
+
"grad_norm": 91.50364685058594,
|
| 5979 |
+
"learning_rate": 9.98536844813323e-06,
|
| 5980 |
+
"loss": 11.4548,
|
| 5981 |
+
"step": 8440
|
| 5982 |
+
},
|
| 5983 |
+
{
|
| 5984 |
+
"epoch": 0.3750117884622852,
|
| 5985 |
+
"grad_norm": 91.25043487548828,
|
| 5986 |
+
"learning_rate": 9.985351112171301e-06,
|
| 5987 |
+
"loss": 11.793,
|
| 5988 |
+
"step": 8450
|
| 5989 |
+
},
|
| 5990 |
+
{
|
| 5991 |
+
"epoch": 0.3754555893953767,
|
| 5992 |
+
"grad_norm": 93.69058227539062,
|
| 5993 |
+
"learning_rate": 9.985333776209374e-06,
|
| 5994 |
+
"loss": 11.0371,
|
| 5995 |
+
"step": 8460
|
| 5996 |
+
},
|
| 5997 |
+
{
|
| 5998 |
+
"epoch": 0.37589939032846814,
|
| 5999 |
+
"grad_norm": 89.45205688476562,
|
| 6000 |
+
"learning_rate": 9.985316440247447e-06,
|
| 6001 |
+
"loss": 11.3039,
|
| 6002 |
+
"step": 8470
|
| 6003 |
+
},
|
| 6004 |
+
{
|
| 6005 |
+
"epoch": 0.37634319126155963,
|
| 6006 |
+
"grad_norm": 97.13536071777344,
|
| 6007 |
+
"learning_rate": 9.98529910428552e-06,
|
| 6008 |
+
"loss": 11.1143,
|
| 6009 |
+
"step": 8480
|
| 6010 |
+
},
|
| 6011 |
+
{
|
| 6012 |
+
"epoch": 0.3767869921946511,
|
| 6013 |
+
"grad_norm": 95.88386535644531,
|
| 6014 |
+
"learning_rate": 9.985281768323592e-06,
|
| 6015 |
+
"loss": 10.9521,
|
| 6016 |
+
"step": 8490
|
| 6017 |
+
},
|
| 6018 |
+
{
|
| 6019 |
+
"epoch": 0.37723079312774255,
|
| 6020 |
+
"grad_norm": 107.3424072265625,
|
| 6021 |
+
"learning_rate": 9.985264432361665e-06,
|
| 6022 |
+
"loss": 11.4563,
|
| 6023 |
+
"step": 8500
|
| 6024 |
+
},
|
| 6025 |
+
{
|
| 6026 |
+
"epoch": 0.37767459406083403,
|
| 6027 |
+
"grad_norm": 78.75535583496094,
|
| 6028 |
+
"learning_rate": 9.985247096399738e-06,
|
| 6029 |
+
"loss": 11.6325,
|
| 6030 |
+
"step": 8510
|
| 6031 |
+
},
|
| 6032 |
+
{
|
| 6033 |
+
"epoch": 0.37811839499392547,
|
| 6034 |
+
"grad_norm": 93.4799575805664,
|
| 6035 |
+
"learning_rate": 9.98522976043781e-06,
|
| 6036 |
+
"loss": 11.5845,
|
| 6037 |
+
"step": 8520
|
| 6038 |
+
},
|
| 6039 |
+
{
|
| 6040 |
+
"epoch": 0.37856219592701695,
|
| 6041 |
+
"grad_norm": 82.9742202758789,
|
| 6042 |
+
"learning_rate": 9.985212424475882e-06,
|
| 6043 |
+
"loss": 10.9067,
|
| 6044 |
+
"step": 8530
|
| 6045 |
+
},
|
| 6046 |
+
{
|
| 6047 |
+
"epoch": 0.3790059968601084,
|
| 6048 |
+
"grad_norm": 86.02015686035156,
|
| 6049 |
+
"learning_rate": 9.985195088513956e-06,
|
| 6050 |
+
"loss": 11.7097,
|
| 6051 |
+
"step": 8540
|
| 6052 |
+
},
|
| 6053 |
+
{
|
| 6054 |
+
"epoch": 0.37944979779319987,
|
| 6055 |
+
"grad_norm": 78.73582458496094,
|
| 6056 |
+
"learning_rate": 9.985177752552027e-06,
|
| 6057 |
+
"loss": 11.3756,
|
| 6058 |
+
"step": 8550
|
| 6059 |
+
},
|
| 6060 |
+
{
|
| 6061 |
+
"epoch": 0.3798935987262913,
|
| 6062 |
+
"grad_norm": 86.17765808105469,
|
| 6063 |
+
"learning_rate": 9.9851604165901e-06,
|
| 6064 |
+
"loss": 11.1242,
|
| 6065 |
+
"step": 8560
|
| 6066 |
+
},
|
| 6067 |
+
{
|
| 6068 |
+
"epoch": 0.3803373996593828,
|
| 6069 |
+
"grad_norm": 103.56576538085938,
|
| 6070 |
+
"learning_rate": 9.985143080628173e-06,
|
| 6071 |
+
"loss": 11.5288,
|
| 6072 |
+
"step": 8570
|
| 6073 |
+
},
|
| 6074 |
+
{
|
| 6075 |
+
"epoch": 0.3807812005924742,
|
| 6076 |
+
"grad_norm": 96.16366577148438,
|
| 6077 |
+
"learning_rate": 9.985125744666244e-06,
|
| 6078 |
+
"loss": 11.3524,
|
| 6079 |
+
"step": 8580
|
| 6080 |
+
},
|
| 6081 |
+
{
|
| 6082 |
+
"epoch": 0.3812250015255657,
|
| 6083 |
+
"grad_norm": 79.89984893798828,
|
| 6084 |
+
"learning_rate": 9.985108408704318e-06,
|
| 6085 |
+
"loss": 11.2257,
|
| 6086 |
+
"step": 8590
|
| 6087 |
+
},
|
| 6088 |
+
{
|
| 6089 |
+
"epoch": 0.3816688024586572,
|
| 6090 |
+
"grad_norm": 91.93770599365234,
|
| 6091 |
+
"learning_rate": 9.98509107274239e-06,
|
| 6092 |
+
"loss": 11.5471,
|
| 6093 |
+
"step": 8600
|
| 6094 |
+
},
|
| 6095 |
+
{
|
| 6096 |
+
"epoch": 0.38211260339174863,
|
| 6097 |
+
"grad_norm": 87.27505493164062,
|
| 6098 |
+
"learning_rate": 9.985073736780464e-06,
|
| 6099 |
+
"loss": 11.2449,
|
| 6100 |
+
"step": 8610
|
| 6101 |
+
},
|
| 6102 |
+
{
|
| 6103 |
+
"epoch": 0.3825564043248401,
|
| 6104 |
+
"grad_norm": 93.93415069580078,
|
| 6105 |
+
"learning_rate": 9.985056400818535e-06,
|
| 6106 |
+
"loss": 11.0035,
|
| 6107 |
+
"step": 8620
|
| 6108 |
+
},
|
| 6109 |
+
{
|
| 6110 |
+
"epoch": 0.38300020525793155,
|
| 6111 |
+
"grad_norm": 88.42649841308594,
|
| 6112 |
+
"learning_rate": 9.985039064856608e-06,
|
| 6113 |
+
"loss": 11.3949,
|
| 6114 |
+
"step": 8630
|
| 6115 |
+
},
|
| 6116 |
+
{
|
| 6117 |
+
"epoch": 0.38344400619102303,
|
| 6118 |
+
"grad_norm": 87.21992492675781,
|
| 6119 |
+
"learning_rate": 9.985021728894681e-06,
|
| 6120 |
+
"loss": 11.419,
|
| 6121 |
+
"step": 8640
|
| 6122 |
+
},
|
| 6123 |
+
{
|
| 6124 |
+
"epoch": 0.38388780712411447,
|
| 6125 |
+
"grad_norm": 96.35975646972656,
|
| 6126 |
+
"learning_rate": 9.985004392932753e-06,
|
| 6127 |
+
"loss": 10.8626,
|
| 6128 |
+
"step": 8650
|
| 6129 |
+
},
|
| 6130 |
+
{
|
| 6131 |
+
"epoch": 0.38433160805720595,
|
| 6132 |
+
"grad_norm": 90.79749298095703,
|
| 6133 |
+
"learning_rate": 9.984987056970826e-06,
|
| 6134 |
+
"loss": 11.9004,
|
| 6135 |
+
"step": 8660
|
| 6136 |
+
},
|
| 6137 |
+
{
|
| 6138 |
+
"epoch": 0.3847754089902974,
|
| 6139 |
+
"grad_norm": 88.30585479736328,
|
| 6140 |
+
"learning_rate": 9.984969721008899e-06,
|
| 6141 |
+
"loss": 11.8872,
|
| 6142 |
+
"step": 8670
|
| 6143 |
+
},
|
| 6144 |
+
{
|
| 6145 |
+
"epoch": 0.38521920992338887,
|
| 6146 |
+
"grad_norm": 68.6938247680664,
|
| 6147 |
+
"learning_rate": 9.98495238504697e-06,
|
| 6148 |
+
"loss": 11.1578,
|
| 6149 |
+
"step": 8680
|
| 6150 |
+
},
|
| 6151 |
+
{
|
| 6152 |
+
"epoch": 0.3856630108564803,
|
| 6153 |
+
"grad_norm": 87.89897918701172,
|
| 6154 |
+
"learning_rate": 9.984935049085043e-06,
|
| 6155 |
+
"loss": 11.0376,
|
| 6156 |
+
"step": 8690
|
| 6157 |
+
},
|
| 6158 |
+
{
|
| 6159 |
+
"epoch": 0.3861068117895718,
|
| 6160 |
+
"grad_norm": 103.39437103271484,
|
| 6161 |
+
"learning_rate": 9.984917713123116e-06,
|
| 6162 |
+
"loss": 11.6238,
|
| 6163 |
+
"step": 8700
|
| 6164 |
+
},
|
| 6165 |
+
{
|
| 6166 |
+
"epoch": 0.3865506127226632,
|
| 6167 |
+
"grad_norm": 82.58814239501953,
|
| 6168 |
+
"learning_rate": 9.984900377161188e-06,
|
| 6169 |
+
"loss": 11.2413,
|
| 6170 |
+
"step": 8710
|
| 6171 |
+
},
|
| 6172 |
+
{
|
| 6173 |
+
"epoch": 0.3869944136557547,
|
| 6174 |
+
"grad_norm": 73.86261749267578,
|
| 6175 |
+
"learning_rate": 9.98488304119926e-06,
|
| 6176 |
+
"loss": 10.9585,
|
| 6177 |
+
"step": 8720
|
| 6178 |
+
},
|
| 6179 |
+
{
|
| 6180 |
+
"epoch": 0.3874382145888462,
|
| 6181 |
+
"grad_norm": 100.28836822509766,
|
| 6182 |
+
"learning_rate": 9.984865705237334e-06,
|
| 6183 |
+
"loss": 10.9422,
|
| 6184 |
+
"step": 8730
|
| 6185 |
+
},
|
| 6186 |
+
{
|
| 6187 |
+
"epoch": 0.38788201552193763,
|
| 6188 |
+
"grad_norm": 88.46509552001953,
|
| 6189 |
+
"learning_rate": 9.984848369275407e-06,
|
| 6190 |
+
"loss": 11.1186,
|
| 6191 |
+
"step": 8740
|
| 6192 |
+
},
|
| 6193 |
+
{
|
| 6194 |
+
"epoch": 0.3883258164550291,
|
| 6195 |
+
"grad_norm": 90.18559265136719,
|
| 6196 |
+
"learning_rate": 9.984831033313478e-06,
|
| 6197 |
+
"loss": 11.438,
|
| 6198 |
+
"step": 8750
|
| 6199 |
+
},
|
| 6200 |
+
{
|
| 6201 |
+
"epoch": 0.38876961738812055,
|
| 6202 |
+
"grad_norm": 99.76158142089844,
|
| 6203 |
+
"learning_rate": 9.984813697351551e-06,
|
| 6204 |
+
"loss": 11.6788,
|
| 6205 |
+
"step": 8760
|
| 6206 |
+
},
|
| 6207 |
+
{
|
| 6208 |
+
"epoch": 0.38921341832121203,
|
| 6209 |
+
"grad_norm": 78.58843994140625,
|
| 6210 |
+
"learning_rate": 9.984796361389624e-06,
|
| 6211 |
+
"loss": 11.2294,
|
| 6212 |
+
"step": 8770
|
| 6213 |
+
},
|
| 6214 |
+
{
|
| 6215 |
+
"epoch": 0.38965721925430347,
|
| 6216 |
+
"grad_norm": 117.36835479736328,
|
| 6217 |
+
"learning_rate": 9.984779025427696e-06,
|
| 6218 |
+
"loss": 11.3633,
|
| 6219 |
+
"step": 8780
|
| 6220 |
+
},
|
| 6221 |
+
{
|
| 6222 |
+
"epoch": 0.39010102018739495,
|
| 6223 |
+
"grad_norm": 81.84542846679688,
|
| 6224 |
+
"learning_rate": 9.984761689465769e-06,
|
| 6225 |
+
"loss": 11.9371,
|
| 6226 |
+
"step": 8790
|
| 6227 |
+
},
|
| 6228 |
+
{
|
| 6229 |
+
"epoch": 0.3905448211204864,
|
| 6230 |
+
"grad_norm": 84.65067291259766,
|
| 6231 |
+
"learning_rate": 9.984744353503842e-06,
|
| 6232 |
+
"loss": 11.3477,
|
| 6233 |
+
"step": 8800
|
| 6234 |
+
},
|
| 6235 |
+
{
|
| 6236 |
+
"epoch": 0.3909886220535779,
|
| 6237 |
+
"grad_norm": 86.41151428222656,
|
| 6238 |
+
"learning_rate": 9.984727017541913e-06,
|
| 6239 |
+
"loss": 11.537,
|
| 6240 |
+
"step": 8810
|
| 6241 |
+
},
|
| 6242 |
+
{
|
| 6243 |
+
"epoch": 0.3914324229866693,
|
| 6244 |
+
"grad_norm": 85.5174331665039,
|
| 6245 |
+
"learning_rate": 9.984709681579986e-06,
|
| 6246 |
+
"loss": 11.5844,
|
| 6247 |
+
"step": 8820
|
| 6248 |
+
},
|
| 6249 |
+
{
|
| 6250 |
+
"epoch": 0.3918762239197608,
|
| 6251 |
+
"grad_norm": 103.24414825439453,
|
| 6252 |
+
"learning_rate": 9.98469234561806e-06,
|
| 6253 |
+
"loss": 10.8962,
|
| 6254 |
+
"step": 8830
|
| 6255 |
+
},
|
| 6256 |
+
{
|
| 6257 |
+
"epoch": 0.3923200248528523,
|
| 6258 |
+
"grad_norm": 101.08570098876953,
|
| 6259 |
+
"learning_rate": 9.984675009656131e-06,
|
| 6260 |
+
"loss": 11.1054,
|
| 6261 |
+
"step": 8840
|
| 6262 |
+
},
|
| 6263 |
+
{
|
| 6264 |
+
"epoch": 0.3927638257859437,
|
| 6265 |
+
"grad_norm": 89.28972625732422,
|
| 6266 |
+
"learning_rate": 9.984657673694204e-06,
|
| 6267 |
+
"loss": 10.9952,
|
| 6268 |
+
"step": 8850
|
| 6269 |
+
},
|
| 6270 |
+
{
|
| 6271 |
+
"epoch": 0.3932076267190352,
|
| 6272 |
+
"grad_norm": 97.5589370727539,
|
| 6273 |
+
"learning_rate": 9.984640337732277e-06,
|
| 6274 |
+
"loss": 11.2572,
|
| 6275 |
+
"step": 8860
|
| 6276 |
+
},
|
| 6277 |
+
{
|
| 6278 |
+
"epoch": 0.39365142765212663,
|
| 6279 |
+
"grad_norm": 85.52608489990234,
|
| 6280 |
+
"learning_rate": 9.98462300177035e-06,
|
| 6281 |
+
"loss": 11.2462,
|
| 6282 |
+
"step": 8870
|
| 6283 |
+
},
|
| 6284 |
+
{
|
| 6285 |
+
"epoch": 0.3940952285852181,
|
| 6286 |
+
"grad_norm": 88.88489532470703,
|
| 6287 |
+
"learning_rate": 9.984605665808422e-06,
|
| 6288 |
+
"loss": 11.1294,
|
| 6289 |
+
"step": 8880
|
| 6290 |
+
},
|
| 6291 |
+
{
|
| 6292 |
+
"epoch": 0.39453902951830955,
|
| 6293 |
+
"grad_norm": 98.8681411743164,
|
| 6294 |
+
"learning_rate": 9.984588329846495e-06,
|
| 6295 |
+
"loss": 11.6495,
|
| 6296 |
+
"step": 8890
|
| 6297 |
+
},
|
| 6298 |
+
{
|
| 6299 |
+
"epoch": 0.39498283045140103,
|
| 6300 |
+
"grad_norm": 91.84007263183594,
|
| 6301 |
+
"learning_rate": 9.984570993884568e-06,
|
| 6302 |
+
"loss": 11.3472,
|
| 6303 |
+
"step": 8900
|
| 6304 |
+
},
|
| 6305 |
+
{
|
| 6306 |
+
"epoch": 0.39542663138449247,
|
| 6307 |
+
"grad_norm": 68.3472671508789,
|
| 6308 |
+
"learning_rate": 9.984553657922639e-06,
|
| 6309 |
+
"loss": 11.424,
|
| 6310 |
+
"step": 8910
|
| 6311 |
+
},
|
| 6312 |
+
{
|
| 6313 |
+
"epoch": 0.39587043231758395,
|
| 6314 |
+
"grad_norm": 83.57421875,
|
| 6315 |
+
"learning_rate": 9.984536321960712e-06,
|
| 6316 |
+
"loss": 11.078,
|
| 6317 |
+
"step": 8920
|
| 6318 |
+
},
|
| 6319 |
+
{
|
| 6320 |
+
"epoch": 0.3963142332506754,
|
| 6321 |
+
"grad_norm": 87.4074935913086,
|
| 6322 |
+
"learning_rate": 9.984518985998785e-06,
|
| 6323 |
+
"loss": 11.178,
|
| 6324 |
+
"step": 8930
|
| 6325 |
+
},
|
| 6326 |
+
{
|
| 6327 |
+
"epoch": 0.3967580341837669,
|
| 6328 |
+
"grad_norm": 73.35061645507812,
|
| 6329 |
+
"learning_rate": 9.984501650036857e-06,
|
| 6330 |
+
"loss": 11.0786,
|
| 6331 |
+
"step": 8940
|
| 6332 |
+
},
|
| 6333 |
+
{
|
| 6334 |
+
"epoch": 0.39720183511685836,
|
| 6335 |
+
"grad_norm": 79.557861328125,
|
| 6336 |
+
"learning_rate": 9.98448431407493e-06,
|
| 6337 |
+
"loss": 11.3864,
|
| 6338 |
+
"step": 8950
|
| 6339 |
+
},
|
| 6340 |
+
{
|
| 6341 |
+
"epoch": 0.3976456360499498,
|
| 6342 |
+
"grad_norm": 86.81566619873047,
|
| 6343 |
+
"learning_rate": 9.984466978113003e-06,
|
| 6344 |
+
"loss": 10.7141,
|
| 6345 |
+
"step": 8960
|
| 6346 |
+
},
|
| 6347 |
+
{
|
| 6348 |
+
"epoch": 0.3980894369830413,
|
| 6349 |
+
"grad_norm": 86.90424346923828,
|
| 6350 |
+
"learning_rate": 9.984449642151074e-06,
|
| 6351 |
+
"loss": 10.7673,
|
| 6352 |
+
"step": 8970
|
| 6353 |
+
},
|
| 6354 |
+
{
|
| 6355 |
+
"epoch": 0.3985332379161327,
|
| 6356 |
+
"grad_norm": 92.93916320800781,
|
| 6357 |
+
"learning_rate": 9.984432306189147e-06,
|
| 6358 |
+
"loss": 11.2373,
|
| 6359 |
+
"step": 8980
|
| 6360 |
+
},
|
| 6361 |
+
{
|
| 6362 |
+
"epoch": 0.3989770388492242,
|
| 6363 |
+
"grad_norm": 96.02029418945312,
|
| 6364 |
+
"learning_rate": 9.98441497022722e-06,
|
| 6365 |
+
"loss": 11.1749,
|
| 6366 |
+
"step": 8990
|
| 6367 |
+
},
|
| 6368 |
+
{
|
| 6369 |
+
"epoch": 0.39942083978231563,
|
| 6370 |
+
"grad_norm": 83.88191223144531,
|
| 6371 |
+
"learning_rate": 9.984397634265293e-06,
|
| 6372 |
+
"loss": 11.7742,
|
| 6373 |
+
"step": 9000
|
| 6374 |
+
},
|
| 6375 |
+
{
|
| 6376 |
+
"epoch": 0.39942083978231563,
|
| 6377 |
+
"eval_loss": 0.35243040323257446,
|
| 6378 |
+
"eval_runtime": 674.8358,
|
| 6379 |
+
"eval_samples_per_second": 1799.535,
|
| 6380 |
+
"eval_steps_per_second": 56.236,
|
| 6381 |
+
"step": 9000
|
| 6382 |
+
},
|
| 6383 |
+
{
|
| 6384 |
+
"epoch": 0.3998646407154071,
|
| 6385 |
+
"grad_norm": 73.33333587646484,
|
| 6386 |
+
"learning_rate": 9.984380298303365e-06,
|
| 6387 |
+
"loss": 10.9059,
|
| 6388 |
+
"step": 9010
|
| 6389 |
+
},
|
| 6390 |
+
{
|
| 6391 |
+
"epoch": 0.40030844164849855,
|
| 6392 |
+
"grad_norm": 87.8101806640625,
|
| 6393 |
+
"learning_rate": 9.984362962341438e-06,
|
| 6394 |
+
"loss": 11.2553,
|
| 6395 |
+
"step": 9020
|
| 6396 |
+
},
|
| 6397 |
+
{
|
| 6398 |
+
"epoch": 0.40075224258159003,
|
| 6399 |
+
"grad_norm": 79.9136734008789,
|
| 6400 |
+
"learning_rate": 9.984345626379511e-06,
|
| 6401 |
+
"loss": 11.2405,
|
| 6402 |
+
"step": 9030
|
| 6403 |
+
},
|
| 6404 |
+
{
|
| 6405 |
+
"epoch": 0.40119604351468147,
|
| 6406 |
+
"grad_norm": 92.52330017089844,
|
| 6407 |
+
"learning_rate": 9.984328290417582e-06,
|
| 6408 |
+
"loss": 10.9439,
|
| 6409 |
+
"step": 9040
|
| 6410 |
+
},
|
| 6411 |
+
{
|
| 6412 |
+
"epoch": 0.40163984444777295,
|
| 6413 |
+
"grad_norm": 92.92615509033203,
|
| 6414 |
+
"learning_rate": 9.984310954455655e-06,
|
| 6415 |
+
"loss": 11.3613,
|
| 6416 |
+
"step": 9050
|
| 6417 |
+
},
|
| 6418 |
+
{
|
| 6419 |
+
"epoch": 0.40208364538086444,
|
| 6420 |
+
"grad_norm": 87.73091125488281,
|
| 6421 |
+
"learning_rate": 9.984293618493729e-06,
|
| 6422 |
+
"loss": 11.2295,
|
| 6423 |
+
"step": 9060
|
| 6424 |
+
},
|
| 6425 |
+
{
|
| 6426 |
+
"epoch": 0.4025274463139559,
|
| 6427 |
+
"grad_norm": 94.13227081298828,
|
| 6428 |
+
"learning_rate": 9.9842762825318e-06,
|
| 6429 |
+
"loss": 11.305,
|
| 6430 |
+
"step": 9070
|
| 6431 |
+
},
|
| 6432 |
+
{
|
| 6433 |
+
"epoch": 0.40297124724704736,
|
| 6434 |
+
"grad_norm": 77.8934097290039,
|
| 6435 |
+
"learning_rate": 9.984258946569873e-06,
|
| 6436 |
+
"loss": 11.0494,
|
| 6437 |
+
"step": 9080
|
| 6438 |
+
},
|
| 6439 |
+
{
|
| 6440 |
+
"epoch": 0.4034150481801388,
|
| 6441 |
+
"grad_norm": 76.98465728759766,
|
| 6442 |
+
"learning_rate": 9.984241610607946e-06,
|
| 6443 |
+
"loss": 11.0135,
|
| 6444 |
+
"step": 9090
|
| 6445 |
+
},
|
| 6446 |
+
{
|
| 6447 |
+
"epoch": 0.4038588491132303,
|
| 6448 |
+
"grad_norm": 86.28607177734375,
|
| 6449 |
+
"learning_rate": 9.984224274646019e-06,
|
| 6450 |
+
"loss": 11.2476,
|
| 6451 |
+
"step": 9100
|
| 6452 |
+
},
|
| 6453 |
+
{
|
| 6454 |
+
"epoch": 0.4043026500463217,
|
| 6455 |
+
"grad_norm": 83.22606658935547,
|
| 6456 |
+
"learning_rate": 9.98420693868409e-06,
|
| 6457 |
+
"loss": 11.472,
|
| 6458 |
+
"step": 9110
|
| 6459 |
+
},
|
| 6460 |
+
{
|
| 6461 |
+
"epoch": 0.4047464509794132,
|
| 6462 |
+
"grad_norm": 87.87577819824219,
|
| 6463 |
+
"learning_rate": 9.984189602722164e-06,
|
| 6464 |
+
"loss": 11.237,
|
| 6465 |
+
"step": 9120
|
| 6466 |
+
},
|
| 6467 |
+
{
|
| 6468 |
+
"epoch": 0.40519025191250463,
|
| 6469 |
+
"grad_norm": 93.7770004272461,
|
| 6470 |
+
"learning_rate": 9.984172266760237e-06,
|
| 6471 |
+
"loss": 11.2706,
|
| 6472 |
+
"step": 9130
|
| 6473 |
+
},
|
| 6474 |
+
+    {
+      "epoch": 0.4056340528455961,
+      "grad_norm": 80.65582275390625,
+      "learning_rate": 9.984154930798308e-06,
+      "loss": 11.3943,
+      "step": 9140
+    },
+    {
+      "epoch": 0.40607785377868755,
+      "grad_norm": 86.08631896972656,
+      "learning_rate": 9.984137594836381e-06,
+      "loss": 11.2704,
+      "step": 9150
+    },
+    {
+      "epoch": 0.40652165471177903,
+      "grad_norm": 79.62726593017578,
+      "learning_rate": 9.984120258874454e-06,
+      "loss": 11.5724,
+      "step": 9160
+    },
+    {
+      "epoch": 0.40696545564487047,
+      "grad_norm": 88.90939331054688,
+      "learning_rate": 9.984102922912526e-06,
+      "loss": 11.2739,
+      "step": 9170
+    },
+    {
+      "epoch": 0.40740925657796195,
+      "grad_norm": 92.309814453125,
+      "learning_rate": 9.984085586950599e-06,
+      "loss": 11.535,
+      "step": 9180
+    },
+    {
+      "epoch": 0.40785305751105344,
+      "grad_norm": 83.53838348388672,
+      "learning_rate": 9.984068250988672e-06,
+      "loss": 11.3033,
+      "step": 9190
+    },
+    {
+      "epoch": 0.4082968584441449,
+      "grad_norm": 92.0191421508789,
+      "learning_rate": 9.984050915026743e-06,
+      "loss": 11.8525,
+      "step": 9200
+    },
+    {
+      "epoch": 0.40874065937723636,
+      "grad_norm": 96.62713623046875,
+      "learning_rate": 9.984033579064816e-06,
+      "loss": 10.9853,
+      "step": 9210
+    },
+    {
+      "epoch": 0.4091844603103278,
+      "grad_norm": 92.35746765136719,
+      "learning_rate": 9.98401624310289e-06,
+      "loss": 11.1736,
+      "step": 9220
+    },
+    {
+      "epoch": 0.4096282612434193,
+      "grad_norm": 87.0098876953125,
+      "learning_rate": 9.98399890714096e-06,
+      "loss": 11.3787,
+      "step": 9230
+    },
+    {
+      "epoch": 0.4100720621765107,
+      "grad_norm": 75.16019439697266,
+      "learning_rate": 9.983981571179034e-06,
+      "loss": 11.0318,
+      "step": 9240
+    },
+    {
+      "epoch": 0.4105158631096022,
+      "grad_norm": 89.38213348388672,
+      "learning_rate": 9.983964235217107e-06,
+      "loss": 10.8708,
+      "step": 9250
+    },
+    {
+      "epoch": 0.41095966404269363,
+      "grad_norm": 78.94715881347656,
+      "learning_rate": 9.983946899255178e-06,
+      "loss": 11.1103,
+      "step": 9260
+    },
+    {
+      "epoch": 0.4114034649757851,
+      "grad_norm": 93.04794311523438,
+      "learning_rate": 9.983929563293251e-06,
+      "loss": 11.2785,
+      "step": 9270
+    },
+    {
+      "epoch": 0.41184726590887655,
+      "grad_norm": 91.3328857421875,
+      "learning_rate": 9.983912227331324e-06,
+      "loss": 11.3683,
+      "step": 9280
+    },
+    {
+      "epoch": 0.41229106684196803,
+      "grad_norm": 83.09625244140625,
+      "learning_rate": 9.983894891369397e-06,
+      "loss": 10.9742,
+      "step": 9290
+    },
+    {
+      "epoch": 0.4127348677750595,
+      "grad_norm": 88.08326721191406,
+      "learning_rate": 9.983877555407469e-06,
+      "loss": 11.1625,
+      "step": 9300
+    },
+    {
+      "epoch": 0.41317866870815095,
+      "grad_norm": 102.78692626953125,
+      "learning_rate": 9.983860219445542e-06,
+      "loss": 11.5739,
+      "step": 9310
+    },
+    {
+      "epoch": 0.41362246964124244,
+      "grad_norm": 95.94892883300781,
+      "learning_rate": 9.983842883483615e-06,
+      "loss": 10.9203,
+      "step": 9320
+    },
+    {
+      "epoch": 0.4140662705743339,
+      "grad_norm": 116.64979553222656,
+      "learning_rate": 9.983825547521686e-06,
+      "loss": 11.2924,
+      "step": 9330
+    },
+    {
+      "epoch": 0.41451007150742536,
+      "grad_norm": 83.40705871582031,
+      "learning_rate": 9.98380821155976e-06,
+      "loss": 12.1134,
+      "step": 9340
+    },
+    {
+      "epoch": 0.4149538724405168,
+      "grad_norm": 92.19294738769531,
+      "learning_rate": 9.983790875597833e-06,
+      "loss": 11.2714,
+      "step": 9350
+    },
+    {
+      "epoch": 0.4153976733736083,
+      "grad_norm": 78.88662719726562,
+      "learning_rate": 9.983773539635904e-06,
+      "loss": 10.875,
+      "step": 9360
+    },
+    {
+      "epoch": 0.4158414743066997,
+      "grad_norm": 82.31551361083984,
+      "learning_rate": 9.983756203673977e-06,
+      "loss": 10.9331,
+      "step": 9370
+    },
+    {
+      "epoch": 0.4162852752397912,
+      "grad_norm": 92.06917572021484,
+      "learning_rate": 9.98373886771205e-06,
+      "loss": 11.0438,
+      "step": 9380
+    },
+    {
+      "epoch": 0.41672907617288263,
+      "grad_norm": 81.9530029296875,
+      "learning_rate": 9.983721531750121e-06,
+      "loss": 11.4246,
+      "step": 9390
+    },
+    {
+      "epoch": 0.4171728771059741,
+      "grad_norm": 88.1327896118164,
+      "learning_rate": 9.983704195788195e-06,
+      "loss": 11.0952,
+      "step": 9400
+    },
+    {
+      "epoch": 0.4176166780390656,
+      "grad_norm": 98.14168548583984,
+      "learning_rate": 9.983686859826268e-06,
+      "loss": 10.9578,
+      "step": 9410
+    },
+    {
+      "epoch": 0.41806047897215703,
+      "grad_norm": 98.64930725097656,
+      "learning_rate": 9.983669523864339e-06,
+      "loss": 12.0788,
+      "step": 9420
+    },
+    {
+      "epoch": 0.4185042799052485,
+      "grad_norm": 90.62484741210938,
+      "learning_rate": 9.983652187902412e-06,
+      "loss": 11.4321,
+      "step": 9430
+    },
+    {
+      "epoch": 0.41894808083833995,
+      "grad_norm": 86.90058898925781,
+      "learning_rate": 9.983634851940485e-06,
+      "loss": 11.3379,
+      "step": 9440
+    },
+    {
+      "epoch": 0.41939188177143144,
+      "grad_norm": 95.19513702392578,
+      "learning_rate": 9.983617515978557e-06,
+      "loss": 11.4713,
+      "step": 9450
+    },
+    {
+      "epoch": 0.4198356827045229,
+      "grad_norm": 86.9664535522461,
+      "learning_rate": 9.98360018001663e-06,
+      "loss": 11.6491,
+      "step": 9460
+    },
+    {
+      "epoch": 0.42027948363761436,
+      "grad_norm": 83.81656646728516,
+      "learning_rate": 9.983582844054703e-06,
+      "loss": 10.9501,
+      "step": 9470
+    },
+    {
+      "epoch": 0.4207232845707058,
+      "grad_norm": 80.9144058227539,
+      "learning_rate": 9.983565508092774e-06,
+      "loss": 11.2825,
+      "step": 9480
+    },
+    {
+      "epoch": 0.4211670855037973,
+      "grad_norm": 85.7936782836914,
+      "learning_rate": 9.983548172130847e-06,
+      "loss": 10.8854,
+      "step": 9490
+    },
+    {
+      "epoch": 0.4216108864368887,
+      "grad_norm": 94.24036407470703,
+      "learning_rate": 9.98353083616892e-06,
+      "loss": 11.5424,
+      "step": 9500
+    },
+    {
+      "epoch": 0.4220546873699802,
+      "grad_norm": 86.44004821777344,
+      "learning_rate": 9.983513500206993e-06,
+      "loss": 11.6961,
+      "step": 9510
+    },
+    {
+      "epoch": 0.4224984883030717,
+      "grad_norm": 79.09918975830078,
+      "learning_rate": 9.983496164245065e-06,
+      "loss": 11.5077,
+      "step": 9520
+    },
+    {
+      "epoch": 0.4229422892361631,
+      "grad_norm": 106.33499908447266,
+      "learning_rate": 9.983478828283138e-06,
+      "loss": 11.7945,
+      "step": 9530
+    },
+    {
+      "epoch": 0.4233860901692546,
+      "grad_norm": 89.4134292602539,
+      "learning_rate": 9.983461492321211e-06,
+      "loss": 11.2233,
+      "step": 9540
+    },
+    {
+      "epoch": 0.42382989110234603,
+      "grad_norm": 85.91355895996094,
+      "learning_rate": 9.983444156359282e-06,
+      "loss": 11.2504,
+      "step": 9550
+    },
+    {
+      "epoch": 0.4242736920354375,
+      "grad_norm": 74.06096649169922,
+      "learning_rate": 9.983426820397355e-06,
+      "loss": 11.6198,
+      "step": 9560
+    },
+    {
+      "epoch": 0.42471749296852895,
+      "grad_norm": 81.07852172851562,
+      "learning_rate": 9.983409484435428e-06,
+      "loss": 11.7593,
+      "step": 9570
+    },
+    {
+      "epoch": 0.42516129390162044,
+      "grad_norm": 87.0907211303711,
+      "learning_rate": 9.9833921484735e-06,
+      "loss": 11.0808,
+      "step": 9580
+    },
+    {
+      "epoch": 0.4256050948347119,
+      "grad_norm": 95.18062591552734,
+      "learning_rate": 9.983374812511573e-06,
+      "loss": 11.0543,
+      "step": 9590
+    },
+    {
+      "epoch": 0.42604889576780336,
+      "grad_norm": 85.12203979492188,
+      "learning_rate": 9.983357476549646e-06,
+      "loss": 11.0986,
+      "step": 9600
+    },
+    {
+      "epoch": 0.4264926967008948,
+      "grad_norm": 80.9763412475586,
+      "learning_rate": 9.983340140587717e-06,
+      "loss": 11.1147,
+      "step": 9610
+    },
+    {
+      "epoch": 0.4269364976339863,
+      "grad_norm": 80.79862213134766,
+      "learning_rate": 9.98332280462579e-06,
+      "loss": 10.9127,
+      "step": 9620
+    },
+    {
+      "epoch": 0.4273802985670777,
+      "grad_norm": 93.28567504882812,
+      "learning_rate": 9.983305468663863e-06,
+      "loss": 11.6276,
+      "step": 9630
+    },
+    {
+      "epoch": 0.4278240995001692,
+      "grad_norm": 92.6642074584961,
+      "learning_rate": 9.983288132701935e-06,
+      "loss": 11.6817,
+      "step": 9640
+    },
+    {
+      "epoch": 0.4282679004332607,
+      "grad_norm": 84.80957794189453,
+      "learning_rate": 9.983270796740008e-06,
+      "loss": 11.0266,
+      "step": 9650
+    },
+    {
+      "epoch": 0.4287117013663521,
+      "grad_norm": 80.59945678710938,
+      "learning_rate": 9.983253460778081e-06,
+      "loss": 11.211,
+      "step": 9660
+    },
+    {
+      "epoch": 0.4291555022994436,
+      "grad_norm": 83.12669372558594,
+      "learning_rate": 9.983236124816152e-06,
+      "loss": 11.2012,
+      "step": 9670
+    },
+    {
+      "epoch": 0.42959930323253503,
+      "grad_norm": 92.85382080078125,
+      "learning_rate": 9.983218788854225e-06,
+      "loss": 10.8469,
+      "step": 9680
+    },
+    {
+      "epoch": 0.4300431041656265,
+      "grad_norm": 89.2972640991211,
+      "learning_rate": 9.983201452892299e-06,
+      "loss": 11.5653,
+      "step": 9690
+    },
+    {
+      "epoch": 0.43048690509871795,
+      "grad_norm": 82.58189392089844,
+      "learning_rate": 9.98318411693037e-06,
+      "loss": 11.5281,
+      "step": 9700
+    },
+    {
+      "epoch": 0.43093070603180944,
+      "grad_norm": 75.15813446044922,
+      "learning_rate": 9.983166780968443e-06,
+      "loss": 11.4775,
+      "step": 9710
+    },
+    {
+      "epoch": 0.4313745069649009,
+      "grad_norm": 87.26850128173828,
+      "learning_rate": 9.983149445006516e-06,
+      "loss": 11.4795,
+      "step": 9720
+    },
+    {
+      "epoch": 0.43181830789799236,
+      "grad_norm": 80.3275375366211,
+      "learning_rate": 9.98313210904459e-06,
+      "loss": 10.6572,
+      "step": 9730
+    },
+    {
+      "epoch": 0.4322621088310838,
+      "grad_norm": 77.77581024169922,
+      "learning_rate": 9.98311477308266e-06,
+      "loss": 11.1713,
+      "step": 9740
+    },
+    {
+      "epoch": 0.4327059097641753,
+      "grad_norm": 86.08430480957031,
+      "learning_rate": 9.983097437120734e-06,
+      "loss": 11.2765,
+      "step": 9750
+    },
+    {
+      "epoch": 0.43314971069726677,
+      "grad_norm": 80.2632827758789,
+      "learning_rate": 9.983080101158807e-06,
+      "loss": 11.1026,
+      "step": 9760
+    },
+    {
+      "epoch": 0.4335935116303582,
+      "grad_norm": 90.77334594726562,
+      "learning_rate": 9.983062765196878e-06,
+      "loss": 11.2743,
+      "step": 9770
+    },
+    {
+      "epoch": 0.4340373125634497,
+      "grad_norm": 85.51402282714844,
+      "learning_rate": 9.983045429234951e-06,
+      "loss": 11.1924,
+      "step": 9780
+    },
+    {
+      "epoch": 0.4344811134965411,
+      "grad_norm": 87.34100341796875,
+      "learning_rate": 9.983028093273024e-06,
+      "loss": 11.5827,
+      "step": 9790
+    },
+    {
+      "epoch": 0.4349249144296326,
+      "grad_norm": 104.64212036132812,
+      "learning_rate": 9.983010757311096e-06,
+      "loss": 11.2375,
+      "step": 9800
+    },
+    {
+      "epoch": 0.43536871536272403,
+      "grad_norm": 70.56439208984375,
+      "learning_rate": 9.982993421349169e-06,
+      "loss": 11.2364,
+      "step": 9810
+    },
+    {
+      "epoch": 0.4358125162958155,
+      "grad_norm": 83.87458038330078,
+      "learning_rate": 9.982976085387242e-06,
+      "loss": 11.6634,
+      "step": 9820
+    },
+    {
+      "epoch": 0.43625631722890695,
+      "grad_norm": 85.76553344726562,
+      "learning_rate": 9.982958749425313e-06,
+      "loss": 11.5362,
+      "step": 9830
+    },
+    {
+      "epoch": 0.43670011816199844,
+      "grad_norm": 89.0379867553711,
+      "learning_rate": 9.982941413463386e-06,
+      "loss": 11.1996,
+      "step": 9840
+    },
+    {
+      "epoch": 0.4371439190950899,
+      "grad_norm": 103.95638275146484,
+      "learning_rate": 9.98292407750146e-06,
+      "loss": 10.9446,
+      "step": 9850
+    },
+    {
+      "epoch": 0.43758772002818136,
+      "grad_norm": 82.41026306152344,
+      "learning_rate": 9.98290674153953e-06,
+      "loss": 11.219,
+      "step": 9860
+    },
+    {
+      "epoch": 0.43803152096127285,
+      "grad_norm": 86.83589172363281,
+      "learning_rate": 9.982889405577604e-06,
+      "loss": 11.787,
+      "step": 9870
+    },
+    {
+      "epoch": 0.4384753218943643,
+      "grad_norm": 80.76217651367188,
+      "learning_rate": 9.982872069615677e-06,
+      "loss": 10.6874,
+      "step": 9880
+    },
+    {
+      "epoch": 0.43891912282745577,
+      "grad_norm": 79.48180389404297,
+      "learning_rate": 9.982854733653748e-06,
+      "loss": 11.0988,
+      "step": 9890
+    },
+    {
+      "epoch": 0.4393629237605472,
+      "grad_norm": 96.88164520263672,
+      "learning_rate": 9.982837397691821e-06,
+      "loss": 11.2804,
+      "step": 9900
+    },
+    {
+      "epoch": 0.4398067246936387,
+      "grad_norm": 78.75862121582031,
+      "learning_rate": 9.982820061729894e-06,
+      "loss": 10.8656,
+      "step": 9910
+    },
+    {
+      "epoch": 0.4402505256267301,
+      "grad_norm": 74.44935607910156,
+      "learning_rate": 9.982802725767966e-06,
+      "loss": 11.1552,
+      "step": 9920
+    },
+    {
+      "epoch": 0.4406943265598216,
+      "grad_norm": 90.43840789794922,
+      "learning_rate": 9.982785389806039e-06,
+      "loss": 10.7856,
+      "step": 9930
+    },
+    {
+      "epoch": 0.44113812749291303,
+      "grad_norm": 100.49180603027344,
+      "learning_rate": 9.982768053844112e-06,
+      "loss": 11.1607,
+      "step": 9940
+    },
+    {
+      "epoch": 0.4415819284260045,
+      "grad_norm": 78.78294372558594,
+      "learning_rate": 9.982750717882185e-06,
+      "loss": 11.0682,
+      "step": 9950
+    },
+    {
+      "epoch": 0.44202572935909595,
+      "grad_norm": 98.88738250732422,
+      "learning_rate": 9.982733381920256e-06,
+      "loss": 11.5788,
+      "step": 9960
+    },
+    {
+      "epoch": 0.44246953029218744,
+      "grad_norm": 83.48590087890625,
+      "learning_rate": 9.98271604595833e-06,
+      "loss": 11.1786,
+      "step": 9970
+    },
+    {
+      "epoch": 0.44291333122527893,
+      "grad_norm": 71.23033142089844,
+      "learning_rate": 9.982698709996403e-06,
+      "loss": 10.6031,
+      "step": 9980
+    },
+    {
+      "epoch": 0.44335713215837036,
+      "grad_norm": 92.292236328125,
+      "learning_rate": 9.982681374034474e-06,
+      "loss": 11.3427,
+      "step": 9990
+    },
+    {
+      "epoch": 0.44380093309146185,
+      "grad_norm": 85.08587646484375,
+      "learning_rate": 9.982664038072547e-06,
+      "loss": 11.022,
+      "step": 10000
+    },
+    {
+      "epoch": 0.44380093309146185,
+      "eval_loss": 0.34827741980552673,
+      "eval_runtime": 673.7997,
+      "eval_samples_per_second": 1802.303,
+      "eval_steps_per_second": 56.322,
+      "step": 10000
     }
   ],
   "logging_steps": 10,
…
"attributes": {}
|
| 7107 |
}
|
| 7108 |
},
|
| 7109 |
+
"total_flos": 3.489723205025792e+18,
|
| 7110 |
"train_batch_size": 4,
|
| 7111 |
"trial_name": null,
|
| 7112 |
"trial_params": null
|