Training in progress, step 50000, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3511 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 306619286
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42e515690f5aa44788037bc691fc50cda62efdc1e9ec95468ccfc1c14ddc5921
|
| 3 |
size 306619286
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 919972410
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d73eb6c224b233f29f674cdd64d26045c1f7d11e30ce8854b7e7b89712608f32
|
| 3 |
size 919972410
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a2fbcd26bac3ea7dc02fc9ede5b8a1914ca51611473722a11a969e1f26ac0ee
|
| 3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66d97b511d2fdb8061e5bf72c139923941c148260fac1caedd654028da6986c1
|
| 3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3839473129eb8c438ab312370daa55eb10a0790f33d38fc5eaa24859b54b0d1f
|
| 3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5088a0d34c7015afe60457fbb3f0a4740839369017a42ea4b3250322c2d63ceb
|
| 3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9cac0eb25286b75549fa2030810940adf357064a83facaf5c58ebe37190b6ac
|
| 3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0a57d29811122d52bd53f81af680412b91dde1cd2a12fa885d8a54388be8e2d
|
| 3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c90ab29b255eaf920ecc1cba0b586e426f8e2db67b44a65576693f84178a04f
|
| 3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4efbfa3cfb1bb8fb9c3380e65959a8b4eaf3bceb0507a26ffba1a3e4636ddb1
|
| 3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f4a7a81ed03f103247da707b419a2fc41f93cae3cdeaa774ea677c3726570ee
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 5000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -31579,6 +31579,3514 @@
|
|
| 31579 |
"eval_samples_per_second": 3194.103,
|
| 31580 |
"eval_steps_per_second": 49.909,
|
| 31581 |
"step": 45000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31582 |
}
|
| 31583 |
],
|
| 31584 |
"logging_steps": 10,
|
|
@@ -31598,7 +35106,7 @@
|
|
| 31598 |
"attributes": {}
|
| 31599 |
}
|
| 31600 |
},
|
| 31601 |
-
"total_flos":
|
| 31602 |
"train_batch_size": 8,
|
| 31603 |
"trial_name": null,
|
| 31604 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9278654223991353,
|
| 5 |
"eval_steps": 5000,
|
| 6 |
+
"global_step": 50000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 31579 |
"eval_samples_per_second": 3194.103,
|
| 31580 |
"eval_steps_per_second": 49.909,
|
| 31581 |
"step": 45000
|
| 31582 |
+
},
|
| 31583 |
+
{
|
| 31584 |
+
"epoch": 0.8352644532437016,
|
| 31585 |
+
"grad_norm": 35.46875,
|
| 31586 |
+
"learning_rate": 9.869489967023093e-06,
|
| 31587 |
+
"loss": 18.7658,
|
| 31588 |
+
"step": 45010
|
| 31589 |
+
},
|
| 31590 |
+
{
|
| 31591 |
+
"epoch": 0.8354500263281813,
|
| 31592 |
+
"grad_norm": 38.0625,
|
| 31593 |
+
"learning_rate": 9.86946097123705e-06,
|
| 31594 |
+
"loss": 19.191,
|
| 31595 |
+
"step": 45020
|
| 31596 |
+
},
|
| 31597 |
+
{
|
| 31598 |
+
"epoch": 0.8356355994126612,
|
| 31599 |
+
"grad_norm": 35.5625,
|
| 31600 |
+
"learning_rate": 9.869431975451008e-06,
|
| 31601 |
+
"loss": 19.1063,
|
| 31602 |
+
"step": 45030
|
| 31603 |
+
},
|
| 31604 |
+
{
|
| 31605 |
+
"epoch": 0.835821172497141,
|
| 31606 |
+
"grad_norm": 36.21875,
|
| 31607 |
+
"learning_rate": 9.869402979664966e-06,
|
| 31608 |
+
"loss": 19.1574,
|
| 31609 |
+
"step": 45040
|
| 31610 |
+
},
|
| 31611 |
+
{
|
| 31612 |
+
"epoch": 0.8360067455816208,
|
| 31613 |
+
"grad_norm": 36.3125,
|
| 31614 |
+
"learning_rate": 9.869373983878923e-06,
|
| 31615 |
+
"loss": 18.7662,
|
| 31616 |
+
"step": 45050
|
| 31617 |
+
},
|
| 31618 |
+
{
|
| 31619 |
+
"epoch": 0.8361923186661007,
|
| 31620 |
+
"grad_norm": 34.78125,
|
| 31621 |
+
"learning_rate": 9.869344988092882e-06,
|
| 31622 |
+
"loss": 19.5668,
|
| 31623 |
+
"step": 45060
|
| 31624 |
+
},
|
| 31625 |
+
{
|
| 31626 |
+
"epoch": 0.8363778917505805,
|
| 31627 |
+
"grad_norm": 37.59375,
|
| 31628 |
+
"learning_rate": 9.86931599230684e-06,
|
| 31629 |
+
"loss": 18.7352,
|
| 31630 |
+
"step": 45070
|
| 31631 |
+
},
|
| 31632 |
+
{
|
| 31633 |
+
"epoch": 0.8365634648350603,
|
| 31634 |
+
"grad_norm": 35.15625,
|
| 31635 |
+
"learning_rate": 9.869286996520795e-06,
|
| 31636 |
+
"loss": 18.9398,
|
| 31637 |
+
"step": 45080
|
| 31638 |
+
},
|
| 31639 |
+
{
|
| 31640 |
+
"epoch": 0.8367490379195401,
|
| 31641 |
+
"grad_norm": 37.15625,
|
| 31642 |
+
"learning_rate": 9.869258000734754e-06,
|
| 31643 |
+
"loss": 18.3637,
|
| 31644 |
+
"step": 45090
|
| 31645 |
+
},
|
| 31646 |
+
{
|
| 31647 |
+
"epoch": 0.83693461100402,
|
| 31648 |
+
"grad_norm": 37.28125,
|
| 31649 |
+
"learning_rate": 9.869229004948712e-06,
|
| 31650 |
+
"loss": 18.9402,
|
| 31651 |
+
"step": 45100
|
| 31652 |
+
},
|
| 31653 |
+
{
|
| 31654 |
+
"epoch": 0.8371201840884998,
|
| 31655 |
+
"grad_norm": 36.21875,
|
| 31656 |
+
"learning_rate": 9.869200009162669e-06,
|
| 31657 |
+
"loss": 18.9655,
|
| 31658 |
+
"step": 45110
|
| 31659 |
+
},
|
| 31660 |
+
{
|
| 31661 |
+
"epoch": 0.8373057571729796,
|
| 31662 |
+
"grad_norm": 36.03125,
|
| 31663 |
+
"learning_rate": 9.869171013376627e-06,
|
| 31664 |
+
"loss": 19.3164,
|
| 31665 |
+
"step": 45120
|
| 31666 |
+
},
|
| 31667 |
+
{
|
| 31668 |
+
"epoch": 0.8374913302574595,
|
| 31669 |
+
"grad_norm": 37.5,
|
| 31670 |
+
"learning_rate": 9.869142017590584e-06,
|
| 31671 |
+
"loss": 19.1801,
|
| 31672 |
+
"step": 45130
|
| 31673 |
+
},
|
| 31674 |
+
{
|
| 31675 |
+
"epoch": 0.8376769033419393,
|
| 31676 |
+
"grad_norm": 34.65625,
|
| 31677 |
+
"learning_rate": 9.869113021804541e-06,
|
| 31678 |
+
"loss": 19.3652,
|
| 31679 |
+
"step": 45140
|
| 31680 |
+
},
|
| 31681 |
+
{
|
| 31682 |
+
"epoch": 0.8378624764264191,
|
| 31683 |
+
"grad_norm": 37.5,
|
| 31684 |
+
"learning_rate": 9.869084026018499e-06,
|
| 31685 |
+
"loss": 18.9179,
|
| 31686 |
+
"step": 45150
|
| 31687 |
+
},
|
| 31688 |
+
{
|
| 31689 |
+
"epoch": 0.838048049510899,
|
| 31690 |
+
"grad_norm": 35.875,
|
| 31691 |
+
"learning_rate": 9.869055030232458e-06,
|
| 31692 |
+
"loss": 19.1069,
|
| 31693 |
+
"step": 45160
|
| 31694 |
+
},
|
| 31695 |
+
{
|
| 31696 |
+
"epoch": 0.8382336225953788,
|
| 31697 |
+
"grad_norm": 32.875,
|
| 31698 |
+
"learning_rate": 9.869026034446415e-06,
|
| 31699 |
+
"loss": 18.7498,
|
| 31700 |
+
"step": 45170
|
| 31701 |
+
},
|
| 31702 |
+
{
|
| 31703 |
+
"epoch": 0.8384191956798586,
|
| 31704 |
+
"grad_norm": 36.15625,
|
| 31705 |
+
"learning_rate": 9.868997038660373e-06,
|
| 31706 |
+
"loss": 19.555,
|
| 31707 |
+
"step": 45180
|
| 31708 |
+
},
|
| 31709 |
+
{
|
| 31710 |
+
"epoch": 0.8386047687643384,
|
| 31711 |
+
"grad_norm": 34.9375,
|
| 31712 |
+
"learning_rate": 9.86896804287433e-06,
|
| 31713 |
+
"loss": 18.9055,
|
| 31714 |
+
"step": 45190
|
| 31715 |
+
},
|
| 31716 |
+
{
|
| 31717 |
+
"epoch": 0.8387903418488183,
|
| 31718 |
+
"grad_norm": 36.3125,
|
| 31719 |
+
"learning_rate": 9.868939047088287e-06,
|
| 31720 |
+
"loss": 18.6655,
|
| 31721 |
+
"step": 45200
|
| 31722 |
+
},
|
| 31723 |
+
{
|
| 31724 |
+
"epoch": 0.838975914933298,
|
| 31725 |
+
"grad_norm": 35.5,
|
| 31726 |
+
"learning_rate": 9.868910051302245e-06,
|
| 31727 |
+
"loss": 19.2085,
|
| 31728 |
+
"step": 45210
|
| 31729 |
+
},
|
| 31730 |
+
{
|
| 31731 |
+
"epoch": 0.8391614880177779,
|
| 31732 |
+
"grad_norm": 36.75,
|
| 31733 |
+
"learning_rate": 9.868881055516202e-06,
|
| 31734 |
+
"loss": 19.3331,
|
| 31735 |
+
"step": 45220
|
| 31736 |
+
},
|
| 31737 |
+
{
|
| 31738 |
+
"epoch": 0.8393470611022578,
|
| 31739 |
+
"grad_norm": 35.3125,
|
| 31740 |
+
"learning_rate": 9.868852059730161e-06,
|
| 31741 |
+
"loss": 19.1247,
|
| 31742 |
+
"step": 45230
|
| 31743 |
+
},
|
| 31744 |
+
{
|
| 31745 |
+
"epoch": 0.8395326341867375,
|
| 31746 |
+
"grad_norm": 36.59375,
|
| 31747 |
+
"learning_rate": 9.868823063944117e-06,
|
| 31748 |
+
"loss": 19.0293,
|
| 31749 |
+
"step": 45240
|
| 31750 |
+
},
|
| 31751 |
+
{
|
| 31752 |
+
"epoch": 0.8397182072712174,
|
| 31753 |
+
"grad_norm": 34.65625,
|
| 31754 |
+
"learning_rate": 9.868794068158074e-06,
|
| 31755 |
+
"loss": 18.9617,
|
| 31756 |
+
"step": 45250
|
| 31757 |
+
},
|
| 31758 |
+
{
|
| 31759 |
+
"epoch": 0.8399037803556972,
|
| 31760 |
+
"grad_norm": 34.28125,
|
| 31761 |
+
"learning_rate": 9.868765072372034e-06,
|
| 31762 |
+
"loss": 18.8723,
|
| 31763 |
+
"step": 45260
|
| 31764 |
+
},
|
| 31765 |
+
{
|
| 31766 |
+
"epoch": 0.840089353440177,
|
| 31767 |
+
"grad_norm": 37.09375,
|
| 31768 |
+
"learning_rate": 9.868736076585991e-06,
|
| 31769 |
+
"loss": 19.0723,
|
| 31770 |
+
"step": 45270
|
| 31771 |
+
},
|
| 31772 |
+
{
|
| 31773 |
+
"epoch": 0.8402749265246569,
|
| 31774 |
+
"grad_norm": 36.46875,
|
| 31775 |
+
"learning_rate": 9.868707080799948e-06,
|
| 31776 |
+
"loss": 18.9186,
|
| 31777 |
+
"step": 45280
|
| 31778 |
+
},
|
| 31779 |
+
{
|
| 31780 |
+
"epoch": 0.8404604996091367,
|
| 31781 |
+
"grad_norm": 37.03125,
|
| 31782 |
+
"learning_rate": 9.868678085013906e-06,
|
| 31783 |
+
"loss": 19.2951,
|
| 31784 |
+
"step": 45290
|
| 31785 |
+
},
|
| 31786 |
+
{
|
| 31787 |
+
"epoch": 0.8406460726936165,
|
| 31788 |
+
"grad_norm": 35.71875,
|
| 31789 |
+
"learning_rate": 9.868649089227863e-06,
|
| 31790 |
+
"loss": 18.891,
|
| 31791 |
+
"step": 45300
|
| 31792 |
+
},
|
| 31793 |
+
{
|
| 31794 |
+
"epoch": 0.8408316457780963,
|
| 31795 |
+
"grad_norm": 38.75,
|
| 31796 |
+
"learning_rate": 9.86862009344182e-06,
|
| 31797 |
+
"loss": 19.1318,
|
| 31798 |
+
"step": 45310
|
| 31799 |
+
},
|
| 31800 |
+
{
|
| 31801 |
+
"epoch": 0.8410172188625762,
|
| 31802 |
+
"grad_norm": 36.0625,
|
| 31803 |
+
"learning_rate": 9.868591097655778e-06,
|
| 31804 |
+
"loss": 18.8914,
|
| 31805 |
+
"step": 45320
|
| 31806 |
+
},
|
| 31807 |
+
{
|
| 31808 |
+
"epoch": 0.841202791947056,
|
| 31809 |
+
"grad_norm": 33.65625,
|
| 31810 |
+
"learning_rate": 9.868562101869737e-06,
|
| 31811 |
+
"loss": 18.6757,
|
| 31812 |
+
"step": 45330
|
| 31813 |
+
},
|
| 31814 |
+
{
|
| 31815 |
+
"epoch": 0.8413883650315358,
|
| 31816 |
+
"grad_norm": 37.125,
|
| 31817 |
+
"learning_rate": 9.868533106083695e-06,
|
| 31818 |
+
"loss": 19.2032,
|
| 31819 |
+
"step": 45340
|
| 31820 |
+
},
|
| 31821 |
+
{
|
| 31822 |
+
"epoch": 0.8415739381160157,
|
| 31823 |
+
"grad_norm": 34.6875,
|
| 31824 |
+
"learning_rate": 9.86850411029765e-06,
|
| 31825 |
+
"loss": 18.8749,
|
| 31826 |
+
"step": 45350
|
| 31827 |
+
},
|
| 31828 |
+
{
|
| 31829 |
+
"epoch": 0.8417595112004955,
|
| 31830 |
+
"grad_norm": 35.65625,
|
| 31831 |
+
"learning_rate": 9.86847511451161e-06,
|
| 31832 |
+
"loss": 19.2826,
|
| 31833 |
+
"step": 45360
|
| 31834 |
+
},
|
| 31835 |
+
{
|
| 31836 |
+
"epoch": 0.8419450842849753,
|
| 31837 |
+
"grad_norm": 36.875,
|
| 31838 |
+
"learning_rate": 9.868446118725567e-06,
|
| 31839 |
+
"loss": 18.9139,
|
| 31840 |
+
"step": 45370
|
| 31841 |
+
},
|
| 31842 |
+
{
|
| 31843 |
+
"epoch": 0.8421306573694551,
|
| 31844 |
+
"grad_norm": 38.0,
|
| 31845 |
+
"learning_rate": 9.868417122939524e-06,
|
| 31846 |
+
"loss": 18.9206,
|
| 31847 |
+
"step": 45380
|
| 31848 |
+
},
|
| 31849 |
+
{
|
| 31850 |
+
"epoch": 0.842316230453935,
|
| 31851 |
+
"grad_norm": 35.6875,
|
| 31852 |
+
"learning_rate": 9.868388127153482e-06,
|
| 31853 |
+
"loss": 18.9808,
|
| 31854 |
+
"step": 45390
|
| 31855 |
+
},
|
| 31856 |
+
{
|
| 31857 |
+
"epoch": 0.8425018035384148,
|
| 31858 |
+
"grad_norm": 34.4375,
|
| 31859 |
+
"learning_rate": 9.868359131367439e-06,
|
| 31860 |
+
"loss": 18.8303,
|
| 31861 |
+
"step": 45400
|
| 31862 |
+
},
|
| 31863 |
+
{
|
| 31864 |
+
"epoch": 0.8426873766228946,
|
| 31865 |
+
"grad_norm": 36.5625,
|
| 31866 |
+
"learning_rate": 9.868330135581396e-06,
|
| 31867 |
+
"loss": 18.862,
|
| 31868 |
+
"step": 45410
|
| 31869 |
+
},
|
| 31870 |
+
{
|
| 31871 |
+
"epoch": 0.8428729497073745,
|
| 31872 |
+
"grad_norm": 37.1875,
|
| 31873 |
+
"learning_rate": 9.868301139795354e-06,
|
| 31874 |
+
"loss": 19.1974,
|
| 31875 |
+
"step": 45420
|
| 31876 |
+
},
|
| 31877 |
+
{
|
| 31878 |
+
"epoch": 0.8430585227918542,
|
| 31879 |
+
"grad_norm": 36.78125,
|
| 31880 |
+
"learning_rate": 9.868272144009313e-06,
|
| 31881 |
+
"loss": 19.5463,
|
| 31882 |
+
"step": 45430
|
| 31883 |
+
},
|
| 31884 |
+
{
|
| 31885 |
+
"epoch": 0.8432440958763341,
|
| 31886 |
+
"grad_norm": 34.59375,
|
| 31887 |
+
"learning_rate": 9.86824314822327e-06,
|
| 31888 |
+
"loss": 18.7967,
|
| 31889 |
+
"step": 45440
|
| 31890 |
+
},
|
| 31891 |
+
{
|
| 31892 |
+
"epoch": 0.843429668960814,
|
| 31893 |
+
"grad_norm": 36.75,
|
| 31894 |
+
"learning_rate": 9.868214152437226e-06,
|
| 31895 |
+
"loss": 19.0146,
|
| 31896 |
+
"step": 45450
|
| 31897 |
+
},
|
| 31898 |
+
{
|
| 31899 |
+
"epoch": 0.8436152420452937,
|
| 31900 |
+
"grad_norm": 36.65625,
|
| 31901 |
+
"learning_rate": 9.868185156651185e-06,
|
| 31902 |
+
"loss": 18.5762,
|
| 31903 |
+
"step": 45460
|
| 31904 |
+
},
|
| 31905 |
+
{
|
| 31906 |
+
"epoch": 0.8438008151297736,
|
| 31907 |
+
"grad_norm": 38.625,
|
| 31908 |
+
"learning_rate": 9.868156160865143e-06,
|
| 31909 |
+
"loss": 19.1253,
|
| 31910 |
+
"step": 45470
|
| 31911 |
+
},
|
| 31912 |
+
{
|
| 31913 |
+
"epoch": 0.8439863882142534,
|
| 31914 |
+
"grad_norm": 35.96875,
|
| 31915 |
+
"learning_rate": 9.8681271650791e-06,
|
| 31916 |
+
"loss": 18.8985,
|
| 31917 |
+
"step": 45480
|
| 31918 |
+
},
|
| 31919 |
+
{
|
| 31920 |
+
"epoch": 0.8441719612987333,
|
| 31921 |
+
"grad_norm": 36.625,
|
| 31922 |
+
"learning_rate": 9.868098169293057e-06,
|
| 31923 |
+
"loss": 18.4796,
|
| 31924 |
+
"step": 45490
|
| 31925 |
+
},
|
| 31926 |
+
{
|
| 31927 |
+
"epoch": 0.844357534383213,
|
| 31928 |
+
"grad_norm": 35.5625,
|
| 31929 |
+
"learning_rate": 9.868069173507015e-06,
|
| 31930 |
+
"loss": 18.9359,
|
| 31931 |
+
"step": 45500
|
| 31932 |
+
},
|
| 31933 |
+
{
|
| 31934 |
+
"epoch": 0.8445431074676929,
|
| 31935 |
+
"grad_norm": 35.125,
|
| 31936 |
+
"learning_rate": 9.868040177720972e-06,
|
| 31937 |
+
"loss": 19.2384,
|
| 31938 |
+
"step": 45510
|
| 31939 |
+
},
|
| 31940 |
+
{
|
| 31941 |
+
"epoch": 0.8447286805521728,
|
| 31942 |
+
"grad_norm": 38.3125,
|
| 31943 |
+
"learning_rate": 9.86801118193493e-06,
|
| 31944 |
+
"loss": 18.8187,
|
| 31945 |
+
"step": 45520
|
| 31946 |
+
},
|
| 31947 |
+
{
|
| 31948 |
+
"epoch": 0.8449142536366525,
|
| 31949 |
+
"grad_norm": 37.375,
|
| 31950 |
+
"learning_rate": 9.867982186148887e-06,
|
| 31951 |
+
"loss": 19.2896,
|
| 31952 |
+
"step": 45530
|
| 31953 |
+
},
|
| 31954 |
+
{
|
| 31955 |
+
"epoch": 0.8450998267211324,
|
| 31956 |
+
"grad_norm": 34.875,
|
| 31957 |
+
"learning_rate": 9.867953190362846e-06,
|
| 31958 |
+
"loss": 18.9661,
|
| 31959 |
+
"step": 45540
|
| 31960 |
+
},
|
| 31961 |
+
{
|
| 31962 |
+
"epoch": 0.8452853998056122,
|
| 31963 |
+
"grad_norm": 34.15625,
|
| 31964 |
+
"learning_rate": 9.867924194576803e-06,
|
| 31965 |
+
"loss": 19.0476,
|
| 31966 |
+
"step": 45550
|
| 31967 |
+
},
|
| 31968 |
+
{
|
| 31969 |
+
"epoch": 0.845470972890092,
|
| 31970 |
+
"grad_norm": 36.8125,
|
| 31971 |
+
"learning_rate": 9.867895198790761e-06,
|
| 31972 |
+
"loss": 19.2686,
|
| 31973 |
+
"step": 45560
|
| 31974 |
+
},
|
| 31975 |
+
{
|
| 31976 |
+
"epoch": 0.8456565459745718,
|
| 31977 |
+
"grad_norm": 36.5,
|
| 31978 |
+
"learning_rate": 9.867866203004718e-06,
|
| 31979 |
+
"loss": 19.37,
|
| 31980 |
+
"step": 45570
|
| 31981 |
+
},
|
| 31982 |
+
{
|
| 31983 |
+
"epoch": 0.8458421190590517,
|
| 31984 |
+
"grad_norm": 35.9375,
|
| 31985 |
+
"learning_rate": 9.867837207218676e-06,
|
| 31986 |
+
"loss": 19.5579,
|
| 31987 |
+
"step": 45580
|
| 31988 |
+
},
|
| 31989 |
+
{
|
| 31990 |
+
"epoch": 0.8460276921435315,
|
| 31991 |
+
"grad_norm": 35.6875,
|
| 31992 |
+
"learning_rate": 9.867808211432633e-06,
|
| 31993 |
+
"loss": 19.0715,
|
| 31994 |
+
"step": 45590
|
| 31995 |
+
},
|
| 31996 |
+
{
|
| 31997 |
+
"epoch": 0.8462132652280113,
|
| 31998 |
+
"grad_norm": 35.375,
|
| 31999 |
+
"learning_rate": 9.86777921564659e-06,
|
| 32000 |
+
"loss": 19.0432,
|
| 32001 |
+
"step": 45600
|
| 32002 |
+
},
|
| 32003 |
+
{
|
| 32004 |
+
"epoch": 0.8463988383124912,
|
| 32005 |
+
"grad_norm": 36.875,
|
| 32006 |
+
"learning_rate": 9.867750219860548e-06,
|
| 32007 |
+
"loss": 18.8202,
|
| 32008 |
+
"step": 45610
|
| 32009 |
+
},
|
| 32010 |
+
{
|
| 32011 |
+
"epoch": 0.8465844113969709,
|
| 32012 |
+
"grad_norm": 34.375,
|
| 32013 |
+
"learning_rate": 9.867721224074505e-06,
|
| 32014 |
+
"loss": 18.784,
|
| 32015 |
+
"step": 45620
|
| 32016 |
+
},
|
| 32017 |
+
{
|
| 32018 |
+
"epoch": 0.8467699844814508,
|
| 32019 |
+
"grad_norm": 36.0,
|
| 32020 |
+
"learning_rate": 9.867692228288463e-06,
|
| 32021 |
+
"loss": 18.8642,
|
| 32022 |
+
"step": 45630
|
| 32023 |
+
},
|
| 32024 |
+
{
|
| 32025 |
+
"epoch": 0.8469555575659307,
|
| 32026 |
+
"grad_norm": 36.875,
|
| 32027 |
+
"learning_rate": 9.867663232502422e-06,
|
| 32028 |
+
"loss": 19.0021,
|
| 32029 |
+
"step": 45640
|
| 32030 |
+
},
|
| 32031 |
+
{
|
| 32032 |
+
"epoch": 0.8471411306504104,
|
| 32033 |
+
"grad_norm": 35.09375,
|
| 32034 |
+
"learning_rate": 9.86763423671638e-06,
|
| 32035 |
+
"loss": 18.3599,
|
| 32036 |
+
"step": 45650
|
| 32037 |
+
},
|
| 32038 |
+
{
|
| 32039 |
+
"epoch": 0.8473267037348903,
|
| 32040 |
+
"grad_norm": 36.59375,
|
| 32041 |
+
"learning_rate": 9.867605240930337e-06,
|
| 32042 |
+
"loss": 18.9329,
|
| 32043 |
+
"step": 45660
|
| 32044 |
+
},
|
| 32045 |
+
{
|
| 32046 |
+
"epoch": 0.8475122768193701,
|
| 32047 |
+
"grad_norm": 36.9375,
|
| 32048 |
+
"learning_rate": 9.867576245144294e-06,
|
| 32049 |
+
"loss": 18.6595,
|
| 32050 |
+
"step": 45670
|
| 32051 |
+
},
|
| 32052 |
+
{
|
| 32053 |
+
"epoch": 0.84769784990385,
|
| 32054 |
+
"grad_norm": 35.96875,
|
| 32055 |
+
"learning_rate": 9.867547249358251e-06,
|
| 32056 |
+
"loss": 19.3835,
|
| 32057 |
+
"step": 45680
|
| 32058 |
+
},
|
| 32059 |
+
{
|
| 32060 |
+
"epoch": 0.8478834229883297,
|
| 32061 |
+
"grad_norm": 36.875,
|
| 32062 |
+
"learning_rate": 9.867518253572209e-06,
|
| 32063 |
+
"loss": 18.894,
|
| 32064 |
+
"step": 45690
|
| 32065 |
+
},
|
| 32066 |
+
{
|
| 32067 |
+
"epoch": 0.8480689960728096,
|
| 32068 |
+
"grad_norm": 35.40625,
|
| 32069 |
+
"learning_rate": 9.867489257786166e-06,
|
| 32070 |
+
"loss": 18.9299,
|
| 32071 |
+
"step": 45700
|
| 32072 |
+
},
|
| 32073 |
+
{
|
| 32074 |
+
"epoch": 0.8482545691572895,
|
| 32075 |
+
"grad_norm": 37.46875,
|
| 32076 |
+
"learning_rate": 9.867460262000125e-06,
|
| 32077 |
+
"loss": 19.3052,
|
| 32078 |
+
"step": 45710
|
| 32079 |
+
},
|
| 32080 |
+
{
|
| 32081 |
+
"epoch": 0.8484401422417692,
|
| 32082 |
+
"grad_norm": 34.59375,
|
| 32083 |
+
"learning_rate": 9.867431266214081e-06,
|
| 32084 |
+
"loss": 18.9821,
|
| 32085 |
+
"step": 45720
|
| 32086 |
+
},
|
| 32087 |
+
{
|
| 32088 |
+
"epoch": 0.8486257153262491,
|
| 32089 |
+
"grad_norm": 36.3125,
|
| 32090 |
+
"learning_rate": 9.867402270428039e-06,
|
| 32091 |
+
"loss": 18.9228,
|
| 32092 |
+
"step": 45730
|
| 32093 |
+
},
|
| 32094 |
+
{
|
| 32095 |
+
"epoch": 0.848811288410729,
|
| 32096 |
+
"grad_norm": 35.90625,
|
| 32097 |
+
"learning_rate": 9.867373274641998e-06,
|
| 32098 |
+
"loss": 19.1518,
|
| 32099 |
+
"step": 45740
|
| 32100 |
+
},
|
| 32101 |
+
{
|
| 32102 |
+
"epoch": 0.8489968614952087,
|
| 32103 |
+
"grad_norm": 36.4375,
|
| 32104 |
+
"learning_rate": 9.867344278855955e-06,
|
| 32105 |
+
"loss": 18.6011,
|
| 32106 |
+
"step": 45750
|
| 32107 |
+
},
|
| 32108 |
+
{
|
| 32109 |
+
"epoch": 0.8491824345796886,
|
| 32110 |
+
"grad_norm": 34.75,
|
| 32111 |
+
"learning_rate": 9.867315283069912e-06,
|
| 32112 |
+
"loss": 18.7753,
|
| 32113 |
+
"step": 45760
|
| 32114 |
+
},
|
| 32115 |
+
{
|
| 32116 |
+
"epoch": 0.8493680076641684,
|
| 32117 |
+
"grad_norm": 36.75,
|
| 32118 |
+
"learning_rate": 9.86728628728387e-06,
|
| 32119 |
+
"loss": 19.2827,
|
| 32120 |
+
"step": 45770
|
| 32121 |
+
},
|
| 32122 |
+
{
|
| 32123 |
+
"epoch": 0.8495535807486482,
|
| 32124 |
+
"grad_norm": 35.53125,
|
| 32125 |
+
"learning_rate": 9.867257291497827e-06,
|
| 32126 |
+
"loss": 18.5532,
|
| 32127 |
+
"step": 45780
|
| 32128 |
+
},
|
| 32129 |
+
{
|
| 32130 |
+
"epoch": 0.849739153833128,
|
| 32131 |
+
"grad_norm": 37.34375,
|
| 32132 |
+
"learning_rate": 9.867228295711785e-06,
|
| 32133 |
+
"loss": 19.2479,
|
| 32134 |
+
"step": 45790
|
| 32135 |
+
},
|
| 32136 |
+
{
|
| 32137 |
+
"epoch": 0.8499247269176079,
|
| 32138 |
+
"grad_norm": 37.6875,
|
| 32139 |
+
"learning_rate": 9.867199299925742e-06,
|
| 32140 |
+
"loss": 19.3353,
|
| 32141 |
+
"step": 45800
|
| 32142 |
+
},
|
| 32143 |
+
{
|
| 32144 |
+
"epoch": 0.8501103000020876,
|
| 32145 |
+
"grad_norm": 37.03125,
|
| 32146 |
+
"learning_rate": 9.867170304139701e-06,
|
| 32147 |
+
"loss": 19.4561,
|
| 32148 |
+
"step": 45810
|
| 32149 |
+
},
|
| 32150 |
+
{
|
| 32151 |
+
"epoch": 0.8502958730865675,
|
| 32152 |
+
"grad_norm": 36.5,
|
| 32153 |
+
"learning_rate": 9.867141308353659e-06,
|
| 32154 |
+
"loss": 19.2609,
|
| 32155 |
+
"step": 45820
|
| 32156 |
+
},
|
| 32157 |
+
{
|
| 32158 |
+
"epoch": 0.8504814461710474,
|
| 32159 |
+
"grad_norm": 35.59375,
|
| 32160 |
+
"learning_rate": 9.867112312567614e-06,
|
| 32161 |
+
"loss": 18.6296,
|
| 32162 |
+
"step": 45830
|
| 32163 |
+
},
|
| 32164 |
+
{
|
| 32165 |
+
"epoch": 0.8506670192555271,
|
| 32166 |
+
"grad_norm": 36.25,
|
| 32167 |
+
"learning_rate": 9.867083316781573e-06,
|
| 32168 |
+
"loss": 18.561,
|
| 32169 |
+
"step": 45840
|
| 32170 |
+
},
|
| 32171 |
+
{
|
| 32172 |
+
"epoch": 0.850852592340007,
|
| 32173 |
+
"grad_norm": 37.0,
|
| 32174 |
+
"learning_rate": 9.86705432099553e-06,
|
| 32175 |
+
"loss": 19.1894,
|
| 32176 |
+
"step": 45850
|
| 32177 |
+
},
|
| 32178 |
+
{
|
| 32179 |
+
"epoch": 0.8510381654244868,
|
| 32180 |
+
"grad_norm": 39.0625,
|
| 32181 |
+
"learning_rate": 9.867025325209488e-06,
|
| 32182 |
+
"loss": 19.0677,
|
| 32183 |
+
"step": 45860
|
| 32184 |
+
},
|
| 32185 |
+
{
|
| 32186 |
+
"epoch": 0.8512237385089667,
|
| 32187 |
+
"grad_norm": 37.15625,
|
| 32188 |
+
"learning_rate": 9.866996329423446e-06,
|
| 32189 |
+
"loss": 19.3011,
|
| 32190 |
+
"step": 45870
|
| 32191 |
+
},
|
| 32192 |
+
{
|
| 32193 |
+
"epoch": 0.8514093115934465,
|
| 32194 |
+
"grad_norm": 38.15625,
|
| 32195 |
+
"learning_rate": 9.866967333637403e-06,
|
| 32196 |
+
"loss": 18.3706,
|
| 32197 |
+
"step": 45880
|
| 32198 |
+
},
|
| 32199 |
+
{
|
| 32200 |
+
"epoch": 0.8515948846779263,
|
| 32201 |
+
"grad_norm": 36.5625,
|
| 32202 |
+
"learning_rate": 9.86693833785136e-06,
|
| 32203 |
+
"loss": 18.8431,
|
| 32204 |
+
"step": 45890
|
| 32205 |
+
},
|
| 32206 |
+
{
|
| 32207 |
+
"epoch": 0.8517804577624062,
|
| 32208 |
+
"grad_norm": 37.78125,
|
| 32209 |
+
"learning_rate": 9.866909342065318e-06,
|
| 32210 |
+
"loss": 18.7596,
|
| 32211 |
+
"step": 45900
|
| 32212 |
+
},
|
| 32213 |
+
{
|
| 32214 |
+
"epoch": 0.8519660308468859,
|
| 32215 |
+
"grad_norm": 37.40625,
|
| 32216 |
+
"learning_rate": 9.866880346279277e-06,
|
| 32217 |
+
"loss": 19.2939,
|
| 32218 |
+
"step": 45910
|
| 32219 |
+
},
|
| 32220 |
+
{
|
| 32221 |
+
"epoch": 0.8521516039313658,
|
| 32222 |
+
"grad_norm": 36.96875,
|
| 32223 |
+
"learning_rate": 9.866851350493234e-06,
|
| 32224 |
+
"loss": 18.9418,
|
| 32225 |
+
"step": 45920
|
| 32226 |
+
},
|
| 32227 |
+
{
|
| 32228 |
+
"epoch": 0.8523371770158457,
|
| 32229 |
+
"grad_norm": 34.34375,
|
| 32230 |
+
"learning_rate": 9.866822354707192e-06,
|
| 32231 |
+
"loss": 18.5307,
|
| 32232 |
+
"step": 45930
|
| 32233 |
+
},
|
| 32234 |
+
{
|
| 32235 |
+
"epoch": 0.8525227501003254,
|
| 32236 |
+
"grad_norm": 39.0625,
|
| 32237 |
+
"learning_rate": 9.866793358921149e-06,
|
| 32238 |
+
"loss": 19.5498,
|
| 32239 |
+
"step": 45940
|
| 32240 |
+
},
|
| 32241 |
+
{
|
| 32242 |
+
"epoch": 0.8527083231848053,
|
| 32243 |
+
"grad_norm": 35.28125,
|
| 32244 |
+
"learning_rate": 9.866764363135107e-06,
|
| 32245 |
+
"loss": 18.329,
|
| 32246 |
+
"step": 45950
|
| 32247 |
+
},
|
| 32248 |
+
{
|
| 32249 |
+
"epoch": 0.8528938962692851,
|
| 32250 |
+
"grad_norm": 37.09375,
|
| 32251 |
+
"learning_rate": 9.866735367349064e-06,
|
| 32252 |
+
"loss": 18.971,
|
| 32253 |
+
"step": 45960
|
| 32254 |
+
},
|
| 32255 |
+
{
|
| 32256 |
+
"epoch": 0.8530794693537649,
|
| 32257 |
+
"grad_norm": 36.03125,
|
| 32258 |
+
"learning_rate": 9.866706371563021e-06,
|
| 32259 |
+
"loss": 18.7174,
|
| 32260 |
+
"step": 45970
|
| 32261 |
+
},
|
| 32262 |
+
{
|
| 32263 |
+
"epoch": 0.8532650424382447,
|
| 32264 |
+
"grad_norm": 35.25,
|
| 32265 |
+
"learning_rate": 9.866677375776979e-06,
|
| 32266 |
+
"loss": 18.7271,
|
| 32267 |
+
"step": 45980
|
| 32268 |
+
},
|
| 32269 |
+
{
|
| 32270 |
+
"epoch": 0.8534506155227246,
|
| 32271 |
+
"grad_norm": 33.03125,
|
| 32272 |
+
"learning_rate": 9.866648379990936e-06,
|
| 32273 |
+
"loss": 19.0984,
|
| 32274 |
+
"step": 45990
|
| 32275 |
+
},
|
| 32276 |
+
{
|
| 32277 |
+
"epoch": 0.8536361886072044,
|
| 32278 |
+
"grad_norm": 37.09375,
|
| 32279 |
+
"learning_rate": 9.866619384204894e-06,
|
| 32280 |
+
"loss": 19.3761,
|
| 32281 |
+
"step": 46000
|
| 32282 |
+
},
|
| 32283 |
+
{
|
| 32284 |
+
"epoch": 0.8538217616916842,
|
| 32285 |
+
"grad_norm": 38.1875,
|
| 32286 |
+
"learning_rate": 9.866590388418853e-06,
|
| 32287 |
+
"loss": 18.9924,
|
| 32288 |
+
"step": 46010
|
| 32289 |
+
},
|
| 32290 |
+
{
|
| 32291 |
+
"epoch": 0.8540073347761641,
|
| 32292 |
+
"grad_norm": 35.90625,
|
| 32293 |
+
"learning_rate": 9.86656139263281e-06,
|
| 32294 |
+
"loss": 18.9491,
|
| 32295 |
+
"step": 46020
|
| 32296 |
+
},
|
| 32297 |
+
{
|
| 32298 |
+
"epoch": 0.854192907860644,
|
| 32299 |
+
"grad_norm": 34.875,
|
| 32300 |
+
"learning_rate": 9.866532396846767e-06,
|
| 32301 |
+
"loss": 19.4705,
|
| 32302 |
+
"step": 46030
|
| 32303 |
+
},
|
| 32304 |
+
{
|
| 32305 |
+
"epoch": 0.8543784809451237,
|
| 32306 |
+
"grad_norm": 36.09375,
|
| 32307 |
+
"learning_rate": 9.866503401060725e-06,
|
| 32308 |
+
"loss": 18.1977,
|
| 32309 |
+
"step": 46040
|
| 32310 |
+
},
|
| 32311 |
+
{
|
| 32312 |
+
"epoch": 0.8545640540296036,
|
| 32313 |
+
"grad_norm": 36.25,
|
| 32314 |
+
"learning_rate": 9.866474405274682e-06,
|
| 32315 |
+
"loss": 18.6364,
|
| 32316 |
+
"step": 46050
|
| 32317 |
+
},
|
| 32318 |
+
{
|
| 32319 |
+
"epoch": 0.8547496271140834,
|
| 32320 |
+
"grad_norm": 34.6875,
|
| 32321 |
+
"learning_rate": 9.86644540948864e-06,
|
| 32322 |
+
"loss": 18.6337,
|
| 32323 |
+
"step": 46060
|
| 32324 |
+
},
|
| 32325 |
+
{
|
| 32326 |
+
"epoch": 0.8549352001985632,
|
| 32327 |
+
"grad_norm": 36.9375,
|
| 32328 |
+
"learning_rate": 9.866416413702597e-06,
|
| 32329 |
+
"loss": 19.3606,
|
| 32330 |
+
"step": 46070
|
| 32331 |
+
},
|
| 32332 |
+
{
|
| 32333 |
+
"epoch": 0.855120773283043,
|
| 32334 |
+
"grad_norm": 36.59375,
|
| 32335 |
+
"learning_rate": 9.866387417916555e-06,
|
| 32336 |
+
"loss": 19.6339,
|
| 32337 |
+
"step": 46080
|
| 32338 |
+
},
|
| 32339 |
+
{
|
| 32340 |
+
"epoch": 0.8553063463675229,
|
| 32341 |
+
"grad_norm": 36.6875,
|
| 32342 |
+
"learning_rate": 9.866358422130514e-06,
|
| 32343 |
+
"loss": 19.3116,
|
| 32344 |
+
"step": 46090
|
| 32345 |
+
},
|
| 32346 |
+
{
|
| 32347 |
+
"epoch": 0.8554919194520026,
|
| 32348 |
+
"grad_norm": 37.21875,
|
| 32349 |
+
"learning_rate": 9.86632942634447e-06,
|
| 32350 |
+
"loss": 19.3043,
|
| 32351 |
+
"step": 46100
|
| 32352 |
+
},
|
| 32353 |
+
{
|
| 32354 |
+
"epoch": 0.8556774925364825,
|
| 32355 |
+
"grad_norm": 36.1875,
|
| 32356 |
+
"learning_rate": 9.866300430558427e-06,
|
| 32357 |
+
"loss": 18.8968,
|
| 32358 |
+
"step": 46110
|
| 32359 |
+
},
|
| 32360 |
+
{
|
| 32361 |
+
"epoch": 0.8558630656209624,
|
| 32362 |
+
"grad_norm": 36.9375,
|
| 32363 |
+
"learning_rate": 9.866271434772386e-06,
|
| 32364 |
+
"loss": 19.0006,
|
| 32365 |
+
"step": 46120
|
| 32366 |
+
},
|
| 32367 |
+
{
|
| 32368 |
+
"epoch": 0.8560486387054421,
|
| 32369 |
+
"grad_norm": 35.8125,
|
| 32370 |
+
"learning_rate": 9.866242438986343e-06,
|
| 32371 |
+
"loss": 19.0822,
|
| 32372 |
+
"step": 46130
|
| 32373 |
+
},
|
| 32374 |
+
{
|
| 32375 |
+
"epoch": 0.856234211789922,
|
| 32376 |
+
"grad_norm": 38.0,
|
| 32377 |
+
"learning_rate": 9.8662134432003e-06,
|
| 32378 |
+
"loss": 18.9034,
|
| 32379 |
+
"step": 46140
|
| 32380 |
+
},
|
| 32381 |
+
{
|
| 32382 |
+
"epoch": 0.8564197848744018,
|
| 32383 |
+
"grad_norm": 35.46875,
|
| 32384 |
+
"learning_rate": 9.866184447414258e-06,
|
| 32385 |
+
"loss": 18.5902,
|
| 32386 |
+
"step": 46150
|
| 32387 |
+
},
|
| 32388 |
+
{
|
| 32389 |
+
"epoch": 0.8566053579588816,
|
| 32390 |
+
"grad_norm": 35.84375,
|
| 32391 |
+
"learning_rate": 9.866155451628215e-06,
|
| 32392 |
+
"loss": 18.9346,
|
| 32393 |
+
"step": 46160
|
| 32394 |
+
},
|
| 32395 |
+
{
|
| 32396 |
+
"epoch": 0.8567909310433615,
|
| 32397 |
+
"grad_norm": 37.09375,
|
| 32398 |
+
"learning_rate": 9.866126455842173e-06,
|
| 32399 |
+
"loss": 19.1755,
|
| 32400 |
+
"step": 46170
|
| 32401 |
+
},
|
| 32402 |
+
{
|
| 32403 |
+
"epoch": 0.8569765041278413,
|
| 32404 |
+
"grad_norm": 37.46875,
|
| 32405 |
+
"learning_rate": 9.86609746005613e-06,
|
| 32406 |
+
"loss": 19.1133,
|
| 32407 |
+
"step": 46180
|
| 32408 |
+
},
|
| 32409 |
+
{
|
| 32410 |
+
"epoch": 0.8571620772123211,
|
| 32411 |
+
"grad_norm": 35.875,
|
| 32412 |
+
"learning_rate": 9.86606846427009e-06,
|
| 32413 |
+
"loss": 18.9241,
|
| 32414 |
+
"step": 46190
|
| 32415 |
+
},
|
| 32416 |
+
{
|
| 32417 |
+
"epoch": 0.8573476502968009,
|
| 32418 |
+
"grad_norm": 35.46875,
|
| 32419 |
+
"learning_rate": 9.866039468484045e-06,
|
| 32420 |
+
"loss": 19.4059,
|
| 32421 |
+
"step": 46200
|
| 32422 |
+
},
|
| 32423 |
+
{
|
| 32424 |
+
"epoch": 0.8575332233812808,
|
| 32425 |
+
"grad_norm": 33.375,
|
| 32426 |
+
"learning_rate": 9.866010472698003e-06,
|
| 32427 |
+
"loss": 19.2368,
|
| 32428 |
+
"step": 46210
|
| 32429 |
+
},
|
| 32430 |
+
{
|
| 32431 |
+
"epoch": 0.8577187964657607,
|
| 32432 |
+
"grad_norm": 35.875,
|
| 32433 |
+
"learning_rate": 9.865981476911962e-06,
|
| 32434 |
+
"loss": 18.9356,
|
| 32435 |
+
"step": 46220
|
| 32436 |
+
},
|
| 32437 |
+
{
|
| 32438 |
+
"epoch": 0.8579043695502404,
|
| 32439 |
+
"grad_norm": 36.6875,
|
| 32440 |
+
"learning_rate": 9.865952481125919e-06,
|
| 32441 |
+
"loss": 19.2764,
|
| 32442 |
+
"step": 46230
|
| 32443 |
+
},
|
| 32444 |
+
{
|
| 32445 |
+
"epoch": 0.8580899426347203,
|
| 32446 |
+
"grad_norm": 36.8125,
|
| 32447 |
+
"learning_rate": 9.865923485339876e-06,
|
| 32448 |
+
"loss": 19.1283,
|
| 32449 |
+
"step": 46240
|
| 32450 |
+
},
|
| 32451 |
+
{
|
| 32452 |
+
"epoch": 0.8582755157192001,
|
| 32453 |
+
"grad_norm": 36.4375,
|
| 32454 |
+
"learning_rate": 9.865894489553834e-06,
|
| 32455 |
+
"loss": 18.773,
|
| 32456 |
+
"step": 46250
|
| 32457 |
+
},
|
| 32458 |
+
{
|
| 32459 |
+
"epoch": 0.8584610888036799,
|
| 32460 |
+
"grad_norm": 34.15625,
|
| 32461 |
+
"learning_rate": 9.865865493767791e-06,
|
| 32462 |
+
"loss": 19.0013,
|
| 32463 |
+
"step": 46260
|
| 32464 |
+
},
|
| 32465 |
+
{
|
| 32466 |
+
"epoch": 0.8586466618881597,
|
| 32467 |
+
"grad_norm": 35.5,
|
| 32468 |
+
"learning_rate": 9.865836497981749e-06,
|
| 32469 |
+
"loss": 19.106,
|
| 32470 |
+
"step": 46270
|
| 32471 |
+
},
|
| 32472 |
+
{
|
| 32473 |
+
"epoch": 0.8588322349726396,
|
| 32474 |
+
"grad_norm": 36.46875,
|
| 32475 |
+
"learning_rate": 9.865807502195706e-06,
|
| 32476 |
+
"loss": 19.1127,
|
| 32477 |
+
"step": 46280
|
| 32478 |
+
},
|
| 32479 |
+
{
|
| 32480 |
+
"epoch": 0.8590178080571194,
|
| 32481 |
+
"grad_norm": 36.03125,
|
| 32482 |
+
"learning_rate": 9.865778506409665e-06,
|
| 32483 |
+
"loss": 19.1271,
|
| 32484 |
+
"step": 46290
|
| 32485 |
+
},
|
| 32486 |
+
{
|
| 32487 |
+
"epoch": 0.8592033811415992,
|
| 32488 |
+
"grad_norm": 36.25,
|
| 32489 |
+
"learning_rate": 9.865749510623623e-06,
|
| 32490 |
+
"loss": 18.7041,
|
| 32491 |
+
"step": 46300
|
| 32492 |
+
},
|
| 32493 |
+
{
|
| 32494 |
+
"epoch": 0.8593889542260791,
|
| 32495 |
+
"grad_norm": 35.96875,
|
| 32496 |
+
"learning_rate": 9.865720514837578e-06,
|
| 32497 |
+
"loss": 19.1869,
|
| 32498 |
+
"step": 46310
|
| 32499 |
+
},
|
| 32500 |
+
{
|
| 32501 |
+
"epoch": 0.8595745273105588,
|
| 32502 |
+
"grad_norm": 37.34375,
|
| 32503 |
+
"learning_rate": 9.865691519051537e-06,
|
| 32504 |
+
"loss": 19.1163,
|
| 32505 |
+
"step": 46320
|
| 32506 |
+
},
|
| 32507 |
+
{
|
| 32508 |
+
"epoch": 0.8597601003950387,
|
| 32509 |
+
"grad_norm": 38.09375,
|
| 32510 |
+
"learning_rate": 9.865662523265495e-06,
|
| 32511 |
+
"loss": 18.9266,
|
| 32512 |
+
"step": 46330
|
| 32513 |
+
},
|
| 32514 |
+
{
|
| 32515 |
+
"epoch": 0.8599456734795186,
|
| 32516 |
+
"grad_norm": 36.0,
|
| 32517 |
+
"learning_rate": 9.865633527479452e-06,
|
| 32518 |
+
"loss": 19.044,
|
| 32519 |
+
"step": 46340
|
| 32520 |
+
},
|
| 32521 |
+
{
|
| 32522 |
+
"epoch": 0.8601312465639983,
|
| 32523 |
+
"grad_norm": 36.96875,
|
| 32524 |
+
"learning_rate": 9.86560453169341e-06,
|
| 32525 |
+
"loss": 19.1646,
|
| 32526 |
+
"step": 46350
|
| 32527 |
+
},
|
| 32528 |
+
{
|
| 32529 |
+
"epoch": 0.8603168196484782,
|
| 32530 |
+
"grad_norm": 38.4375,
|
| 32531 |
+
"learning_rate": 9.865575535907369e-06,
|
| 32532 |
+
"loss": 18.5803,
|
| 32533 |
+
"step": 46360
|
| 32534 |
+
},
|
| 32535 |
+
{
|
| 32536 |
+
"epoch": 0.860502392732958,
|
| 32537 |
+
"grad_norm": 36.25,
|
| 32538 |
+
"learning_rate": 9.865546540121324e-06,
|
| 32539 |
+
"loss": 19.0525,
|
| 32540 |
+
"step": 46370
|
| 32541 |
+
},
|
| 32542 |
+
{
|
| 32543 |
+
"epoch": 0.8606879658174378,
|
| 32544 |
+
"grad_norm": 36.78125,
|
| 32545 |
+
"learning_rate": 9.865517544335282e-06,
|
| 32546 |
+
"loss": 18.6633,
|
| 32547 |
+
"step": 46380
|
| 32548 |
+
},
|
| 32549 |
+
{
|
| 32550 |
+
"epoch": 0.8608735389019176,
|
| 32551 |
+
"grad_norm": 35.59375,
|
| 32552 |
+
"learning_rate": 9.865488548549241e-06,
|
| 32553 |
+
"loss": 19.2111,
|
| 32554 |
+
"step": 46390
|
| 32555 |
+
},
|
| 32556 |
+
{
|
| 32557 |
+
"epoch": 0.8610591119863975,
|
| 32558 |
+
"grad_norm": 37.34375,
|
| 32559 |
+
"learning_rate": 9.865459552763198e-06,
|
| 32560 |
+
"loss": 19.2641,
|
| 32561 |
+
"step": 46400
|
| 32562 |
+
},
|
| 32563 |
+
{
|
| 32564 |
+
"epoch": 0.8612446850708774,
|
| 32565 |
+
"grad_norm": 35.1875,
|
| 32566 |
+
"learning_rate": 9.865430556977156e-06,
|
| 32567 |
+
"loss": 19.0881,
|
| 32568 |
+
"step": 46410
|
| 32569 |
+
},
|
| 32570 |
+
{
|
| 32571 |
+
"epoch": 0.8614302581553571,
|
| 32572 |
+
"grad_norm": 38.5625,
|
| 32573 |
+
"learning_rate": 9.865401561191113e-06,
|
| 32574 |
+
"loss": 19.2938,
|
| 32575 |
+
"step": 46420
|
| 32576 |
+
},
|
| 32577 |
+
{
|
| 32578 |
+
"epoch": 0.861615831239837,
|
| 32579 |
+
"grad_norm": 36.21875,
|
| 32580 |
+
"learning_rate": 9.86537256540507e-06,
|
| 32581 |
+
"loss": 18.9959,
|
| 32582 |
+
"step": 46430
|
| 32583 |
+
},
|
| 32584 |
+
{
|
| 32585 |
+
"epoch": 0.8618014043243168,
|
| 32586 |
+
"grad_norm": 36.40625,
|
| 32587 |
+
"learning_rate": 9.865343569619028e-06,
|
| 32588 |
+
"loss": 19.1233,
|
| 32589 |
+
"step": 46440
|
| 32590 |
+
},
|
| 32591 |
+
{
|
| 32592 |
+
"epoch": 0.8619869774087966,
|
| 32593 |
+
"grad_norm": 39.0625,
|
| 32594 |
+
"learning_rate": 9.865314573832985e-06,
|
| 32595 |
+
"loss": 18.7088,
|
| 32596 |
+
"step": 46450
|
| 32597 |
+
},
|
| 32598 |
+
{
|
| 32599 |
+
"epoch": 0.8621725504932765,
|
| 32600 |
+
"grad_norm": 35.25,
|
| 32601 |
+
"learning_rate": 9.865285578046944e-06,
|
| 32602 |
+
"loss": 18.7742,
|
| 32603 |
+
"step": 46460
|
| 32604 |
+
},
|
| 32605 |
+
{
|
| 32606 |
+
"epoch": 0.8623581235777563,
|
| 32607 |
+
"grad_norm": 37.875,
|
| 32608 |
+
"learning_rate": 9.8652565822609e-06,
|
| 32609 |
+
"loss": 19.0445,
|
| 32610 |
+
"step": 46470
|
| 32611 |
+
},
|
| 32612 |
+
{
|
| 32613 |
+
"epoch": 0.8625436966622361,
|
| 32614 |
+
"grad_norm": 36.125,
|
| 32615 |
+
"learning_rate": 9.865227586474858e-06,
|
| 32616 |
+
"loss": 18.9246,
|
| 32617 |
+
"step": 46480
|
| 32618 |
+
},
|
| 32619 |
+
{
|
| 32620 |
+
"epoch": 0.8627292697467159,
|
| 32621 |
+
"grad_norm": 35.8125,
|
| 32622 |
+
"learning_rate": 9.865198590688817e-06,
|
| 32623 |
+
"loss": 19.4552,
|
| 32624 |
+
"step": 46490
|
| 32625 |
+
},
|
| 32626 |
+
{
|
| 32627 |
+
"epoch": 0.8629148428311958,
|
| 32628 |
+
"grad_norm": 37.59375,
|
| 32629 |
+
"learning_rate": 9.865169594902774e-06,
|
| 32630 |
+
"loss": 19.0426,
|
| 32631 |
+
"step": 46500
|
| 32632 |
+
},
|
| 32633 |
+
{
|
| 32634 |
+
"epoch": 0.8631004159156755,
|
| 32635 |
+
"grad_norm": 35.0,
|
| 32636 |
+
"learning_rate": 9.865140599116732e-06,
|
| 32637 |
+
"loss": 18.9247,
|
| 32638 |
+
"step": 46510
|
| 32639 |
+
},
|
| 32640 |
+
{
|
| 32641 |
+
"epoch": 0.8632859890001554,
|
| 32642 |
+
"grad_norm": 36.28125,
|
| 32643 |
+
"learning_rate": 9.865111603330689e-06,
|
| 32644 |
+
"loss": 19.2894,
|
| 32645 |
+
"step": 46520
|
| 32646 |
+
},
|
| 32647 |
+
{
|
| 32648 |
+
"epoch": 0.8634715620846353,
|
| 32649 |
+
"grad_norm": 36.78125,
|
| 32650 |
+
"learning_rate": 9.865082607544646e-06,
|
| 32651 |
+
"loss": 18.7233,
|
| 32652 |
+
"step": 46530
|
| 32653 |
+
},
|
| 32654 |
+
{
|
| 32655 |
+
"epoch": 0.863657135169115,
|
| 32656 |
+
"grad_norm": 35.25,
|
| 32657 |
+
"learning_rate": 9.865053611758604e-06,
|
| 32658 |
+
"loss": 19.2367,
|
| 32659 |
+
"step": 46540
|
| 32660 |
+
},
|
| 32661 |
+
{
|
| 32662 |
+
"epoch": 0.8638427082535949,
|
| 32663 |
+
"grad_norm": 38.46875,
|
| 32664 |
+
"learning_rate": 9.865024615972561e-06,
|
| 32665 |
+
"loss": 19.3671,
|
| 32666 |
+
"step": 46550
|
| 32667 |
+
},
|
| 32668 |
+
{
|
| 32669 |
+
"epoch": 0.8640282813380747,
|
| 32670 |
+
"grad_norm": 33.9375,
|
| 32671 |
+
"learning_rate": 9.864995620186519e-06,
|
| 32672 |
+
"loss": 19.4396,
|
| 32673 |
+
"step": 46560
|
| 32674 |
+
},
|
| 32675 |
+
{
|
| 32676 |
+
"epoch": 0.8642138544225546,
|
| 32677 |
+
"grad_norm": 37.15625,
|
| 32678 |
+
"learning_rate": 9.864966624400478e-06,
|
| 32679 |
+
"loss": 18.6888,
|
| 32680 |
+
"step": 46570
|
| 32681 |
+
},
|
| 32682 |
+
{
|
| 32683 |
+
"epoch": 0.8643994275070344,
|
| 32684 |
+
"grad_norm": 35.40625,
|
| 32685 |
+
"learning_rate": 9.864937628614433e-06,
|
| 32686 |
+
"loss": 19.0491,
|
| 32687 |
+
"step": 46580
|
| 32688 |
+
},
|
| 32689 |
+
{
|
| 32690 |
+
"epoch": 0.8645850005915142,
|
| 32691 |
+
"grad_norm": 38.65625,
|
| 32692 |
+
"learning_rate": 9.864908632828392e-06,
|
| 32693 |
+
"loss": 19.1106,
|
| 32694 |
+
"step": 46590
|
| 32695 |
+
},
|
| 32696 |
+
{
|
| 32697 |
+
"epoch": 0.8647705736759941,
|
| 32698 |
+
"grad_norm": 34.84375,
|
| 32699 |
+
"learning_rate": 9.86487963704235e-06,
|
| 32700 |
+
"loss": 19.0092,
|
| 32701 |
+
"step": 46600
|
| 32702 |
+
},
|
| 32703 |
+
{
|
| 32704 |
+
"epoch": 0.8649561467604738,
|
| 32705 |
+
"grad_norm": 35.625,
|
| 32706 |
+
"learning_rate": 9.864850641256307e-06,
|
| 32707 |
+
"loss": 19.4733,
|
| 32708 |
+
"step": 46610
|
| 32709 |
+
},
|
| 32710 |
+
{
|
| 32711 |
+
"epoch": 0.8651417198449537,
|
| 32712 |
+
"grad_norm": 38.1875,
|
| 32713 |
+
"learning_rate": 9.864821645470265e-06,
|
| 32714 |
+
"loss": 18.892,
|
| 32715 |
+
"step": 46620
|
| 32716 |
+
},
|
| 32717 |
+
{
|
| 32718 |
+
"epoch": 0.8653272929294336,
|
| 32719 |
+
"grad_norm": 35.6875,
|
| 32720 |
+
"learning_rate": 9.864792649684222e-06,
|
| 32721 |
+
"loss": 18.8302,
|
| 32722 |
+
"step": 46630
|
| 32723 |
+
},
|
| 32724 |
+
{
|
| 32725 |
+
"epoch": 0.8655128660139133,
|
| 32726 |
+
"grad_norm": 35.09375,
|
| 32727 |
+
"learning_rate": 9.86476365389818e-06,
|
| 32728 |
+
"loss": 19.2244,
|
| 32729 |
+
"step": 46640
|
| 32730 |
+
},
|
| 32731 |
+
{
|
| 32732 |
+
"epoch": 0.8656984390983932,
|
| 32733 |
+
"grad_norm": 35.40625,
|
| 32734 |
+
"learning_rate": 9.864734658112137e-06,
|
| 32735 |
+
"loss": 18.9491,
|
| 32736 |
+
"step": 46650
|
| 32737 |
+
},
|
| 32738 |
+
{
|
| 32739 |
+
"epoch": 0.865884012182873,
|
| 32740 |
+
"grad_norm": 35.9375,
|
| 32741 |
+
"learning_rate": 9.864705662326094e-06,
|
| 32742 |
+
"loss": 18.688,
|
| 32743 |
+
"step": 46660
|
| 32744 |
+
},
|
| 32745 |
+
{
|
| 32746 |
+
"epoch": 0.8660695852673528,
|
| 32747 |
+
"grad_norm": 36.4375,
|
| 32748 |
+
"learning_rate": 9.864676666540053e-06,
|
| 32749 |
+
"loss": 18.8324,
|
| 32750 |
+
"step": 46670
|
| 32751 |
+
},
|
| 32752 |
+
{
|
| 32753 |
+
"epoch": 0.8662551583518326,
|
| 32754 |
+
"grad_norm": 37.3125,
|
| 32755 |
+
"learning_rate": 9.86464767075401e-06,
|
| 32756 |
+
"loss": 18.8215,
|
| 32757 |
+
"step": 46680
|
| 32758 |
+
},
|
| 32759 |
+
{
|
| 32760 |
+
"epoch": 0.8664407314363125,
|
| 32761 |
+
"grad_norm": 36.65625,
|
| 32762 |
+
"learning_rate": 9.864618674967967e-06,
|
| 32763 |
+
"loss": 18.8459,
|
| 32764 |
+
"step": 46690
|
| 32765 |
+
},
|
| 32766 |
+
{
|
| 32767 |
+
"epoch": 0.8666263045207923,
|
| 32768 |
+
"grad_norm": 35.34375,
|
| 32769 |
+
"learning_rate": 9.864589679181926e-06,
|
| 32770 |
+
"loss": 18.5788,
|
| 32771 |
+
"step": 46700
|
| 32772 |
+
},
|
| 32773 |
+
{
|
| 32774 |
+
"epoch": 0.8668118776052721,
|
| 32775 |
+
"grad_norm": 36.3125,
|
| 32776 |
+
"learning_rate": 9.864560683395883e-06,
|
| 32777 |
+
"loss": 18.8399,
|
| 32778 |
+
"step": 46710
|
| 32779 |
+
},
|
| 32780 |
+
{
|
| 32781 |
+
"epoch": 0.866997450689752,
|
| 32782 |
+
"grad_norm": 36.1875,
|
| 32783 |
+
"learning_rate": 9.86453168760984e-06,
|
| 32784 |
+
"loss": 19.4052,
|
| 32785 |
+
"step": 46720
|
| 32786 |
+
},
|
| 32787 |
+
{
|
| 32788 |
+
"epoch": 0.8671830237742317,
|
| 32789 |
+
"grad_norm": 36.46875,
|
| 32790 |
+
"learning_rate": 9.864502691823798e-06,
|
| 32791 |
+
"loss": 18.9626,
|
| 32792 |
+
"step": 46730
|
| 32793 |
+
},
|
| 32794 |
+
{
|
| 32795 |
+
"epoch": 0.8673685968587116,
|
| 32796 |
+
"grad_norm": 37.1875,
|
| 32797 |
+
"learning_rate": 9.864473696037755e-06,
|
| 32798 |
+
"loss": 18.9508,
|
| 32799 |
+
"step": 46740
|
| 32800 |
+
},
|
| 32801 |
+
{
|
| 32802 |
+
"epoch": 0.8675541699431915,
|
| 32803 |
+
"grad_norm": 34.65625,
|
| 32804 |
+
"learning_rate": 9.864444700251713e-06,
|
| 32805 |
+
"loss": 18.9008,
|
| 32806 |
+
"step": 46750
|
| 32807 |
+
},
|
| 32808 |
+
{
|
| 32809 |
+
"epoch": 0.8677397430276713,
|
| 32810 |
+
"grad_norm": 38.40625,
|
| 32811 |
+
"learning_rate": 9.86441570446567e-06,
|
| 32812 |
+
"loss": 18.8394,
|
| 32813 |
+
"step": 46760
|
| 32814 |
+
},
|
| 32815 |
+
{
|
| 32816 |
+
"epoch": 0.8679253161121511,
|
| 32817 |
+
"grad_norm": 35.96875,
|
| 32818 |
+
"learning_rate": 9.86438670867963e-06,
|
| 32819 |
+
"loss": 18.9625,
|
| 32820 |
+
"step": 46770
|
| 32821 |
+
},
|
| 32822 |
+
{
|
| 32823 |
+
"epoch": 0.8681108891966309,
|
| 32824 |
+
"grad_norm": 34.5625,
|
| 32825 |
+
"learning_rate": 9.864357712893587e-06,
|
| 32826 |
+
"loss": 18.8886,
|
| 32827 |
+
"step": 46780
|
| 32828 |
+
},
|
| 32829 |
+
{
|
| 32830 |
+
"epoch": 0.8682964622811108,
|
| 32831 |
+
"grad_norm": 38.40625,
|
| 32832 |
+
"learning_rate": 9.864328717107542e-06,
|
| 32833 |
+
"loss": 18.8803,
|
| 32834 |
+
"step": 46790
|
| 32835 |
+
},
|
| 32836 |
+
{
|
| 32837 |
+
"epoch": 0.8684820353655905,
|
| 32838 |
+
"grad_norm": 35.84375,
|
| 32839 |
+
"learning_rate": 9.864299721321501e-06,
|
| 32840 |
+
"loss": 19.1182,
|
| 32841 |
+
"step": 46800
|
| 32842 |
+
},
|
| 32843 |
+
{
|
| 32844 |
+
"epoch": 0.8686676084500704,
|
| 32845 |
+
"grad_norm": 37.40625,
|
| 32846 |
+
"learning_rate": 9.864270725535459e-06,
|
| 32847 |
+
"loss": 18.4934,
|
| 32848 |
+
"step": 46810
|
| 32849 |
+
},
|
| 32850 |
+
{
|
| 32851 |
+
"epoch": 0.8688531815345503,
|
| 32852 |
+
"grad_norm": 36.875,
|
| 32853 |
+
"learning_rate": 9.864241729749416e-06,
|
| 32854 |
+
"loss": 18.8362,
|
| 32855 |
+
"step": 46820
|
| 32856 |
+
},
|
| 32857 |
+
{
|
| 32858 |
+
"epoch": 0.86903875461903,
|
| 32859 |
+
"grad_norm": 37.46875,
|
| 32860 |
+
"learning_rate": 9.864212733963374e-06,
|
| 32861 |
+
"loss": 18.8735,
|
| 32862 |
+
"step": 46830
|
| 32863 |
+
},
|
| 32864 |
+
{
|
| 32865 |
+
"epoch": 0.8692243277035099,
|
| 32866 |
+
"grad_norm": 34.375,
|
| 32867 |
+
"learning_rate": 9.864183738177333e-06,
|
| 32868 |
+
"loss": 19.1104,
|
| 32869 |
+
"step": 46840
|
| 32870 |
+
},
|
| 32871 |
+
{
|
| 32872 |
+
"epoch": 0.8694099007879897,
|
| 32873 |
+
"grad_norm": 36.375,
|
| 32874 |
+
"learning_rate": 9.864154742391288e-06,
|
| 32875 |
+
"loss": 18.7271,
|
| 32876 |
+
"step": 46850
|
| 32877 |
+
},
|
| 32878 |
+
{
|
| 32879 |
+
"epoch": 0.8695954738724695,
|
| 32880 |
+
"grad_norm": 37.0,
|
| 32881 |
+
"learning_rate": 9.864125746605246e-06,
|
| 32882 |
+
"loss": 19.1983,
|
| 32883 |
+
"step": 46860
|
| 32884 |
+
},
|
| 32885 |
+
{
|
| 32886 |
+
"epoch": 0.8697810469569494,
|
| 32887 |
+
"grad_norm": 36.5625,
|
| 32888 |
+
"learning_rate": 9.864096750819205e-06,
|
| 32889 |
+
"loss": 19.5429,
|
| 32890 |
+
"step": 46870
|
| 32891 |
+
},
|
| 32892 |
+
{
|
| 32893 |
+
"epoch": 0.8699666200414292,
|
| 32894 |
+
"grad_norm": 36.90625,
|
| 32895 |
+
"learning_rate": 9.864067755033162e-06,
|
| 32896 |
+
"loss": 19.0335,
|
| 32897 |
+
"step": 46880
|
| 32898 |
+
},
|
| 32899 |
+
{
|
| 32900 |
+
"epoch": 0.870152193125909,
|
| 32901 |
+
"grad_norm": 38.6875,
|
| 32902 |
+
"learning_rate": 9.86403875924712e-06,
|
| 32903 |
+
"loss": 19.148,
|
| 32904 |
+
"step": 46890
|
| 32905 |
+
},
|
| 32906 |
+
{
|
| 32907 |
+
"epoch": 0.8703377662103888,
|
| 32908 |
+
"grad_norm": 36.5,
|
| 32909 |
+
"learning_rate": 9.864009763461077e-06,
|
| 32910 |
+
"loss": 19.0772,
|
| 32911 |
+
"step": 46900
|
| 32912 |
+
},
|
| 32913 |
+
{
|
| 32914 |
+
"epoch": 0.8705233392948687,
|
| 32915 |
+
"grad_norm": 37.5625,
|
| 32916 |
+
"learning_rate": 9.863980767675035e-06,
|
| 32917 |
+
"loss": 18.9625,
|
| 32918 |
+
"step": 46910
|
| 32919 |
+
},
|
| 32920 |
+
{
|
| 32921 |
+
"epoch": 0.8707089123793486,
|
| 32922 |
+
"grad_norm": 36.0,
|
| 32923 |
+
"learning_rate": 9.863951771888992e-06,
|
| 32924 |
+
"loss": 18.4917,
|
| 32925 |
+
"step": 46920
|
| 32926 |
+
},
|
| 32927 |
+
{
|
| 32928 |
+
"epoch": 0.8708944854638283,
|
| 32929 |
+
"grad_norm": 34.78125,
|
| 32930 |
+
"learning_rate": 9.86392277610295e-06,
|
| 32931 |
+
"loss": 19.0948,
|
| 32932 |
+
"step": 46930
|
| 32933 |
+
},
|
| 32934 |
+
{
|
| 32935 |
+
"epoch": 0.8710800585483082,
|
| 32936 |
+
"grad_norm": 36.84375,
|
| 32937 |
+
"learning_rate": 9.863893780316908e-06,
|
| 32938 |
+
"loss": 18.7454,
|
| 32939 |
+
"step": 46940
|
| 32940 |
+
},
|
| 32941 |
+
{
|
| 32942 |
+
"epoch": 0.871265631632788,
|
| 32943 |
+
"grad_norm": 35.4375,
|
| 32944 |
+
"learning_rate": 9.863864784530866e-06,
|
| 32945 |
+
"loss": 19.0614,
|
| 32946 |
+
"step": 46950
|
| 32947 |
+
},
|
| 32948 |
+
{
|
| 32949 |
+
"epoch": 0.8714512047172678,
|
| 32950 |
+
"grad_norm": 35.65625,
|
| 32951 |
+
"learning_rate": 9.863835788744822e-06,
|
| 32952 |
+
"loss": 19.0184,
|
| 32953 |
+
"step": 46960
|
| 32954 |
+
},
|
| 32955 |
+
{
|
| 32956 |
+
"epoch": 0.8716367778017476,
|
| 32957 |
+
"grad_norm": 36.0625,
|
| 32958 |
+
"learning_rate": 9.86380679295878e-06,
|
| 32959 |
+
"loss": 18.9749,
|
| 32960 |
+
"step": 46970
|
| 32961 |
+
},
|
| 32962 |
+
{
|
| 32963 |
+
"epoch": 0.8718223508862275,
|
| 32964 |
+
"grad_norm": 38.625,
|
| 32965 |
+
"learning_rate": 9.863777797172738e-06,
|
| 32966 |
+
"loss": 18.9738,
|
| 32967 |
+
"step": 46980
|
| 32968 |
+
},
|
| 32969 |
+
{
|
| 32970 |
+
"epoch": 0.8720079239707073,
|
| 32971 |
+
"grad_norm": 36.0625,
|
| 32972 |
+
"learning_rate": 9.863748801386696e-06,
|
| 32973 |
+
"loss": 18.6942,
|
| 32974 |
+
"step": 46990
|
| 32975 |
+
},
|
| 32976 |
+
{
|
| 32977 |
+
"epoch": 0.8721934970551871,
|
| 32978 |
+
"grad_norm": 37.3125,
|
| 32979 |
+
"learning_rate": 9.863719805600653e-06,
|
| 32980 |
+
"loss": 19.1092,
|
| 32981 |
+
"step": 47000
|
| 32982 |
+
},
|
| 32983 |
+
{
|
| 32984 |
+
"epoch": 0.872379070139667,
|
| 32985 |
+
"grad_norm": 36.375,
|
| 32986 |
+
"learning_rate": 9.86369080981461e-06,
|
| 32987 |
+
"loss": 19.1041,
|
| 32988 |
+
"step": 47010
|
| 32989 |
+
},
|
| 32990 |
+
{
|
| 32991 |
+
"epoch": 0.8725646432241467,
|
| 32992 |
+
"grad_norm": 36.46875,
|
| 32993 |
+
"learning_rate": 9.863661814028568e-06,
|
| 32994 |
+
"loss": 18.8907,
|
| 32995 |
+
"step": 47020
|
| 32996 |
+
},
|
| 32997 |
+
{
|
| 32998 |
+
"epoch": 0.8727502163086266,
|
| 32999 |
+
"grad_norm": 36.625,
|
| 33000 |
+
"learning_rate": 9.863632818242525e-06,
|
| 33001 |
+
"loss": 18.604,
|
| 33002 |
+
"step": 47030
|
| 33003 |
+
},
|
| 33004 |
+
{
|
| 33005 |
+
"epoch": 0.8729357893931065,
|
| 33006 |
+
"grad_norm": 34.40625,
|
| 33007 |
+
"learning_rate": 9.863603822456483e-06,
|
| 33008 |
+
"loss": 18.9317,
|
| 33009 |
+
"step": 47040
|
| 33010 |
+
},
|
| 33011 |
+
{
|
| 33012 |
+
"epoch": 0.8731213624775862,
|
| 33013 |
+
"grad_norm": 35.78125,
|
| 33014 |
+
"learning_rate": 9.863574826670442e-06,
|
| 33015 |
+
"loss": 18.8261,
|
| 33016 |
+
"step": 47050
|
| 33017 |
+
},
|
| 33018 |
+
{
|
| 33019 |
+
"epoch": 0.8733069355620661,
|
| 33020 |
+
"grad_norm": 35.375,
|
| 33021 |
+
"learning_rate": 9.863545830884397e-06,
|
| 33022 |
+
"loss": 18.7842,
|
| 33023 |
+
"step": 47060
|
| 33024 |
+
},
|
| 33025 |
+
{
|
| 33026 |
+
"epoch": 0.8734925086465459,
|
| 33027 |
+
"grad_norm": 35.65625,
|
| 33028 |
+
"learning_rate": 9.863516835098356e-06,
|
| 33029 |
+
"loss": 19.1264,
|
| 33030 |
+
"step": 47070
|
| 33031 |
+
},
|
| 33032 |
+
{
|
| 33033 |
+
"epoch": 0.8736780817310257,
|
| 33034 |
+
"grad_norm": 37.59375,
|
| 33035 |
+
"learning_rate": 9.863487839312314e-06,
|
| 33036 |
+
"loss": 18.9965,
|
| 33037 |
+
"step": 47080
|
| 33038 |
+
},
|
| 33039 |
+
{
|
| 33040 |
+
"epoch": 0.8738636548155055,
|
| 33041 |
+
"grad_norm": 34.65625,
|
| 33042 |
+
"learning_rate": 9.863458843526271e-06,
|
| 33043 |
+
"loss": 18.7441,
|
| 33044 |
+
"step": 47090
|
| 33045 |
+
},
|
| 33046 |
+
{
|
| 33047 |
+
"epoch": 0.8740492278999854,
|
| 33048 |
+
"grad_norm": 35.03125,
|
| 33049 |
+
"learning_rate": 9.863429847740229e-06,
|
| 33050 |
+
"loss": 19.2158,
|
| 33051 |
+
"step": 47100
|
| 33052 |
+
},
|
| 33053 |
+
{
|
| 33054 |
+
"epoch": 0.8742348009844653,
|
| 33055 |
+
"grad_norm": 37.53125,
|
| 33056 |
+
"learning_rate": 9.863400851954186e-06,
|
| 33057 |
+
"loss": 19.4903,
|
| 33058 |
+
"step": 47110
|
| 33059 |
+
},
|
| 33060 |
+
{
|
| 33061 |
+
"epoch": 0.874420374068945,
|
| 33062 |
+
"grad_norm": 35.09375,
|
| 33063 |
+
"learning_rate": 9.863371856168144e-06,
|
| 33064 |
+
"loss": 19.3462,
|
| 33065 |
+
"step": 47120
|
| 33066 |
+
},
|
| 33067 |
+
{
|
| 33068 |
+
"epoch": 0.8746059471534249,
|
| 33069 |
+
"grad_norm": 34.96875,
|
| 33070 |
+
"learning_rate": 9.863342860382101e-06,
|
| 33071 |
+
"loss": 18.5112,
|
| 33072 |
+
"step": 47130
|
| 33073 |
+
},
|
| 33074 |
+
{
|
| 33075 |
+
"epoch": 0.8747915202379047,
|
| 33076 |
+
"grad_norm": 35.84375,
|
| 33077 |
+
"learning_rate": 9.863313864596058e-06,
|
| 33078 |
+
"loss": 18.7684,
|
| 33079 |
+
"step": 47140
|
| 33080 |
+
},
|
| 33081 |
+
{
|
| 33082 |
+
"epoch": 0.8749770933223845,
|
| 33083 |
+
"grad_norm": 37.84375,
|
| 33084 |
+
"learning_rate": 9.863284868810017e-06,
|
| 33085 |
+
"loss": 18.657,
|
| 33086 |
+
"step": 47150
|
| 33087 |
+
},
|
| 33088 |
+
{
|
| 33089 |
+
"epoch": 0.8751626664068644,
|
| 33090 |
+
"grad_norm": 34.625,
|
| 33091 |
+
"learning_rate": 9.863255873023975e-06,
|
| 33092 |
+
"loss": 18.6456,
|
| 33093 |
+
"step": 47160
|
| 33094 |
+
},
|
| 33095 |
+
{
|
| 33096 |
+
"epoch": 0.8753482394913442,
|
| 33097 |
+
"grad_norm": 34.75,
|
| 33098 |
+
"learning_rate": 9.86322687723793e-06,
|
| 33099 |
+
"loss": 18.837,
|
| 33100 |
+
"step": 47170
|
| 33101 |
+
},
|
| 33102 |
+
{
|
| 33103 |
+
"epoch": 0.875533812575824,
|
| 33104 |
+
"grad_norm": 36.25,
|
| 33105 |
+
"learning_rate": 9.86319788145189e-06,
|
| 33106 |
+
"loss": 18.7192,
|
| 33107 |
+
"step": 47180
|
| 33108 |
+
},
|
| 33109 |
+
{
|
| 33110 |
+
"epoch": 0.8757193856603038,
|
| 33111 |
+
"grad_norm": 36.1875,
|
| 33112 |
+
"learning_rate": 9.863168885665847e-06,
|
| 33113 |
+
"loss": 18.5609,
|
| 33114 |
+
"step": 47190
|
| 33115 |
+
},
|
| 33116 |
+
{
|
| 33117 |
+
"epoch": 0.8759049587447837,
|
| 33118 |
+
"grad_norm": 35.5625,
|
| 33119 |
+
"learning_rate": 9.863139889879804e-06,
|
| 33120 |
+
"loss": 18.5446,
|
| 33121 |
+
"step": 47200
|
| 33122 |
+
},
|
| 33123 |
+
{
|
| 33124 |
+
"epoch": 0.8760905318292634,
|
| 33125 |
+
"grad_norm": 35.34375,
|
| 33126 |
+
"learning_rate": 9.863110894093762e-06,
|
| 33127 |
+
"loss": 19.2299,
|
| 33128 |
+
"step": 47210
|
| 33129 |
+
},
|
| 33130 |
+
{
|
| 33131 |
+
"epoch": 0.8762761049137433,
|
| 33132 |
+
"grad_norm": 36.78125,
|
| 33133 |
+
"learning_rate": 9.86308189830772e-06,
|
| 33134 |
+
"loss": 18.9342,
|
| 33135 |
+
"step": 47220
|
| 33136 |
+
},
|
| 33137 |
+
{
|
| 33138 |
+
"epoch": 0.8764616779982232,
|
| 33139 |
+
"grad_norm": 35.5,
|
| 33140 |
+
"learning_rate": 9.863052902521677e-06,
|
| 33141 |
+
"loss": 18.7533,
|
| 33142 |
+
"step": 47230
|
| 33143 |
+
},
|
| 33144 |
+
{
|
| 33145 |
+
"epoch": 0.8766472510827029,
|
| 33146 |
+
"grad_norm": 36.28125,
|
| 33147 |
+
"learning_rate": 9.863023906735634e-06,
|
| 33148 |
+
"loss": 18.6551,
|
| 33149 |
+
"step": 47240
|
| 33150 |
+
},
|
| 33151 |
+
{
|
| 33152 |
+
"epoch": 0.8768328241671828,
|
| 33153 |
+
"grad_norm": 36.84375,
|
| 33154 |
+
"learning_rate": 9.862994910949593e-06,
|
| 33155 |
+
"loss": 18.4443,
|
| 33156 |
+
"step": 47250
|
| 33157 |
+
},
|
| 33158 |
+
{
|
| 33159 |
+
"epoch": 0.8770183972516626,
|
| 33160 |
+
"grad_norm": 38.1875,
|
| 33161 |
+
"learning_rate": 9.86296591516355e-06,
|
| 33162 |
+
"loss": 18.5278,
|
| 33163 |
+
"step": 47260
|
| 33164 |
+
},
|
| 33165 |
+
{
|
| 33166 |
+
"epoch": 0.8772039703361424,
|
| 33167 |
+
"grad_norm": 37.03125,
|
| 33168 |
+
"learning_rate": 9.862936919377508e-06,
|
| 33169 |
+
"loss": 18.8955,
|
| 33170 |
+
"step": 47270
|
| 33171 |
+
},
|
| 33172 |
+
{
|
| 33173 |
+
"epoch": 0.8773895434206223,
|
| 33174 |
+
"grad_norm": 37.125,
|
| 33175 |
+
"learning_rate": 9.862907923591465e-06,
|
| 33176 |
+
"loss": 18.8201,
|
| 33177 |
+
"step": 47280
|
| 33178 |
+
},
|
| 33179 |
+
{
|
| 33180 |
+
"epoch": 0.8775751165051021,
|
| 33181 |
+
"grad_norm": 35.0625,
|
| 33182 |
+
"learning_rate": 9.862878927805423e-06,
|
| 33183 |
+
"loss": 18.9443,
|
| 33184 |
+
"step": 47290
|
| 33185 |
+
},
|
| 33186 |
+
{
|
| 33187 |
+
"epoch": 0.877760689589582,
|
| 33188 |
+
"grad_norm": 34.78125,
|
| 33189 |
+
"learning_rate": 9.86284993201938e-06,
|
| 33190 |
+
"loss": 19.0257,
|
| 33191 |
+
"step": 47300
|
| 33192 |
+
},
|
| 33193 |
+
{
|
| 33194 |
+
"epoch": 0.8779462626740617,
|
| 33195 |
+
"grad_norm": 37.40625,
|
| 33196 |
+
"learning_rate": 9.862820936233338e-06,
|
| 33197 |
+
"loss": 19.4971,
|
| 33198 |
+
"step": 47310
|
| 33199 |
+
},
|
| 33200 |
+
{
|
| 33201 |
+
"epoch": 0.8781318357585416,
|
| 33202 |
+
"grad_norm": 35.75,
|
| 33203 |
+
"learning_rate": 9.862791940447297e-06,
|
| 33204 |
+
"loss": 19.3668,
|
| 33205 |
+
"step": 47320
|
| 33206 |
+
},
|
| 33207 |
+
{
|
| 33208 |
+
"epoch": 0.8783174088430215,
|
| 33209 |
+
"grad_norm": 36.40625,
|
| 33210 |
+
"learning_rate": 9.862762944661252e-06,
|
| 33211 |
+
"loss": 18.8734,
|
| 33212 |
+
"step": 47330
|
| 33213 |
+
},
|
| 33214 |
+
{
|
| 33215 |
+
"epoch": 0.8785029819275012,
|
| 33216 |
+
"grad_norm": 35.8125,
|
| 33217 |
+
"learning_rate": 9.86273394887521e-06,
|
| 33218 |
+
"loss": 19.0038,
|
| 33219 |
+
"step": 47340
|
| 33220 |
+
},
|
| 33221 |
+
{
|
| 33222 |
+
"epoch": 0.8786885550119811,
|
| 33223 |
+
"grad_norm": 36.65625,
|
| 33224 |
+
"learning_rate": 9.862704953089169e-06,
|
| 33225 |
+
"loss": 18.8259,
|
| 33226 |
+
"step": 47350
|
| 33227 |
+
},
|
| 33228 |
+
{
|
| 33229 |
+
"epoch": 0.8788741280964609,
|
| 33230 |
+
"grad_norm": 36.875,
|
| 33231 |
+
"learning_rate": 9.862675957303126e-06,
|
| 33232 |
+
"loss": 18.5228,
|
| 33233 |
+
"step": 47360
|
| 33234 |
+
},
|
| 33235 |
+
{
|
| 33236 |
+
"epoch": 0.8790597011809407,
|
| 33237 |
+
"grad_norm": 36.6875,
|
| 33238 |
+
"learning_rate": 9.862646961517084e-06,
|
| 33239 |
+
"loss": 19.1345,
|
| 33240 |
+
"step": 47370
|
| 33241 |
+
},
|
| 33242 |
+
{
|
| 33243 |
+
"epoch": 0.8792452742654205,
|
| 33244 |
+
"grad_norm": 36.0625,
|
| 33245 |
+
"learning_rate": 9.862617965731041e-06,
|
| 33246 |
+
"loss": 19.2282,
|
| 33247 |
+
"step": 47380
|
| 33248 |
+
},
|
| 33249 |
+
{
|
| 33250 |
+
"epoch": 0.8794308473499004,
|
| 33251 |
+
"grad_norm": 37.0,
|
| 33252 |
+
"learning_rate": 9.862588969944999e-06,
|
| 33253 |
+
"loss": 19.0534,
|
| 33254 |
+
"step": 47390
|
| 33255 |
+
},
|
| 33256 |
+
{
|
| 33257 |
+
"epoch": 0.8796164204343802,
|
| 33258 |
+
"grad_norm": 36.59375,
|
| 33259 |
+
"learning_rate": 9.862559974158956e-06,
|
| 33260 |
+
"loss": 19.0539,
|
| 33261 |
+
"step": 47400
|
| 33262 |
+
},
|
| 33263 |
+
{
|
| 33264 |
+
"epoch": 0.87980199351886,
|
| 33265 |
+
"grad_norm": 37.78125,
|
| 33266 |
+
"learning_rate": 9.862530978372913e-06,
|
| 33267 |
+
"loss": 18.8846,
|
| 33268 |
+
"step": 47410
|
| 33269 |
+
},
|
| 33270 |
+
{
|
| 33271 |
+
"epoch": 0.8799875666033399,
|
| 33272 |
+
"grad_norm": 35.5625,
|
| 33273 |
+
"learning_rate": 9.862501982586873e-06,
|
| 33274 |
+
"loss": 19.2657,
|
| 33275 |
+
"step": 47420
|
| 33276 |
+
},
|
| 33277 |
+
{
|
| 33278 |
+
"epoch": 0.8801731396878196,
|
| 33279 |
+
"grad_norm": 37.09375,
|
| 33280 |
+
"learning_rate": 9.86247298680083e-06,
|
| 33281 |
+
"loss": 18.8702,
|
| 33282 |
+
"step": 47430
|
| 33283 |
+
},
|
| 33284 |
+
{
|
| 33285 |
+
"epoch": 0.8803587127722995,
|
| 33286 |
+
"grad_norm": 36.71875,
|
| 33287 |
+
"learning_rate": 9.862443991014786e-06,
|
| 33288 |
+
"loss": 18.7863,
|
| 33289 |
+
"step": 47440
|
| 33290 |
+
},
|
| 33291 |
+
{
|
| 33292 |
+
"epoch": 0.8805442858567794,
|
| 33293 |
+
"grad_norm": 36.90625,
|
| 33294 |
+
"learning_rate": 9.862414995228745e-06,
|
| 33295 |
+
"loss": 19.1751,
|
| 33296 |
+
"step": 47450
|
| 33297 |
+
},
|
| 33298 |
+
{
|
| 33299 |
+
"epoch": 0.8807298589412592,
|
| 33300 |
+
"grad_norm": 36.75,
|
| 33301 |
+
"learning_rate": 9.862385999442702e-06,
|
| 33302 |
+
"loss": 18.5023,
|
| 33303 |
+
"step": 47460
|
| 33304 |
+
},
|
| 33305 |
+
{
|
| 33306 |
+
"epoch": 0.880915432025739,
|
| 33307 |
+
"grad_norm": 35.375,
|
| 33308 |
+
"learning_rate": 9.86235700365666e-06,
|
| 33309 |
+
"loss": 18.9846,
|
| 33310 |
+
"step": 47470
|
| 33311 |
+
},
|
| 33312 |
+
{
|
| 33313 |
+
"epoch": 0.8811010051102188,
|
| 33314 |
+
"grad_norm": 34.90625,
|
| 33315 |
+
"learning_rate": 9.862328007870617e-06,
|
| 33316 |
+
"loss": 19.0925,
|
| 33317 |
+
"step": 47480
|
| 33318 |
+
},
|
| 33319 |
+
{
|
| 33320 |
+
"epoch": 0.8812865781946987,
|
| 33321 |
+
"grad_norm": 37.6875,
|
| 33322 |
+
"learning_rate": 9.862299012084574e-06,
|
| 33323 |
+
"loss": 19.0749,
|
| 33324 |
+
"step": 47490
|
| 33325 |
+
},
|
| 33326 |
+
{
|
| 33327 |
+
"epoch": 0.8814721512791784,
|
| 33328 |
+
"grad_norm": 38.9375,
|
| 33329 |
+
"learning_rate": 9.862270016298532e-06,
|
| 33330 |
+
"loss": 18.9493,
|
| 33331 |
+
"step": 47500
|
| 33332 |
+
},
|
| 33333 |
+
{
|
| 33334 |
+
"epoch": 0.8816577243636583,
|
| 33335 |
+
"grad_norm": 35.15625,
|
| 33336 |
+
"learning_rate": 9.86224102051249e-06,
|
| 33337 |
+
"loss": 19.2803,
|
| 33338 |
+
"step": 47510
|
| 33339 |
+
},
|
| 33340 |
+
{
|
| 33341 |
+
"epoch": 0.8818432974481382,
|
| 33342 |
+
"grad_norm": 39.0,
|
| 33343 |
+
"learning_rate": 9.862212024726448e-06,
|
| 33344 |
+
"loss": 19.335,
|
| 33345 |
+
"step": 47520
|
| 33346 |
+
},
|
| 33347 |
+
{
|
| 33348 |
+
"epoch": 0.8820288705326179,
|
| 33349 |
+
"grad_norm": 35.5625,
|
| 33350 |
+
"learning_rate": 9.862183028940406e-06,
|
| 33351 |
+
"loss": 18.7052,
|
| 33352 |
+
"step": 47530
|
| 33353 |
+
},
|
| 33354 |
+
{
|
| 33355 |
+
"epoch": 0.8822144436170978,
|
| 33356 |
+
"grad_norm": 38.09375,
|
| 33357 |
+
"learning_rate": 9.862154033154363e-06,
|
| 33358 |
+
"loss": 19.1641,
|
| 33359 |
+
"step": 47540
|
| 33360 |
+
},
|
| 33361 |
+
{
|
| 33362 |
+
"epoch": 0.8824000167015776,
|
| 33363 |
+
"grad_norm": 37.125,
|
| 33364 |
+
"learning_rate": 9.86212503736832e-06,
|
| 33365 |
+
"loss": 19.243,
|
| 33366 |
+
"step": 47550
|
| 33367 |
+
},
|
| 33368 |
+
{
|
| 33369 |
+
"epoch": 0.8825855897860574,
|
| 33370 |
+
"grad_norm": 37.71875,
|
| 33371 |
+
"learning_rate": 9.862096041582278e-06,
|
| 33372 |
+
"loss": 19.053,
|
| 33373 |
+
"step": 47560
|
| 33374 |
+
},
|
| 33375 |
+
{
|
| 33376 |
+
"epoch": 0.8827711628705373,
|
| 33377 |
+
"grad_norm": 32.5,
|
| 33378 |
+
"learning_rate": 9.862067045796235e-06,
|
| 33379 |
+
"loss": 18.7586,
|
| 33380 |
+
"step": 47570
|
| 33381 |
+
},
|
| 33382 |
+
{
|
| 33383 |
+
"epoch": 0.8829567359550171,
|
| 33384 |
+
"grad_norm": 35.03125,
|
| 33385 |
+
"learning_rate": 9.862038050010193e-06,
|
| 33386 |
+
"loss": 19.1617,
|
| 33387 |
+
"step": 47580
|
| 33388 |
+
},
|
| 33389 |
+
{
|
| 33390 |
+
"epoch": 0.8831423090394969,
|
| 33391 |
+
"grad_norm": 36.59375,
|
| 33392 |
+
"learning_rate": 9.86200905422415e-06,
|
| 33393 |
+
"loss": 18.6394,
|
| 33394 |
+
"step": 47590
|
| 33395 |
+
},
|
| 33396 |
+
{
|
| 33397 |
+
"epoch": 0.8833278821239767,
|
| 33398 |
+
"grad_norm": 36.75,
|
| 33399 |
+
"learning_rate": 9.861980058438108e-06,
|
| 33400 |
+
"loss": 18.9452,
|
| 33401 |
+
"step": 47600
|
| 33402 |
+
},
|
| 33403 |
+
{
|
| 33404 |
+
"epoch": 0.8835134552084566,
|
| 33405 |
+
"grad_norm": 35.0,
|
| 33406 |
+
"learning_rate": 9.861951062652065e-06,
|
| 33407 |
+
"loss": 19.132,
|
| 33408 |
+
"step": 47610
|
| 33409 |
+
},
|
| 33410 |
+
{
|
| 33411 |
+
"epoch": 0.8836990282929363,
|
| 33412 |
+
"grad_norm": 38.15625,
|
| 33413 |
+
"learning_rate": 9.861922066866022e-06,
|
| 33414 |
+
"loss": 19.0798,
|
| 33415 |
+
"step": 47620
|
| 33416 |
+
},
|
| 33417 |
+
{
|
| 33418 |
+
"epoch": 0.8838846013774162,
|
| 33419 |
+
"grad_norm": 35.96875,
|
| 33420 |
+
"learning_rate": 9.861893071079981e-06,
|
| 33421 |
+
"loss": 18.363,
|
| 33422 |
+
"step": 47630
|
| 33423 |
+
},
|
| 33424 |
+
{
|
| 33425 |
+
"epoch": 0.8840701744618961,
|
| 33426 |
+
"grad_norm": 35.59375,
|
| 33427 |
+
"learning_rate": 9.861864075293939e-06,
|
| 33428 |
+
"loss": 18.5769,
|
| 33429 |
+
"step": 47640
|
| 33430 |
+
},
|
| 33431 |
+
{
|
| 33432 |
+
"epoch": 0.8842557475463759,
|
| 33433 |
+
"grad_norm": 35.1875,
|
| 33434 |
+
"learning_rate": 9.861835079507896e-06,
|
| 33435 |
+
"loss": 18.8611,
|
| 33436 |
+
"step": 47650
|
| 33437 |
+
},
|
| 33438 |
+
{
|
| 33439 |
+
"epoch": 0.8844413206308557,
|
| 33440 |
+
"grad_norm": 36.125,
|
| 33441 |
+
"learning_rate": 9.861806083721854e-06,
|
| 33442 |
+
"loss": 18.9997,
|
| 33443 |
+
"step": 47660
|
| 33444 |
+
},
|
| 33445 |
+
{
|
| 33446 |
+
"epoch": 0.8846268937153355,
|
| 33447 |
+
"grad_norm": 35.5625,
|
| 33448 |
+
"learning_rate": 9.861777087935811e-06,
|
| 33449 |
+
"loss": 18.8766,
|
| 33450 |
+
"step": 47670
|
| 33451 |
+
},
|
| 33452 |
+
{
|
| 33453 |
+
"epoch": 0.8848124667998154,
|
| 33454 |
+
"grad_norm": 34.6875,
|
| 33455 |
+
"learning_rate": 9.861748092149768e-06,
|
| 33456 |
+
"loss": 19.2543,
|
| 33457 |
+
"step": 47680
|
| 33458 |
+
},
|
| 33459 |
+
{
|
| 33460 |
+
"epoch": 0.8849980398842952,
|
| 33461 |
+
"grad_norm": 35.25,
|
| 33462 |
+
"learning_rate": 9.861719096363726e-06,
|
| 33463 |
+
"loss": 18.987,
|
| 33464 |
+
"step": 47690
|
| 33465 |
+
},
|
| 33466 |
+
{
|
| 33467 |
+
"epoch": 0.885183612968775,
|
| 33468 |
+
"grad_norm": 38.28125,
|
| 33469 |
+
"learning_rate": 9.861690100577685e-06,
|
| 33470 |
+
"loss": 18.7889,
|
| 33471 |
+
"step": 47700
|
| 33472 |
+
},
|
| 33473 |
+
{
|
| 33474 |
+
"epoch": 0.8853691860532549,
|
| 33475 |
+
"grad_norm": 36.9375,
|
| 33476 |
+
"learning_rate": 9.86166110479164e-06,
|
| 33477 |
+
"loss": 18.8273,
|
| 33478 |
+
"step": 47710
|
| 33479 |
+
},
|
| 33480 |
+
{
|
| 33481 |
+
"epoch": 0.8855547591377346,
|
| 33482 |
+
"grad_norm": 38.03125,
|
| 33483 |
+
"learning_rate": 9.861632109005598e-06,
|
| 33484 |
+
"loss": 18.6823,
|
| 33485 |
+
"step": 47720
|
| 33486 |
+
},
|
| 33487 |
+
{
|
| 33488 |
+
"epoch": 0.8857403322222145,
|
| 33489 |
+
"grad_norm": 36.03125,
|
| 33490 |
+
"learning_rate": 9.861603113219557e-06,
|
| 33491 |
+
"loss": 18.8164,
|
| 33492 |
+
"step": 47730
|
| 33493 |
+
},
|
| 33494 |
+
{
|
| 33495 |
+
"epoch": 0.8859259053066944,
|
| 33496 |
+
"grad_norm": 33.46875,
|
| 33497 |
+
"learning_rate": 9.861574117433515e-06,
|
| 33498 |
+
"loss": 18.9653,
|
| 33499 |
+
"step": 47740
|
| 33500 |
+
},
|
| 33501 |
+
{
|
| 33502 |
+
"epoch": 0.8861114783911741,
|
| 33503 |
+
"grad_norm": 35.03125,
|
| 33504 |
+
"learning_rate": 9.861545121647472e-06,
|
| 33505 |
+
"loss": 18.7712,
|
| 33506 |
+
"step": 47750
|
| 33507 |
+
},
|
| 33508 |
+
{
|
| 33509 |
+
"epoch": 0.886297051475654,
|
| 33510 |
+
"grad_norm": 35.5,
|
| 33511 |
+
"learning_rate": 9.86151612586143e-06,
|
| 33512 |
+
"loss": 19.0222,
|
| 33513 |
+
"step": 47760
|
| 33514 |
+
},
|
| 33515 |
+
{
|
| 33516 |
+
"epoch": 0.8864826245601338,
|
| 33517 |
+
"grad_norm": 37.375,
|
| 33518 |
+
"learning_rate": 9.861487130075387e-06,
|
| 33519 |
+
"loss": 19.0491,
|
| 33520 |
+
"step": 47770
|
| 33521 |
+
},
|
| 33522 |
+
{
|
| 33523 |
+
"epoch": 0.8866681976446136,
|
| 33524 |
+
"grad_norm": 37.46875,
|
| 33525 |
+
"learning_rate": 9.861458134289344e-06,
|
| 33526 |
+
"loss": 19.0221,
|
| 33527 |
+
"step": 47780
|
| 33528 |
+
},
|
| 33529 |
+
{
|
| 33530 |
+
"epoch": 0.8868537707290934,
|
| 33531 |
+
"grad_norm": 38.96875,
|
| 33532 |
+
"learning_rate": 9.861429138503302e-06,
|
| 33533 |
+
"loss": 19.1566,
|
| 33534 |
+
"step": 47790
|
| 33535 |
+
},
|
| 33536 |
+
{
|
| 33537 |
+
"epoch": 0.8870393438135733,
|
| 33538 |
+
"grad_norm": 37.34375,
|
| 33539 |
+
"learning_rate": 9.86140014271726e-06,
|
| 33540 |
+
"loss": 18.9649,
|
| 33541 |
+
"step": 47800
|
| 33542 |
+
},
|
| 33543 |
+
{
|
| 33544 |
+
"epoch": 0.887224916898053,
|
| 33545 |
+
"grad_norm": 36.28125,
|
| 33546 |
+
"learning_rate": 9.861371146931216e-06,
|
| 33547 |
+
"loss": 18.9498,
|
| 33548 |
+
"step": 47810
|
| 33549 |
+
},
|
| 33550 |
+
{
|
| 33551 |
+
"epoch": 0.8874104899825329,
|
| 33552 |
+
"grad_norm": 37.125,
|
| 33553 |
+
"learning_rate": 9.861342151145174e-06,
|
| 33554 |
+
"loss": 18.8002,
|
| 33555 |
+
"step": 47820
|
| 33556 |
+
},
|
| 33557 |
+
{
|
| 33558 |
+
"epoch": 0.8875960630670128,
|
| 33559 |
+
"grad_norm": 35.5,
|
| 33560 |
+
"learning_rate": 9.861313155359133e-06,
|
| 33561 |
+
"loss": 19.0088,
|
| 33562 |
+
"step": 47830
|
| 33563 |
+
},
|
| 33564 |
+
{
|
| 33565 |
+
"epoch": 0.8877816361514926,
|
| 33566 |
+
"grad_norm": 37.53125,
|
| 33567 |
+
"learning_rate": 9.86128415957309e-06,
|
| 33568 |
+
"loss": 18.5887,
|
| 33569 |
+
"step": 47840
|
| 33570 |
+
},
|
| 33571 |
+
{
|
| 33572 |
+
"epoch": 0.8879672092359724,
|
| 33573 |
+
"grad_norm": 36.375,
|
| 33574 |
+
"learning_rate": 9.861255163787048e-06,
|
| 33575 |
+
"loss": 18.9607,
|
| 33576 |
+
"step": 47850
|
| 33577 |
+
},
|
| 33578 |
+
{
|
| 33579 |
+
"epoch": 0.8881527823204522,
|
| 33580 |
+
"grad_norm": 35.15625,
|
| 33581 |
+
"learning_rate": 9.861226168001005e-06,
|
| 33582 |
+
"loss": 18.6494,
|
| 33583 |
+
"step": 47860
|
| 33584 |
+
},
|
| 33585 |
+
{
|
| 33586 |
+
"epoch": 0.8883383554049321,
|
| 33587 |
+
"grad_norm": 35.78125,
|
| 33588 |
+
"learning_rate": 9.861197172214963e-06,
|
| 33589 |
+
"loss": 19.3655,
|
| 33590 |
+
"step": 47870
|
| 33591 |
+
},
|
| 33592 |
+
{
|
| 33593 |
+
"epoch": 0.8885239284894119,
|
| 33594 |
+
"grad_norm": 36.9375,
|
| 33595 |
+
"learning_rate": 9.86116817642892e-06,
|
| 33596 |
+
"loss": 19.1031,
|
| 33597 |
+
"step": 47880
|
| 33598 |
+
},
|
| 33599 |
+
{
|
| 33600 |
+
"epoch": 0.8887095015738917,
|
| 33601 |
+
"grad_norm": 37.15625,
|
| 33602 |
+
"learning_rate": 9.861139180642877e-06,
|
| 33603 |
+
"loss": 18.8792,
|
| 33604 |
+
"step": 47890
|
| 33605 |
+
},
|
| 33606 |
+
{
|
| 33607 |
+
"epoch": 0.8888950746583716,
|
| 33608 |
+
"grad_norm": 36.09375,
|
| 33609 |
+
"learning_rate": 9.861110184856837e-06,
|
| 33610 |
+
"loss": 18.7694,
|
| 33611 |
+
"step": 47900
|
| 33612 |
+
},
|
| 33613 |
+
{
|
| 33614 |
+
"epoch": 0.8890806477428513,
|
| 33615 |
+
"grad_norm": 34.125,
|
| 33616 |
+
"learning_rate": 9.861081189070794e-06,
|
| 33617 |
+
"loss": 19.1518,
|
| 33618 |
+
"step": 47910
|
| 33619 |
+
},
|
| 33620 |
+
{
|
| 33621 |
+
"epoch": 0.8892662208273312,
|
| 33622 |
+
"grad_norm": 35.90625,
|
| 33623 |
+
"learning_rate": 9.86105219328475e-06,
|
| 33624 |
+
"loss": 19.2373,
|
| 33625 |
+
"step": 47920
|
| 33626 |
+
},
|
| 33627 |
+
{
|
| 33628 |
+
"epoch": 0.8894517939118111,
|
| 33629 |
+
"grad_norm": 38.25,
|
| 33630 |
+
"learning_rate": 9.861023197498709e-06,
|
| 33631 |
+
"loss": 18.6068,
|
| 33632 |
+
"step": 47930
|
| 33633 |
+
},
|
| 33634 |
+
{
|
| 33635 |
+
"epoch": 0.8896373669962908,
|
| 33636 |
+
"grad_norm": 37.46875,
|
| 33637 |
+
"learning_rate": 9.860994201712666e-06,
|
| 33638 |
+
"loss": 18.9451,
|
| 33639 |
+
"step": 47940
|
| 33640 |
+
},
|
| 33641 |
+
{
|
| 33642 |
+
"epoch": 0.8898229400807707,
|
| 33643 |
+
"grad_norm": 38.53125,
|
| 33644 |
+
"learning_rate": 9.860965205926624e-06,
|
| 33645 |
+
"loss": 18.7446,
|
| 33646 |
+
"step": 47950
|
| 33647 |
+
},
|
| 33648 |
+
{
|
| 33649 |
+
"epoch": 0.8900085131652505,
|
| 33650 |
+
"grad_norm": 36.28125,
|
| 33651 |
+
"learning_rate": 9.860936210140581e-06,
|
| 33652 |
+
"loss": 18.5875,
|
| 33653 |
+
"step": 47960
|
| 33654 |
+
},
|
| 33655 |
+
{
|
| 33656 |
+
"epoch": 0.8901940862497303,
|
| 33657 |
+
"grad_norm": 35.0,
|
| 33658 |
+
"learning_rate": 9.860907214354538e-06,
|
| 33659 |
+
"loss": 19.145,
|
| 33660 |
+
"step": 47970
|
| 33661 |
+
},
|
| 33662 |
+
{
|
| 33663 |
+
"epoch": 0.8903796593342101,
|
| 33664 |
+
"grad_norm": 39.1875,
|
| 33665 |
+
"learning_rate": 9.860878218568496e-06,
|
| 33666 |
+
"loss": 18.6372,
|
| 33667 |
+
"step": 47980
|
| 33668 |
+
},
|
| 33669 |
+
{
|
| 33670 |
+
"epoch": 0.89056523241869,
|
| 33671 |
+
"grad_norm": 36.1875,
|
| 33672 |
+
"learning_rate": 9.860849222782453e-06,
|
| 33673 |
+
"loss": 19.0401,
|
| 33674 |
+
"step": 47990
|
| 33675 |
+
},
|
| 33676 |
+
{
|
| 33677 |
+
"epoch": 0.8907508055031699,
|
| 33678 |
+
"grad_norm": 36.03125,
|
| 33679 |
+
"learning_rate": 9.860820226996412e-06,
|
| 33680 |
+
"loss": 19.0229,
|
| 33681 |
+
"step": 48000
|
| 33682 |
+
},
|
| 33683 |
+
{
|
| 33684 |
+
"epoch": 0.8909363785876496,
|
| 33685 |
+
"grad_norm": 36.125,
|
| 33686 |
+
"learning_rate": 9.86079123121037e-06,
|
| 33687 |
+
"loss": 18.9806,
|
| 33688 |
+
"step": 48010
|
| 33689 |
+
},
|
| 33690 |
+
{
|
| 33691 |
+
"epoch": 0.8911219516721295,
|
| 33692 |
+
"grad_norm": 36.25,
|
| 33693 |
+
"learning_rate": 9.860762235424327e-06,
|
| 33694 |
+
"loss": 19.2438,
|
| 33695 |
+
"step": 48020
|
| 33696 |
+
},
|
| 33697 |
+
{
|
| 33698 |
+
"epoch": 0.8913075247566093,
|
| 33699 |
+
"grad_norm": 36.875,
|
| 33700 |
+
"learning_rate": 9.860733239638285e-06,
|
| 33701 |
+
"loss": 18.9621,
|
| 33702 |
+
"step": 48030
|
| 33703 |
+
},
|
| 33704 |
+
{
|
| 33705 |
+
"epoch": 0.8914930978410891,
|
| 33706 |
+
"grad_norm": 33.46875,
|
| 33707 |
+
"learning_rate": 9.860704243852242e-06,
|
| 33708 |
+
"loss": 18.8874,
|
| 33709 |
+
"step": 48040
|
| 33710 |
+
},
|
| 33711 |
+
{
|
| 33712 |
+
"epoch": 0.891678670925569,
|
| 33713 |
+
"grad_norm": 35.65625,
|
| 33714 |
+
"learning_rate": 9.8606752480662e-06,
|
| 33715 |
+
"loss": 18.5955,
|
| 33716 |
+
"step": 48050
|
| 33717 |
+
},
|
| 33718 |
+
{
|
| 33719 |
+
"epoch": 0.8918642440100488,
|
| 33720 |
+
"grad_norm": 35.21875,
|
| 33721 |
+
"learning_rate": 9.860646252280157e-06,
|
| 33722 |
+
"loss": 19.0383,
|
| 33723 |
+
"step": 48060
|
| 33724 |
+
},
|
| 33725 |
+
{
|
| 33726 |
+
"epoch": 0.8920498170945286,
|
| 33727 |
+
"grad_norm": 39.28125,
|
| 33728 |
+
"learning_rate": 9.860617256494114e-06,
|
| 33729 |
+
"loss": 19.1283,
|
| 33730 |
+
"step": 48070
|
| 33731 |
+
},
|
| 33732 |
+
{
|
| 33733 |
+
"epoch": 0.8922353901790084,
|
| 33734 |
+
"grad_norm": 36.65625,
|
| 33735 |
+
"learning_rate": 9.860588260708072e-06,
|
| 33736 |
+
"loss": 18.4688,
|
| 33737 |
+
"step": 48080
|
| 33738 |
+
},
|
| 33739 |
+
{
|
| 33740 |
+
"epoch": 0.8924209632634883,
|
| 33741 |
+
"grad_norm": 35.59375,
|
| 33742 |
+
"learning_rate": 9.860559264922029e-06,
|
| 33743 |
+
"loss": 18.8262,
|
| 33744 |
+
"step": 48090
|
| 33745 |
+
},
|
| 33746 |
+
{
|
| 33747 |
+
"epoch": 0.892606536347968,
|
| 33748 |
+
"grad_norm": 35.53125,
|
| 33749 |
+
"learning_rate": 9.860530269135988e-06,
|
| 33750 |
+
"loss": 18.8162,
|
| 33751 |
+
"step": 48100
|
| 33752 |
+
},
|
| 33753 |
+
{
|
| 33754 |
+
"epoch": 0.8927921094324479,
|
| 33755 |
+
"grad_norm": 36.75,
|
| 33756 |
+
"learning_rate": 9.860501273349945e-06,
|
| 33757 |
+
"loss": 18.8916,
|
| 33758 |
+
"step": 48110
|
| 33759 |
+
},
|
| 33760 |
+
{
|
| 33761 |
+
"epoch": 0.8929776825169278,
|
| 33762 |
+
"grad_norm": 36.90625,
|
| 33763 |
+
"learning_rate": 9.860472277563903e-06,
|
| 33764 |
+
"loss": 18.898,
|
| 33765 |
+
"step": 48120
|
| 33766 |
+
},
|
| 33767 |
+
{
|
| 33768 |
+
"epoch": 0.8931632556014075,
|
| 33769 |
+
"grad_norm": 39.03125,
|
| 33770 |
+
"learning_rate": 9.86044328177786e-06,
|
| 33771 |
+
"loss": 18.7842,
|
| 33772 |
+
"step": 48130
|
| 33773 |
+
},
|
| 33774 |
+
{
|
| 33775 |
+
"epoch": 0.8933488286858874,
|
| 33776 |
+
"grad_norm": 35.375,
|
| 33777 |
+
"learning_rate": 9.860414285991818e-06,
|
| 33778 |
+
"loss": 19.06,
|
| 33779 |
+
"step": 48140
|
| 33780 |
+
},
|
| 33781 |
+
{
|
| 33782 |
+
"epoch": 0.8935344017703672,
|
| 33783 |
+
"grad_norm": 34.03125,
|
| 33784 |
+
"learning_rate": 9.860385290205775e-06,
|
| 33785 |
+
"loss": 18.5382,
|
| 33786 |
+
"step": 48150
|
| 33787 |
+
},
|
| 33788 |
+
{
|
| 33789 |
+
"epoch": 0.893719974854847,
|
| 33790 |
+
"grad_norm": 34.25,
|
| 33791 |
+
"learning_rate": 9.860356294419732e-06,
|
| 33792 |
+
"loss": 19.0277,
|
| 33793 |
+
"step": 48160
|
| 33794 |
+
},
|
| 33795 |
+
{
|
| 33796 |
+
"epoch": 0.8939055479393269,
|
| 33797 |
+
"grad_norm": 36.21875,
|
| 33798 |
+
"learning_rate": 9.86032729863369e-06,
|
| 33799 |
+
"loss": 18.6719,
|
| 33800 |
+
"step": 48170
|
| 33801 |
+
},
|
| 33802 |
+
{
|
| 33803 |
+
"epoch": 0.8940911210238067,
|
| 33804 |
+
"grad_norm": 35.3125,
|
| 33805 |
+
"learning_rate": 9.860298302847649e-06,
|
| 33806 |
+
"loss": 18.9148,
|
| 33807 |
+
"step": 48180
|
| 33808 |
+
},
|
| 33809 |
+
{
|
| 33810 |
+
"epoch": 0.8942766941082866,
|
| 33811 |
+
"grad_norm": 35.78125,
|
| 33812 |
+
"learning_rate": 9.860269307061605e-06,
|
| 33813 |
+
"loss": 19.3434,
|
| 33814 |
+
"step": 48190
|
| 33815 |
+
},
|
| 33816 |
+
{
|
| 33817 |
+
"epoch": 0.8944622671927663,
|
| 33818 |
+
"grad_norm": 37.125,
|
| 33819 |
+
"learning_rate": 9.860240311275562e-06,
|
| 33820 |
+
"loss": 18.9732,
|
| 33821 |
+
"step": 48200
|
| 33822 |
+
},
|
| 33823 |
+
{
|
| 33824 |
+
"epoch": 0.8946478402772462,
|
| 33825 |
+
"grad_norm": 34.59375,
|
| 33826 |
+
"learning_rate": 9.860211315489521e-06,
|
| 33827 |
+
"loss": 18.8076,
|
| 33828 |
+
"step": 48210
|
| 33829 |
+
},
|
| 33830 |
+
{
|
| 33831 |
+
"epoch": 0.8948334133617261,
|
| 33832 |
+
"grad_norm": 36.90625,
|
| 33833 |
+
"learning_rate": 9.860182319703479e-06,
|
| 33834 |
+
"loss": 18.5892,
|
| 33835 |
+
"step": 48220
|
| 33836 |
+
},
|
| 33837 |
+
{
|
| 33838 |
+
"epoch": 0.8950189864462058,
|
| 33839 |
+
"grad_norm": 38.1875,
|
| 33840 |
+
"learning_rate": 9.860153323917436e-06,
|
| 33841 |
+
"loss": 18.7777,
|
| 33842 |
+
"step": 48230
|
| 33843 |
+
},
|
| 33844 |
+
{
|
| 33845 |
+
"epoch": 0.8952045595306857,
|
| 33846 |
+
"grad_norm": 35.84375,
|
| 33847 |
+
"learning_rate": 9.860124328131393e-06,
|
| 33848 |
+
"loss": 18.6854,
|
| 33849 |
+
"step": 48240
|
| 33850 |
+
},
|
| 33851 |
+
{
|
| 33852 |
+
"epoch": 0.8953901326151655,
|
| 33853 |
+
"grad_norm": 36.96875,
|
| 33854 |
+
"learning_rate": 9.86009533234535e-06,
|
| 33855 |
+
"loss": 18.9375,
|
| 33856 |
+
"step": 48250
|
| 33857 |
+
},
|
| 33858 |
+
{
|
| 33859 |
+
"epoch": 0.8955757056996453,
|
| 33860 |
+
"grad_norm": 36.5625,
|
| 33861 |
+
"learning_rate": 9.860066336559308e-06,
|
| 33862 |
+
"loss": 18.5619,
|
| 33863 |
+
"step": 48260
|
| 33864 |
+
},
|
| 33865 |
+
{
|
| 33866 |
+
"epoch": 0.8957612787841251,
|
| 33867 |
+
"grad_norm": 35.46875,
|
| 33868 |
+
"learning_rate": 9.860037340773266e-06,
|
| 33869 |
+
"loss": 18.8577,
|
| 33870 |
+
"step": 48270
|
| 33871 |
+
},
|
| 33872 |
+
{
|
| 33873 |
+
"epoch": 0.895946851868605,
|
| 33874 |
+
"grad_norm": 35.0,
|
| 33875 |
+
"learning_rate": 9.860008344987225e-06,
|
| 33876 |
+
"loss": 19.1907,
|
| 33877 |
+
"step": 48280
|
| 33878 |
+
},
|
| 33879 |
+
{
|
| 33880 |
+
"epoch": 0.8961324249530848,
|
| 33881 |
+
"grad_norm": 36.25,
|
| 33882 |
+
"learning_rate": 9.859979349201182e-06,
|
| 33883 |
+
"loss": 18.8677,
|
| 33884 |
+
"step": 48290
|
| 33885 |
+
},
|
| 33886 |
+
{
|
| 33887 |
+
"epoch": 0.8963179980375646,
|
| 33888 |
+
"grad_norm": 36.375,
|
| 33889 |
+
"learning_rate": 9.859950353415138e-06,
|
| 33890 |
+
"loss": 18.9737,
|
| 33891 |
+
"step": 48300
|
| 33892 |
+
},
|
| 33893 |
+
{
|
| 33894 |
+
"epoch": 0.8965035711220445,
|
| 33895 |
+
"grad_norm": 36.71875,
|
| 33896 |
+
"learning_rate": 9.859921357629097e-06,
|
| 33897 |
+
"loss": 19.0863,
|
| 33898 |
+
"step": 48310
|
| 33899 |
+
},
|
| 33900 |
+
{
|
| 33901 |
+
"epoch": 0.8966891442065242,
|
| 33902 |
+
"grad_norm": 35.875,
|
| 33903 |
+
"learning_rate": 9.859892361843054e-06,
|
| 33904 |
+
"loss": 18.5042,
|
| 33905 |
+
"step": 48320
|
| 33906 |
+
},
|
| 33907 |
+
{
|
| 33908 |
+
"epoch": 0.8968747172910041,
|
| 33909 |
+
"grad_norm": 37.15625,
|
| 33910 |
+
"learning_rate": 9.859863366057012e-06,
|
| 33911 |
+
"loss": 19.0693,
|
| 33912 |
+
"step": 48330
|
| 33913 |
+
},
|
| 33914 |
+
{
|
| 33915 |
+
"epoch": 0.897060290375484,
|
| 33916 |
+
"grad_norm": 37.5,
|
| 33917 |
+
"learning_rate": 9.85983437027097e-06,
|
| 33918 |
+
"loss": 18.991,
|
| 33919 |
+
"step": 48340
|
| 33920 |
+
},
|
| 33921 |
+
{
|
| 33922 |
+
"epoch": 0.8972458634599638,
|
| 33923 |
+
"grad_norm": 35.125,
|
| 33924 |
+
"learning_rate": 9.859805374484927e-06,
|
| 33925 |
+
"loss": 18.5443,
|
| 33926 |
+
"step": 48350
|
| 33927 |
+
},
|
| 33928 |
+
{
|
| 33929 |
+
"epoch": 0.8974314365444436,
|
| 33930 |
+
"grad_norm": 39.0625,
|
| 33931 |
+
"learning_rate": 9.859776378698884e-06,
|
| 33932 |
+
"loss": 18.6623,
|
| 33933 |
+
"step": 48360
|
| 33934 |
+
},
|
| 33935 |
+
{
|
| 33936 |
+
"epoch": 0.8976170096289234,
|
| 33937 |
+
"grad_norm": 37.15625,
|
| 33938 |
+
"learning_rate": 9.859747382912841e-06,
|
| 33939 |
+
"loss": 18.8904,
|
| 33940 |
+
"step": 48370
|
| 33941 |
+
},
|
| 33942 |
+
{
|
| 33943 |
+
"epoch": 0.8978025827134033,
|
| 33944 |
+
"grad_norm": 35.71875,
|
| 33945 |
+
"learning_rate": 9.8597183871268e-06,
|
| 33946 |
+
"loss": 18.4478,
|
| 33947 |
+
"step": 48380
|
| 33948 |
+
},
|
| 33949 |
+
{
|
| 33950 |
+
"epoch": 0.897988155797883,
|
| 33951 |
+
"grad_norm": 35.6875,
|
| 33952 |
+
"learning_rate": 9.859689391340758e-06,
|
| 33953 |
+
"loss": 18.6417,
|
| 33954 |
+
"step": 48390
|
| 33955 |
+
},
|
| 33956 |
+
{
|
| 33957 |
+
"epoch": 0.8981737288823629,
|
| 33958 |
+
"grad_norm": 36.71875,
|
| 33959 |
+
"learning_rate": 9.859660395554714e-06,
|
| 33960 |
+
"loss": 18.5121,
|
| 33961 |
+
"step": 48400
|
| 33962 |
+
},
|
| 33963 |
+
{
|
| 33964 |
+
"epoch": 0.8983593019668428,
|
| 33965 |
+
"grad_norm": 38.53125,
|
| 33966 |
+
"learning_rate": 9.859631399768673e-06,
|
| 33967 |
+
"loss": 18.3646,
|
| 33968 |
+
"step": 48410
|
| 33969 |
+
},
|
| 33970 |
+
{
|
| 33971 |
+
"epoch": 0.8985448750513225,
|
| 33972 |
+
"grad_norm": 37.15625,
|
| 33973 |
+
"learning_rate": 9.85960240398263e-06,
|
| 33974 |
+
"loss": 18.9429,
|
| 33975 |
+
"step": 48420
|
| 33976 |
+
},
|
| 33977 |
+
{
|
| 33978 |
+
"epoch": 0.8987304481358024,
|
| 33979 |
+
"grad_norm": 36.46875,
|
| 33980 |
+
"learning_rate": 9.859573408196588e-06,
|
| 33981 |
+
"loss": 19.2573,
|
| 33982 |
+
"step": 48430
|
| 33983 |
+
},
|
| 33984 |
+
{
|
| 33985 |
+
"epoch": 0.8989160212202822,
|
| 33986 |
+
"grad_norm": 37.28125,
|
| 33987 |
+
"learning_rate": 9.859544412410545e-06,
|
| 33988 |
+
"loss": 19.204,
|
| 33989 |
+
"step": 48440
|
| 33990 |
+
},
|
| 33991 |
+
{
|
| 33992 |
+
"epoch": 0.899101594304762,
|
| 33993 |
+
"grad_norm": 37.8125,
|
| 33994 |
+
"learning_rate": 9.859515416624504e-06,
|
| 33995 |
+
"loss": 18.7448,
|
| 33996 |
+
"step": 48450
|
| 33997 |
+
},
|
| 33998 |
+
{
|
| 33999 |
+
"epoch": 0.8992871673892419,
|
| 34000 |
+
"grad_norm": 35.6875,
|
| 34001 |
+
"learning_rate": 9.85948642083846e-06,
|
| 34002 |
+
"loss": 19.055,
|
| 34003 |
+
"step": 48460
|
| 34004 |
+
},
|
| 34005 |
+
{
|
| 34006 |
+
"epoch": 0.8994727404737217,
|
| 34007 |
+
"grad_norm": 37.71875,
|
| 34008 |
+
"learning_rate": 9.859457425052417e-06,
|
| 34009 |
+
"loss": 18.3525,
|
| 34010 |
+
"step": 48470
|
| 34011 |
+
},
|
| 34012 |
+
{
|
| 34013 |
+
"epoch": 0.8996583135582015,
|
| 34014 |
+
"grad_norm": 35.1875,
|
| 34015 |
+
"learning_rate": 9.859428429266376e-06,
|
| 34016 |
+
"loss": 18.8309,
|
| 34017 |
+
"step": 48480
|
| 34018 |
+
},
|
| 34019 |
+
{
|
| 34020 |
+
"epoch": 0.8998438866426813,
|
| 34021 |
+
"grad_norm": 34.84375,
|
| 34022 |
+
"learning_rate": 9.859399433480334e-06,
|
| 34023 |
+
"loss": 18.8916,
|
| 34024 |
+
"step": 48490
|
| 34025 |
+
},
|
| 34026 |
+
{
|
| 34027 |
+
"epoch": 0.9000294597271612,
|
| 34028 |
+
"grad_norm": 38.09375,
|
| 34029 |
+
"learning_rate": 9.859370437694291e-06,
|
| 34030 |
+
"loss": 19.0495,
|
| 34031 |
+
"step": 48500
|
| 34032 |
+
},
|
| 34033 |
+
{
|
| 34034 |
+
"epoch": 0.900215032811641,
|
| 34035 |
+
"grad_norm": 38.71875,
|
| 34036 |
+
"learning_rate": 9.859341441908249e-06,
|
| 34037 |
+
"loss": 19.1123,
|
| 34038 |
+
"step": 48510
|
| 34039 |
+
},
|
| 34040 |
+
{
|
| 34041 |
+
"epoch": 0.9004006058961208,
|
| 34042 |
+
"grad_norm": 39.46875,
|
| 34043 |
+
"learning_rate": 9.859312446122206e-06,
|
| 34044 |
+
"loss": 19.1905,
|
| 34045 |
+
"step": 48520
|
| 34046 |
+
},
|
| 34047 |
+
{
|
| 34048 |
+
"epoch": 0.9005861789806007,
|
| 34049 |
+
"grad_norm": 39.09375,
|
| 34050 |
+
"learning_rate": 9.859283450336163e-06,
|
| 34051 |
+
"loss": 19.0494,
|
| 34052 |
+
"step": 48530
|
| 34053 |
+
},
|
| 34054 |
+
{
|
| 34055 |
+
"epoch": 0.9007717520650805,
|
| 34056 |
+
"grad_norm": 35.4375,
|
| 34057 |
+
"learning_rate": 9.85925445455012e-06,
|
| 34058 |
+
"loss": 18.9496,
|
| 34059 |
+
"step": 48540
|
| 34060 |
+
},
|
| 34061 |
+
{
|
| 34062 |
+
"epoch": 0.9009573251495603,
|
| 34063 |
+
"grad_norm": 36.03125,
|
| 34064 |
+
"learning_rate": 9.85922545876408e-06,
|
| 34065 |
+
"loss": 18.5396,
|
| 34066 |
+
"step": 48550
|
| 34067 |
+
},
|
| 34068 |
+
{
|
| 34069 |
+
"epoch": 0.9011428982340401,
|
| 34070 |
+
"grad_norm": 38.8125,
|
| 34071 |
+
"learning_rate": 9.859196462978036e-06,
|
| 34072 |
+
"loss": 18.9276,
|
| 34073 |
+
"step": 48560
|
| 34074 |
+
},
|
| 34075 |
+
{
|
| 34076 |
+
"epoch": 0.90132847131852,
|
| 34077 |
+
"grad_norm": 39.15625,
|
| 34078 |
+
"learning_rate": 9.859167467191993e-06,
|
| 34079 |
+
"loss": 18.6237,
|
| 34080 |
+
"step": 48570
|
| 34081 |
+
},
|
| 34082 |
+
{
|
| 34083 |
+
"epoch": 0.9015140444029998,
|
| 34084 |
+
"grad_norm": 38.1875,
|
| 34085 |
+
"learning_rate": 9.859138471405952e-06,
|
| 34086 |
+
"loss": 19.0431,
|
| 34087 |
+
"step": 48580
|
| 34088 |
+
},
|
| 34089 |
+
{
|
| 34090 |
+
"epoch": 0.9016996174874796,
|
| 34091 |
+
"grad_norm": 35.53125,
|
| 34092 |
+
"learning_rate": 9.85910947561991e-06,
|
| 34093 |
+
"loss": 19.2482,
|
| 34094 |
+
"step": 48590
|
| 34095 |
+
},
|
| 34096 |
+
{
|
| 34097 |
+
"epoch": 0.9018851905719595,
|
| 34098 |
+
"grad_norm": 36.4375,
|
| 34099 |
+
"learning_rate": 9.859080479833867e-06,
|
| 34100 |
+
"loss": 19.0075,
|
| 34101 |
+
"step": 48600
|
| 34102 |
+
},
|
| 34103 |
+
{
|
| 34104 |
+
"epoch": 0.9020707636564392,
|
| 34105 |
+
"grad_norm": 36.125,
|
| 34106 |
+
"learning_rate": 9.859051484047824e-06,
|
| 34107 |
+
"loss": 19.0288,
|
| 34108 |
+
"step": 48610
|
| 34109 |
+
},
|
| 34110 |
+
{
|
| 34111 |
+
"epoch": 0.9022563367409191,
|
| 34112 |
+
"grad_norm": 37.6875,
|
| 34113 |
+
"learning_rate": 9.859022488261782e-06,
|
| 34114 |
+
"loss": 18.5252,
|
| 34115 |
+
"step": 48620
|
| 34116 |
+
},
|
| 34117 |
+
{
|
| 34118 |
+
"epoch": 0.902441909825399,
|
| 34119 |
+
"grad_norm": 37.0,
|
| 34120 |
+
"learning_rate": 9.858993492475739e-06,
|
| 34121 |
+
"loss": 19.3277,
|
| 34122 |
+
"step": 48630
|
| 34123 |
+
},
|
| 34124 |
+
{
|
| 34125 |
+
"epoch": 0.9026274829098787,
|
| 34126 |
+
"grad_norm": 35.8125,
|
| 34127 |
+
"learning_rate": 9.858964496689697e-06,
|
| 34128 |
+
"loss": 18.7398,
|
| 34129 |
+
"step": 48640
|
| 34130 |
+
},
|
| 34131 |
+
{
|
| 34132 |
+
"epoch": 0.9028130559943586,
|
| 34133 |
+
"grad_norm": 37.8125,
|
| 34134 |
+
"learning_rate": 9.858935500903654e-06,
|
| 34135 |
+
"loss": 19.119,
|
| 34136 |
+
"step": 48650
|
| 34137 |
+
},
|
| 34138 |
+
{
|
| 34139 |
+
"epoch": 0.9029986290788384,
|
| 34140 |
+
"grad_norm": 37.46875,
|
| 34141 |
+
"learning_rate": 9.858906505117613e-06,
|
| 34142 |
+
"loss": 18.9497,
|
| 34143 |
+
"step": 48660
|
| 34144 |
+
},
|
| 34145 |
+
{
|
| 34146 |
+
"epoch": 0.9031842021633182,
|
| 34147 |
+
"grad_norm": 35.3125,
|
| 34148 |
+
"learning_rate": 9.858877509331569e-06,
|
| 34149 |
+
"loss": 18.5547,
|
| 34150 |
+
"step": 48670
|
| 34151 |
+
},
|
| 34152 |
+
{
|
| 34153 |
+
"epoch": 0.903369775247798,
|
| 34154 |
+
"grad_norm": 38.53125,
|
| 34155 |
+
"learning_rate": 9.858848513545526e-06,
|
| 34156 |
+
"loss": 19.0391,
|
| 34157 |
+
"step": 48680
|
| 34158 |
+
},
|
| 34159 |
+
{
|
| 34160 |
+
"epoch": 0.9035553483322779,
|
| 34161 |
+
"grad_norm": 33.28125,
|
| 34162 |
+
"learning_rate": 9.858819517759485e-06,
|
| 34163 |
+
"loss": 18.7029,
|
| 34164 |
+
"step": 48690
|
| 34165 |
+
},
|
| 34166 |
+
{
|
| 34167 |
+
"epoch": 0.9037409214167577,
|
| 34168 |
+
"grad_norm": 37.46875,
|
| 34169 |
+
"learning_rate": 9.858790521973443e-06,
|
| 34170 |
+
"loss": 18.9116,
|
| 34171 |
+
"step": 48700
|
| 34172 |
+
},
|
| 34173 |
+
{
|
| 34174 |
+
"epoch": 0.9039264945012375,
|
| 34175 |
+
"grad_norm": 36.5,
|
| 34176 |
+
"learning_rate": 9.8587615261874e-06,
|
| 34177 |
+
"loss": 19.0272,
|
| 34178 |
+
"step": 48710
|
| 34179 |
+
},
|
| 34180 |
+
{
|
| 34181 |
+
"epoch": 0.9041120675857174,
|
| 34182 |
+
"grad_norm": 36.1875,
|
| 34183 |
+
"learning_rate": 9.858732530401357e-06,
|
| 34184 |
+
"loss": 19.181,
|
| 34185 |
+
"step": 48720
|
| 34186 |
+
},
|
| 34187 |
+
{
|
| 34188 |
+
"epoch": 0.9042976406701972,
|
| 34189 |
+
"grad_norm": 36.96875,
|
| 34190 |
+
"learning_rate": 9.858703534615315e-06,
|
| 34191 |
+
"loss": 18.9267,
|
| 34192 |
+
"step": 48730
|
| 34193 |
+
},
|
| 34194 |
+
{
|
| 34195 |
+
"epoch": 0.904483213754677,
|
| 34196 |
+
"grad_norm": 36.96875,
|
| 34197 |
+
"learning_rate": 9.858674538829272e-06,
|
| 34198 |
+
"loss": 19.3512,
|
| 34199 |
+
"step": 48740
|
| 34200 |
+
},
|
| 34201 |
+
{
|
| 34202 |
+
"epoch": 0.9046687868391569,
|
| 34203 |
+
"grad_norm": 37.53125,
|
| 34204 |
+
"learning_rate": 9.85864554304323e-06,
|
| 34205 |
+
"loss": 19.0552,
|
| 34206 |
+
"step": 48750
|
| 34207 |
+
},
|
| 34208 |
+
{
|
| 34209 |
+
"epoch": 0.9048543599236367,
|
| 34210 |
+
"grad_norm": 37.8125,
|
| 34211 |
+
"learning_rate": 9.858616547257189e-06,
|
| 34212 |
+
"loss": 18.5666,
|
| 34213 |
+
"step": 48760
|
| 34214 |
+
},
|
| 34215 |
+
{
|
| 34216 |
+
"epoch": 0.9050399330081165,
|
| 34217 |
+
"grad_norm": 34.96875,
|
| 34218 |
+
"learning_rate": 9.858587551471146e-06,
|
| 34219 |
+
"loss": 18.9902,
|
| 34220 |
+
"step": 48770
|
| 34221 |
+
},
|
| 34222 |
+
{
|
| 34223 |
+
"epoch": 0.9052255060925963,
|
| 34224 |
+
"grad_norm": 36.59375,
|
| 34225 |
+
"learning_rate": 9.858558555685102e-06,
|
| 34226 |
+
"loss": 19.0585,
|
| 34227 |
+
"step": 48780
|
| 34228 |
+
},
|
| 34229 |
+
{
|
| 34230 |
+
"epoch": 0.9054110791770762,
|
| 34231 |
+
"grad_norm": 36.375,
|
| 34232 |
+
"learning_rate": 9.858529559899061e-06,
|
| 34233 |
+
"loss": 18.8288,
|
| 34234 |
+
"step": 48790
|
| 34235 |
+
},
|
| 34236 |
+
{
|
| 34237 |
+
"epoch": 0.905596652261556,
|
| 34238 |
+
"grad_norm": 36.8125,
|
| 34239 |
+
"learning_rate": 9.858500564113018e-06,
|
| 34240 |
+
"loss": 18.8442,
|
| 34241 |
+
"step": 48800
|
| 34242 |
+
},
|
| 34243 |
+
{
|
| 34244 |
+
"epoch": 0.9057822253460358,
|
| 34245 |
+
"grad_norm": 36.625,
|
| 34246 |
+
"learning_rate": 9.858471568326976e-06,
|
| 34247 |
+
"loss": 18.3578,
|
| 34248 |
+
"step": 48810
|
| 34249 |
+
},
|
| 34250 |
+
{
|
| 34251 |
+
"epoch": 0.9059677984305157,
|
| 34252 |
+
"grad_norm": 39.15625,
|
| 34253 |
+
"learning_rate": 9.858442572540933e-06,
|
| 34254 |
+
"loss": 18.6956,
|
| 34255 |
+
"step": 48820
|
| 34256 |
+
},
|
| 34257 |
+
{
|
| 34258 |
+
"epoch": 0.9061533715149954,
|
| 34259 |
+
"grad_norm": 36.65625,
|
| 34260 |
+
"learning_rate": 9.85841357675489e-06,
|
| 34261 |
+
"loss": 18.7518,
|
| 34262 |
+
"step": 48830
|
| 34263 |
+
},
|
| 34264 |
+
{
|
| 34265 |
+
"epoch": 0.9063389445994753,
|
| 34266 |
+
"grad_norm": 39.1875,
|
| 34267 |
+
"learning_rate": 9.858384580968848e-06,
|
| 34268 |
+
"loss": 19.176,
|
| 34269 |
+
"step": 48840
|
| 34270 |
+
},
|
| 34271 |
+
{
|
| 34272 |
+
"epoch": 0.9065245176839551,
|
| 34273 |
+
"grad_norm": 39.0625,
|
| 34274 |
+
"learning_rate": 9.858355585182805e-06,
|
| 34275 |
+
"loss": 18.8138,
|
| 34276 |
+
"step": 48850
|
| 34277 |
+
},
|
| 34278 |
+
{
|
| 34279 |
+
"epoch": 0.9067100907684349,
|
| 34280 |
+
"grad_norm": 34.03125,
|
| 34281 |
+
"learning_rate": 9.858326589396765e-06,
|
| 34282 |
+
"loss": 19.1294,
|
| 34283 |
+
"step": 48860
|
| 34284 |
+
},
|
| 34285 |
+
{
|
| 34286 |
+
"epoch": 0.9068956638529148,
|
| 34287 |
+
"grad_norm": 37.3125,
|
| 34288 |
+
"learning_rate": 9.858297593610722e-06,
|
| 34289 |
+
"loss": 19.0567,
|
| 34290 |
+
"step": 48870
|
| 34291 |
+
},
|
| 34292 |
+
{
|
| 34293 |
+
"epoch": 0.9070812369373946,
|
| 34294 |
+
"grad_norm": 35.8125,
|
| 34295 |
+
"learning_rate": 9.85826859782468e-06,
|
| 34296 |
+
"loss": 18.7387,
|
| 34297 |
+
"step": 48880
|
| 34298 |
+
},
|
| 34299 |
+
{
|
| 34300 |
+
"epoch": 0.9072668100218745,
|
| 34301 |
+
"grad_norm": 36.96875,
|
| 34302 |
+
"learning_rate": 9.858239602038637e-06,
|
| 34303 |
+
"loss": 19.0438,
|
| 34304 |
+
"step": 48890
|
| 34305 |
+
},
|
| 34306 |
+
{
|
| 34307 |
+
"epoch": 0.9074523831063542,
|
| 34308 |
+
"grad_norm": 34.25,
|
| 34309 |
+
"learning_rate": 9.858210606252594e-06,
|
| 34310 |
+
"loss": 18.7993,
|
| 34311 |
+
"step": 48900
|
| 34312 |
+
},
|
| 34313 |
+
{
|
| 34314 |
+
"epoch": 0.9076379561908341,
|
| 34315 |
+
"grad_norm": 37.03125,
|
| 34316 |
+
"learning_rate": 9.858181610466552e-06,
|
| 34317 |
+
"loss": 19.0855,
|
| 34318 |
+
"step": 48910
|
| 34319 |
+
},
|
| 34320 |
+
{
|
| 34321 |
+
"epoch": 0.907823529275314,
|
| 34322 |
+
"grad_norm": 35.625,
|
| 34323 |
+
"learning_rate": 9.858152614680509e-06,
|
| 34324 |
+
"loss": 18.388,
|
| 34325 |
+
"step": 48920
|
| 34326 |
+
},
|
| 34327 |
+
{
|
| 34328 |
+
"epoch": 0.9080091023597937,
|
| 34329 |
+
"grad_norm": 38.625,
|
| 34330 |
+
"learning_rate": 9.858123618894468e-06,
|
| 34331 |
+
"loss": 19.0493,
|
| 34332 |
+
"step": 48930
|
| 34333 |
+
},
|
| 34334 |
+
{
|
| 34335 |
+
"epoch": 0.9081946754442736,
|
| 34336 |
+
"grad_norm": 35.6875,
|
| 34337 |
+
"learning_rate": 9.858094623108424e-06,
|
| 34338 |
+
"loss": 18.9766,
|
| 34339 |
+
"step": 48940
|
| 34340 |
+
},
|
| 34341 |
+
{
|
| 34342 |
+
"epoch": 0.9083802485287534,
|
| 34343 |
+
"grad_norm": 36.40625,
|
| 34344 |
+
"learning_rate": 9.858065627322381e-06,
|
| 34345 |
+
"loss": 18.6864,
|
| 34346 |
+
"step": 48950
|
| 34347 |
+
},
|
| 34348 |
+
{
|
| 34349 |
+
"epoch": 0.9085658216132332,
|
| 34350 |
+
"grad_norm": 37.9375,
|
| 34351 |
+
"learning_rate": 9.85803663153634e-06,
|
| 34352 |
+
"loss": 18.833,
|
| 34353 |
+
"step": 48960
|
| 34354 |
+
},
|
| 34355 |
+
{
|
| 34356 |
+
"epoch": 0.908751394697713,
|
| 34357 |
+
"grad_norm": 37.125,
|
| 34358 |
+
"learning_rate": 9.858007635750298e-06,
|
| 34359 |
+
"loss": 18.6554,
|
| 34360 |
+
"step": 48970
|
| 34361 |
+
},
|
| 34362 |
+
{
|
| 34363 |
+
"epoch": 0.9089369677821929,
|
| 34364 |
+
"grad_norm": 34.1875,
|
| 34365 |
+
"learning_rate": 9.857978639964255e-06,
|
| 34366 |
+
"loss": 18.564,
|
| 34367 |
+
"step": 48980
|
| 34368 |
+
},
|
| 34369 |
+
{
|
| 34370 |
+
"epoch": 0.9091225408666727,
|
| 34371 |
+
"grad_norm": 35.6875,
|
| 34372 |
+
"learning_rate": 9.857949644178213e-06,
|
| 34373 |
+
"loss": 18.8914,
|
| 34374 |
+
"step": 48990
|
| 34375 |
+
},
|
| 34376 |
+
{
|
| 34377 |
+
"epoch": 0.9093081139511525,
|
| 34378 |
+
"grad_norm": 34.71875,
|
| 34379 |
+
"learning_rate": 9.85792064839217e-06,
|
| 34380 |
+
"loss": 19.0573,
|
| 34381 |
+
"step": 49000
|
| 34382 |
+
},
|
| 34383 |
+
{
|
| 34384 |
+
"epoch": 0.9094936870356324,
|
| 34385 |
+
"grad_norm": 33.625,
|
| 34386 |
+
"learning_rate": 9.857891652606127e-06,
|
| 34387 |
+
"loss": 18.3762,
|
| 34388 |
+
"step": 49010
|
| 34389 |
+
},
|
| 34390 |
+
{
|
| 34391 |
+
"epoch": 0.9096792601201121,
|
| 34392 |
+
"grad_norm": 37.6875,
|
| 34393 |
+
"learning_rate": 9.857862656820085e-06,
|
| 34394 |
+
"loss": 19.0526,
|
| 34395 |
+
"step": 49020
|
| 34396 |
+
},
|
| 34397 |
+
{
|
| 34398 |
+
"epoch": 0.909864833204592,
|
| 34399 |
+
"grad_norm": 37.5,
|
| 34400 |
+
"learning_rate": 9.857833661034044e-06,
|
| 34401 |
+
"loss": 18.6206,
|
| 34402 |
+
"step": 49030
|
| 34403 |
+
},
|
| 34404 |
+
{
|
| 34405 |
+
"epoch": 0.9100504062890719,
|
| 34406 |
+
"grad_norm": 36.90625,
|
| 34407 |
+
"learning_rate": 9.857804665248001e-06,
|
| 34408 |
+
"loss": 18.535,
|
| 34409 |
+
"step": 49040
|
| 34410 |
+
},
|
| 34411 |
+
{
|
| 34412 |
+
"epoch": 0.9102359793735516,
|
| 34413 |
+
"grad_norm": 36.25,
|
| 34414 |
+
"learning_rate": 9.857775669461957e-06,
|
| 34415 |
+
"loss": 18.6812,
|
| 34416 |
+
"step": 49050
|
| 34417 |
+
},
|
| 34418 |
+
{
|
| 34419 |
+
"epoch": 0.9104215524580315,
|
| 34420 |
+
"grad_norm": 36.1875,
|
| 34421 |
+
"learning_rate": 9.857746673675916e-06,
|
| 34422 |
+
"loss": 18.8887,
|
| 34423 |
+
"step": 49060
|
| 34424 |
+
},
|
| 34425 |
+
{
|
| 34426 |
+
"epoch": 0.9106071255425113,
|
| 34427 |
+
"grad_norm": 35.5625,
|
| 34428 |
+
"learning_rate": 9.857717677889873e-06,
|
| 34429 |
+
"loss": 18.5824,
|
| 34430 |
+
"step": 49070
|
| 34431 |
+
},
|
| 34432 |
+
{
|
| 34433 |
+
"epoch": 0.9107926986269912,
|
| 34434 |
+
"grad_norm": 35.5,
|
| 34435 |
+
"learning_rate": 9.857688682103831e-06,
|
| 34436 |
+
"loss": 18.4892,
|
| 34437 |
+
"step": 49080
|
| 34438 |
+
},
|
| 34439 |
+
{
|
| 34440 |
+
"epoch": 0.9109782717114709,
|
| 34441 |
+
"grad_norm": 35.875,
|
| 34442 |
+
"learning_rate": 9.857659686317788e-06,
|
| 34443 |
+
"loss": 18.8687,
|
| 34444 |
+
"step": 49090
|
| 34445 |
+
},
|
| 34446 |
+
{
|
| 34447 |
+
"epoch": 0.9111638447959508,
|
| 34448 |
+
"grad_norm": 35.03125,
|
| 34449 |
+
"learning_rate": 9.857630690531746e-06,
|
| 34450 |
+
"loss": 19.2528,
|
| 34451 |
+
"step": 49100
|
| 34452 |
+
},
|
| 34453 |
+
{
|
| 34454 |
+
"epoch": 0.9113494178804307,
|
| 34455 |
+
"grad_norm": 37.1875,
|
| 34456 |
+
"learning_rate": 9.857601694745703e-06,
|
| 34457 |
+
"loss": 18.4546,
|
| 34458 |
+
"step": 49110
|
| 34459 |
+
},
|
| 34460 |
+
{
|
| 34461 |
+
"epoch": 0.9115349909649104,
|
| 34462 |
+
"grad_norm": 36.5625,
|
| 34463 |
+
"learning_rate": 9.85757269895966e-06,
|
| 34464 |
+
"loss": 18.619,
|
| 34465 |
+
"step": 49120
|
| 34466 |
+
},
|
| 34467 |
+
{
|
| 34468 |
+
"epoch": 0.9117205640493903,
|
| 34469 |
+
"grad_norm": 37.625,
|
| 34470 |
+
"learning_rate": 9.857543703173618e-06,
|
| 34471 |
+
"loss": 18.5371,
|
| 34472 |
+
"step": 49130
|
| 34473 |
+
},
|
| 34474 |
+
{
|
| 34475 |
+
"epoch": 0.9119061371338701,
|
| 34476 |
+
"grad_norm": 36.5,
|
| 34477 |
+
"learning_rate": 9.857514707387577e-06,
|
| 34478 |
+
"loss": 19.0488,
|
| 34479 |
+
"step": 49140
|
| 34480 |
+
},
|
| 34481 |
+
{
|
| 34482 |
+
"epoch": 0.9120917102183499,
|
| 34483 |
+
"grad_norm": 36.3125,
|
| 34484 |
+
"learning_rate": 9.857485711601533e-06,
|
| 34485 |
+
"loss": 18.4348,
|
| 34486 |
+
"step": 49150
|
| 34487 |
+
},
|
| 34488 |
+
{
|
| 34489 |
+
"epoch": 0.9122772833028298,
|
| 34490 |
+
"grad_norm": 35.53125,
|
| 34491 |
+
"learning_rate": 9.857456715815492e-06,
|
| 34492 |
+
"loss": 19.2483,
|
| 34493 |
+
"step": 49160
|
| 34494 |
+
},
|
| 34495 |
+
{
|
| 34496 |
+
"epoch": 0.9124628563873096,
|
| 34497 |
+
"grad_norm": 37.90625,
|
| 34498 |
+
"learning_rate": 9.85742772002945e-06,
|
| 34499 |
+
"loss": 18.7975,
|
| 34500 |
+
"step": 49170
|
| 34501 |
+
},
|
| 34502 |
+
{
|
| 34503 |
+
"epoch": 0.9126484294717894,
|
| 34504 |
+
"grad_norm": 35.71875,
|
| 34505 |
+
"learning_rate": 9.857398724243407e-06,
|
| 34506 |
+
"loss": 18.9149,
|
| 34507 |
+
"step": 49180
|
| 34508 |
+
},
|
| 34509 |
+
{
|
| 34510 |
+
"epoch": 0.9128340025562692,
|
| 34511 |
+
"grad_norm": 36.125,
|
| 34512 |
+
"learning_rate": 9.857369728457364e-06,
|
| 34513 |
+
"loss": 18.691,
|
| 34514 |
+
"step": 49190
|
| 34515 |
+
},
|
| 34516 |
+
{
|
| 34517 |
+
"epoch": 0.9130195756407491,
|
| 34518 |
+
"grad_norm": 36.90625,
|
| 34519 |
+
"learning_rate": 9.857340732671321e-06,
|
| 34520 |
+
"loss": 18.4709,
|
| 34521 |
+
"step": 49200
|
| 34522 |
+
},
|
| 34523 |
+
{
|
| 34524 |
+
"epoch": 0.9132051487252288,
|
| 34525 |
+
"grad_norm": 34.5625,
|
| 34526 |
+
"learning_rate": 9.857311736885279e-06,
|
| 34527 |
+
"loss": 18.5962,
|
| 34528 |
+
"step": 49210
|
| 34529 |
+
},
|
| 34530 |
+
{
|
| 34531 |
+
"epoch": 0.9133907218097087,
|
| 34532 |
+
"grad_norm": 35.375,
|
| 34533 |
+
"learning_rate": 9.857282741099236e-06,
|
| 34534 |
+
"loss": 18.9627,
|
| 34535 |
+
"step": 49220
|
| 34536 |
+
},
|
| 34537 |
+
{
|
| 34538 |
+
"epoch": 0.9135762948941886,
|
| 34539 |
+
"grad_norm": 34.8125,
|
| 34540 |
+
"learning_rate": 9.857253745313194e-06,
|
| 34541 |
+
"loss": 18.841,
|
| 34542 |
+
"step": 49230
|
| 34543 |
+
},
|
| 34544 |
+
{
|
| 34545 |
+
"epoch": 0.9137618679786683,
|
| 34546 |
+
"grad_norm": 34.8125,
|
| 34547 |
+
"learning_rate": 9.857224749527153e-06,
|
| 34548 |
+
"loss": 18.694,
|
| 34549 |
+
"step": 49240
|
| 34550 |
+
},
|
| 34551 |
+
{
|
| 34552 |
+
"epoch": 0.9139474410631482,
|
| 34553 |
+
"grad_norm": 37.6875,
|
| 34554 |
+
"learning_rate": 9.85719575374111e-06,
|
| 34555 |
+
"loss": 19.1713,
|
| 34556 |
+
"step": 49250
|
| 34557 |
+
},
|
| 34558 |
+
{
|
| 34559 |
+
"epoch": 0.914133014147628,
|
| 34560 |
+
"grad_norm": 35.625,
|
| 34561 |
+
"learning_rate": 9.857166757955066e-06,
|
| 34562 |
+
"loss": 18.9923,
|
| 34563 |
+
"step": 49260
|
| 34564 |
+
},
|
| 34565 |
+
{
|
| 34566 |
+
"epoch": 0.9143185872321079,
|
| 34567 |
+
"grad_norm": 36.78125,
|
| 34568 |
+
"learning_rate": 9.857137762169025e-06,
|
| 34569 |
+
"loss": 18.8704,
|
| 34570 |
+
"step": 49270
|
| 34571 |
+
},
|
| 34572 |
+
{
|
| 34573 |
+
"epoch": 0.9145041603165877,
|
| 34574 |
+
"grad_norm": 35.875,
|
| 34575 |
+
"learning_rate": 9.857108766382982e-06,
|
| 34576 |
+
"loss": 19.2151,
|
| 34577 |
+
"step": 49280
|
| 34578 |
+
},
|
| 34579 |
+
{
|
| 34580 |
+
"epoch": 0.9146897334010675,
|
| 34581 |
+
"grad_norm": 35.71875,
|
| 34582 |
+
"learning_rate": 9.85707977059694e-06,
|
| 34583 |
+
"loss": 18.6477,
|
| 34584 |
+
"step": 49290
|
| 34585 |
+
},
|
| 34586 |
+
{
|
| 34587 |
+
"epoch": 0.9148753064855474,
|
| 34588 |
+
"grad_norm": 36.875,
|
| 34589 |
+
"learning_rate": 9.857050774810897e-06,
|
| 34590 |
+
"loss": 18.5986,
|
| 34591 |
+
"step": 49300
|
| 34592 |
+
},
|
| 34593 |
+
{
|
| 34594 |
+
"epoch": 0.9150608795700271,
|
| 34595 |
+
"grad_norm": 35.375,
|
| 34596 |
+
"learning_rate": 9.857021779024855e-06,
|
| 34597 |
+
"loss": 18.695,
|
| 34598 |
+
"step": 49310
|
| 34599 |
+
},
|
| 34600 |
+
{
|
| 34601 |
+
"epoch": 0.915246452654507,
|
| 34602 |
+
"grad_norm": 36.0,
|
| 34603 |
+
"learning_rate": 9.856992783238812e-06,
|
| 34604 |
+
"loss": 18.5976,
|
| 34605 |
+
"step": 49320
|
| 34606 |
+
},
|
| 34607 |
+
{
|
| 34608 |
+
"epoch": 0.9154320257389869,
|
| 34609 |
+
"grad_norm": 36.28125,
|
| 34610 |
+
"learning_rate": 9.85696378745277e-06,
|
| 34611 |
+
"loss": 18.6872,
|
| 34612 |
+
"step": 49330
|
| 34613 |
+
},
|
| 34614 |
+
{
|
| 34615 |
+
"epoch": 0.9156175988234666,
|
| 34616 |
+
"grad_norm": 37.875,
|
| 34617 |
+
"learning_rate": 9.856934791666729e-06,
|
| 34618 |
+
"loss": 18.8967,
|
| 34619 |
+
"step": 49340
|
| 34620 |
+
},
|
| 34621 |
+
{
|
| 34622 |
+
"epoch": 0.9158031719079465,
|
| 34623 |
+
"grad_norm": 35.59375,
|
| 34624 |
+
"learning_rate": 9.856905795880686e-06,
|
| 34625 |
+
"loss": 18.6512,
|
| 34626 |
+
"step": 49350
|
| 34627 |
+
},
|
| 34628 |
+
{
|
| 34629 |
+
"epoch": 0.9159887449924263,
|
| 34630 |
+
"grad_norm": 35.9375,
|
| 34631 |
+
"learning_rate": 9.856876800094643e-06,
|
| 34632 |
+
"loss": 18.7375,
|
| 34633 |
+
"step": 49360
|
| 34634 |
+
},
|
| 34635 |
+
{
|
| 34636 |
+
"epoch": 0.9161743180769061,
|
| 34637 |
+
"grad_norm": 36.40625,
|
| 34638 |
+
"learning_rate": 9.8568478043086e-06,
|
| 34639 |
+
"loss": 18.6419,
|
| 34640 |
+
"step": 49370
|
| 34641 |
+
},
|
| 34642 |
+
{
|
| 34643 |
+
"epoch": 0.9163598911613859,
|
| 34644 |
+
"grad_norm": 35.0,
|
| 34645 |
+
"learning_rate": 9.856818808522558e-06,
|
| 34646 |
+
"loss": 18.6441,
|
| 34647 |
+
"step": 49380
|
| 34648 |
+
},
|
| 34649 |
+
{
|
| 34650 |
+
"epoch": 0.9165454642458658,
|
| 34651 |
+
"grad_norm": 35.25,
|
| 34652 |
+
"learning_rate": 9.856789812736516e-06,
|
| 34653 |
+
"loss": 19.2166,
|
| 34654 |
+
"step": 49390
|
| 34655 |
+
},
|
| 34656 |
+
{
|
| 34657 |
+
"epoch": 0.9167310373303456,
|
| 34658 |
+
"grad_norm": 36.1875,
|
| 34659 |
+
"learning_rate": 9.856760816950473e-06,
|
| 34660 |
+
"loss": 18.9291,
|
| 34661 |
+
"step": 49400
|
| 34662 |
+
},
|
| 34663 |
+
{
|
| 34664 |
+
"epoch": 0.9169166104148254,
|
| 34665 |
+
"grad_norm": 38.6875,
|
| 34666 |
+
"learning_rate": 9.856731821164432e-06,
|
| 34667 |
+
"loss": 18.5892,
|
| 34668 |
+
"step": 49410
|
| 34669 |
+
},
|
| 34670 |
+
{
|
| 34671 |
+
"epoch": 0.9171021834993053,
|
| 34672 |
+
"grad_norm": 37.28125,
|
| 34673 |
+
"learning_rate": 9.856702825378388e-06,
|
| 34674 |
+
"loss": 18.7727,
|
| 34675 |
+
"step": 49420
|
| 34676 |
+
},
|
| 34677 |
+
{
|
| 34678 |
+
"epoch": 0.9172877565837851,
|
| 34679 |
+
"grad_norm": 35.40625,
|
| 34680 |
+
"learning_rate": 9.856673829592345e-06,
|
| 34681 |
+
"loss": 19.09,
|
| 34682 |
+
"step": 49430
|
| 34683 |
+
},
|
| 34684 |
+
{
|
| 34685 |
+
"epoch": 0.9174733296682649,
|
| 34686 |
+
"grad_norm": 37.1875,
|
| 34687 |
+
"learning_rate": 9.856644833806304e-06,
|
| 34688 |
+
"loss": 18.3979,
|
| 34689 |
+
"step": 49440
|
| 34690 |
+
},
|
| 34691 |
+
{
|
| 34692 |
+
"epoch": 0.9176589027527448,
|
| 34693 |
+
"grad_norm": 35.25,
|
| 34694 |
+
"learning_rate": 9.856615838020262e-06,
|
| 34695 |
+
"loss": 18.9984,
|
| 34696 |
+
"step": 49450
|
| 34697 |
+
},
|
| 34698 |
+
{
|
| 34699 |
+
"epoch": 0.9178444758372246,
|
| 34700 |
+
"grad_norm": 35.875,
|
| 34701 |
+
"learning_rate": 9.856586842234219e-06,
|
| 34702 |
+
"loss": 18.7745,
|
| 34703 |
+
"step": 49460
|
| 34704 |
+
},
|
| 34705 |
+
{
|
| 34706 |
+
"epoch": 0.9180300489217044,
|
| 34707 |
+
"grad_norm": 36.625,
|
| 34708 |
+
"learning_rate": 9.856557846448177e-06,
|
| 34709 |
+
"loss": 18.9508,
|
| 34710 |
+
"step": 49470
|
| 34711 |
+
},
|
| 34712 |
+
{
|
| 34713 |
+
"epoch": 0.9182156220061842,
|
| 34714 |
+
"grad_norm": 36.5625,
|
| 34715 |
+
"learning_rate": 9.856528850662134e-06,
|
| 34716 |
+
"loss": 18.8949,
|
| 34717 |
+
"step": 49480
|
| 34718 |
+
},
|
| 34719 |
+
{
|
| 34720 |
+
"epoch": 0.9184011950906641,
|
| 34721 |
+
"grad_norm": 37.5,
|
| 34722 |
+
"learning_rate": 9.856499854876091e-06,
|
| 34723 |
+
"loss": 19.3296,
|
| 34724 |
+
"step": 49490
|
| 34725 |
+
},
|
| 34726 |
+
{
|
| 34727 |
+
"epoch": 0.9185867681751438,
|
| 34728 |
+
"grad_norm": 35.71875,
|
| 34729 |
+
"learning_rate": 9.856470859090049e-06,
|
| 34730 |
+
"loss": 19.2355,
|
| 34731 |
+
"step": 49500
|
| 34732 |
+
},
|
| 34733 |
+
{
|
| 34734 |
+
"epoch": 0.9187723412596237,
|
| 34735 |
+
"grad_norm": 36.03125,
|
| 34736 |
+
"learning_rate": 9.856441863304008e-06,
|
| 34737 |
+
"loss": 18.7253,
|
| 34738 |
+
"step": 49510
|
| 34739 |
+
},
|
| 34740 |
+
{
|
| 34741 |
+
"epoch": 0.9189579143441036,
|
| 34742 |
+
"grad_norm": 36.90625,
|
| 34743 |
+
"learning_rate": 9.856412867517965e-06,
|
| 34744 |
+
"loss": 18.6996,
|
| 34745 |
+
"step": 49520
|
| 34746 |
+
},
|
| 34747 |
+
{
|
| 34748 |
+
"epoch": 0.9191434874285833,
|
| 34749 |
+
"grad_norm": 36.34375,
|
| 34750 |
+
"learning_rate": 9.856383871731921e-06,
|
| 34751 |
+
"loss": 19.0339,
|
| 34752 |
+
"step": 49530
|
| 34753 |
+
},
|
| 34754 |
+
{
|
| 34755 |
+
"epoch": 0.9193290605130632,
|
| 34756 |
+
"grad_norm": 35.59375,
|
| 34757 |
+
"learning_rate": 9.85635487594588e-06,
|
| 34758 |
+
"loss": 18.3551,
|
| 34759 |
+
"step": 49540
|
| 34760 |
+
},
|
| 34761 |
+
{
|
| 34762 |
+
"epoch": 0.919514633597543,
|
| 34763 |
+
"grad_norm": 37.1875,
|
| 34764 |
+
"learning_rate": 9.856325880159837e-06,
|
| 34765 |
+
"loss": 18.76,
|
| 34766 |
+
"step": 49550
|
| 34767 |
+
},
|
| 34768 |
+
{
|
| 34769 |
+
"epoch": 0.9197002066820228,
|
| 34770 |
+
"grad_norm": 36.5625,
|
| 34771 |
+
"learning_rate": 9.856296884373795e-06,
|
| 34772 |
+
"loss": 18.2547,
|
| 34773 |
+
"step": 49560
|
| 34774 |
+
},
|
| 34775 |
+
{
|
| 34776 |
+
"epoch": 0.9198857797665027,
|
| 34777 |
+
"grad_norm": 37.5,
|
| 34778 |
+
"learning_rate": 9.856267888587752e-06,
|
| 34779 |
+
"loss": 18.8997,
|
| 34780 |
+
"step": 49570
|
| 34781 |
+
},
|
| 34782 |
+
{
|
| 34783 |
+
"epoch": 0.9200713528509825,
|
| 34784 |
+
"grad_norm": 36.78125,
|
| 34785 |
+
"learning_rate": 9.85623889280171e-06,
|
| 34786 |
+
"loss": 18.4645,
|
| 34787 |
+
"step": 49580
|
| 34788 |
+
},
|
| 34789 |
+
{
|
| 34790 |
+
"epoch": 0.9202569259354623,
|
| 34791 |
+
"grad_norm": 36.78125,
|
| 34792 |
+
"learning_rate": 9.856209897015667e-06,
|
| 34793 |
+
"loss": 18.8955,
|
| 34794 |
+
"step": 49590
|
| 34795 |
+
},
|
| 34796 |
+
{
|
| 34797 |
+
"epoch": 0.9204424990199421,
|
| 34798 |
+
"grad_norm": 37.59375,
|
| 34799 |
+
"learning_rate": 9.856180901229625e-06,
|
| 34800 |
+
"loss": 18.3526,
|
| 34801 |
+
"step": 49600
|
| 34802 |
+
},
|
| 34803 |
+
{
|
| 34804 |
+
"epoch": 0.920628072104422,
|
| 34805 |
+
"grad_norm": 36.53125,
|
| 34806 |
+
"learning_rate": 9.856151905443584e-06,
|
| 34807 |
+
"loss": 19.0135,
|
| 34808 |
+
"step": 49610
|
| 34809 |
+
},
|
| 34810 |
+
{
|
| 34811 |
+
"epoch": 0.9208136451889019,
|
| 34812 |
+
"grad_norm": 36.28125,
|
| 34813 |
+
"learning_rate": 9.856122909657541e-06,
|
| 34814 |
+
"loss": 18.5804,
|
| 34815 |
+
"step": 49620
|
| 34816 |
+
},
|
| 34817 |
+
{
|
| 34818 |
+
"epoch": 0.9209992182733816,
|
| 34819 |
+
"grad_norm": 37.40625,
|
| 34820 |
+
"learning_rate": 9.856093913871498e-06,
|
| 34821 |
+
"loss": 18.6843,
|
| 34822 |
+
"step": 49630
|
| 34823 |
+
},
|
| 34824 |
+
{
|
| 34825 |
+
"epoch": 0.9211847913578615,
|
| 34826 |
+
"grad_norm": 34.40625,
|
| 34827 |
+
"learning_rate": 9.856064918085456e-06,
|
| 34828 |
+
"loss": 19.2493,
|
| 34829 |
+
"step": 49640
|
| 34830 |
+
},
|
| 34831 |
+
{
|
| 34832 |
+
"epoch": 0.9213703644423413,
|
| 34833 |
+
"grad_norm": 36.875,
|
| 34834 |
+
"learning_rate": 9.856035922299413e-06,
|
| 34835 |
+
"loss": 18.6309,
|
| 34836 |
+
"step": 49650
|
| 34837 |
+
},
|
| 34838 |
+
{
|
| 34839 |
+
"epoch": 0.9215559375268211,
|
| 34840 |
+
"grad_norm": 37.5,
|
| 34841 |
+
"learning_rate": 9.85600692651337e-06,
|
| 34842 |
+
"loss": 18.8726,
|
| 34843 |
+
"step": 49660
|
| 34844 |
+
},
|
| 34845 |
+
{
|
| 34846 |
+
"epoch": 0.9217415106113009,
|
| 34847 |
+
"grad_norm": 36.375,
|
| 34848 |
+
"learning_rate": 9.855977930727328e-06,
|
| 34849 |
+
"loss": 18.5765,
|
| 34850 |
+
"step": 49670
|
| 34851 |
+
},
|
| 34852 |
+
{
|
| 34853 |
+
"epoch": 0.9219270836957808,
|
| 34854 |
+
"grad_norm": 36.21875,
|
| 34855 |
+
"learning_rate": 9.855948934941285e-06,
|
| 34856 |
+
"loss": 18.8799,
|
| 34857 |
+
"step": 49680
|
| 34858 |
+
},
|
| 34859 |
+
{
|
| 34860 |
+
"epoch": 0.9221126567802606,
|
| 34861 |
+
"grad_norm": 35.84375,
|
| 34862 |
+
"learning_rate": 9.855919939155243e-06,
|
| 34863 |
+
"loss": 18.758,
|
| 34864 |
+
"step": 49690
|
| 34865 |
+
},
|
| 34866 |
+
{
|
| 34867 |
+
"epoch": 0.9222982298647404,
|
| 34868 |
+
"grad_norm": 36.96875,
|
| 34869 |
+
"learning_rate": 9.8558909433692e-06,
|
| 34870 |
+
"loss": 18.7222,
|
| 34871 |
+
"step": 49700
|
| 34872 |
+
},
|
| 34873 |
+
{
|
| 34874 |
+
"epoch": 0.9224838029492203,
|
| 34875 |
+
"grad_norm": 35.78125,
|
| 34876 |
+
"learning_rate": 9.855861947583158e-06,
|
| 34877 |
+
"loss": 18.7792,
|
| 34878 |
+
"step": 49710
|
| 34879 |
+
},
|
| 34880 |
+
{
|
| 34881 |
+
"epoch": 0.9226693760337,
|
| 34882 |
+
"grad_norm": 35.40625,
|
| 34883 |
+
"learning_rate": 9.855832951797117e-06,
|
| 34884 |
+
"loss": 18.7375,
|
| 34885 |
+
"step": 49720
|
| 34886 |
+
},
|
| 34887 |
+
{
|
| 34888 |
+
"epoch": 0.9228549491181799,
|
| 34889 |
+
"grad_norm": 36.9375,
|
| 34890 |
+
"learning_rate": 9.855803956011074e-06,
|
| 34891 |
+
"loss": 18.734,
|
| 34892 |
+
"step": 49730
|
| 34893 |
+
},
|
| 34894 |
+
{
|
| 34895 |
+
"epoch": 0.9230405222026598,
|
| 34896 |
+
"grad_norm": 37.46875,
|
| 34897 |
+
"learning_rate": 9.855774960225032e-06,
|
| 34898 |
+
"loss": 18.7155,
|
| 34899 |
+
"step": 49740
|
| 34900 |
+
},
|
| 34901 |
+
{
|
| 34902 |
+
"epoch": 0.9232260952871395,
|
| 34903 |
+
"grad_norm": 38.0,
|
| 34904 |
+
"learning_rate": 9.855745964438989e-06,
|
| 34905 |
+
"loss": 19.2284,
|
| 34906 |
+
"step": 49750
|
| 34907 |
+
},
|
| 34908 |
+
{
|
| 34909 |
+
"epoch": 0.9234116683716194,
|
| 34910 |
+
"grad_norm": 38.1875,
|
| 34911 |
+
"learning_rate": 9.855716968652946e-06,
|
| 34912 |
+
"loss": 19.4265,
|
| 34913 |
+
"step": 49760
|
| 34914 |
+
},
|
| 34915 |
+
{
|
| 34916 |
+
"epoch": 0.9235972414560992,
|
| 34917 |
+
"grad_norm": 36.46875,
|
| 34918 |
+
"learning_rate": 9.855687972866904e-06,
|
| 34919 |
+
"loss": 18.7159,
|
| 34920 |
+
"step": 49770
|
| 34921 |
+
},
|
| 34922 |
+
{
|
| 34923 |
+
"epoch": 0.9237828145405791,
|
| 34924 |
+
"grad_norm": 37.53125,
|
| 34925 |
+
"learning_rate": 9.855658977080861e-06,
|
| 34926 |
+
"loss": 18.4277,
|
| 34927 |
+
"step": 49780
|
| 34928 |
+
},
|
| 34929 |
+
{
|
| 34930 |
+
"epoch": 0.9239683876250588,
|
| 34931 |
+
"grad_norm": 37.15625,
|
| 34932 |
+
"learning_rate": 9.85562998129482e-06,
|
| 34933 |
+
"loss": 18.7183,
|
| 34934 |
+
"step": 49790
|
| 34935 |
+
},
|
| 34936 |
+
{
|
| 34937 |
+
"epoch": 0.9241539607095387,
|
| 34938 |
+
"grad_norm": 36.03125,
|
| 34939 |
+
"learning_rate": 9.855600985508776e-06,
|
| 34940 |
+
"loss": 18.8495,
|
| 34941 |
+
"step": 49800
|
| 34942 |
+
},
|
| 34943 |
+
{
|
| 34944 |
+
"epoch": 0.9243395337940186,
|
| 34945 |
+
"grad_norm": 36.0625,
|
| 34946 |
+
"learning_rate": 9.855571989722733e-06,
|
| 34947 |
+
"loss": 19.4189,
|
| 34948 |
+
"step": 49810
|
| 34949 |
+
},
|
| 34950 |
+
{
|
| 34951 |
+
"epoch": 0.9245251068784983,
|
| 34952 |
+
"grad_norm": 37.625,
|
| 34953 |
+
"learning_rate": 9.855542993936693e-06,
|
| 34954 |
+
"loss": 18.4683,
|
| 34955 |
+
"step": 49820
|
| 34956 |
+
},
|
| 34957 |
+
{
|
| 34958 |
+
"epoch": 0.9247106799629782,
|
| 34959 |
+
"grad_norm": 37.09375,
|
| 34960 |
+
"learning_rate": 9.85551399815065e-06,
|
| 34961 |
+
"loss": 19.0064,
|
| 34962 |
+
"step": 49830
|
| 34963 |
+
},
|
| 34964 |
+
{
|
| 34965 |
+
"epoch": 0.924896253047458,
|
| 34966 |
+
"grad_norm": 38.1875,
|
| 34967 |
+
"learning_rate": 9.855485002364607e-06,
|
| 34968 |
+
"loss": 18.644,
|
| 34969 |
+
"step": 49840
|
| 34970 |
+
},
|
| 34971 |
+
{
|
| 34972 |
+
"epoch": 0.9250818261319378,
|
| 34973 |
+
"grad_norm": 37.25,
|
| 34974 |
+
"learning_rate": 9.855456006578565e-06,
|
| 34975 |
+
"loss": 19.2591,
|
| 34976 |
+
"step": 49850
|
| 34977 |
+
},
|
| 34978 |
+
{
|
| 34979 |
+
"epoch": 0.9252673992164177,
|
| 34980 |
+
"grad_norm": 36.90625,
|
| 34981 |
+
"learning_rate": 9.855427010792522e-06,
|
| 34982 |
+
"loss": 18.8081,
|
| 34983 |
+
"step": 49860
|
| 34984 |
+
},
|
| 34985 |
+
{
|
| 34986 |
+
"epoch": 0.9254529723008975,
|
| 34987 |
+
"grad_norm": 36.625,
|
| 34988 |
+
"learning_rate": 9.85539801500648e-06,
|
| 34989 |
+
"loss": 19.0542,
|
| 34990 |
+
"step": 49870
|
| 34991 |
+
},
|
| 34992 |
+
{
|
| 34993 |
+
"epoch": 0.9256385453853773,
|
| 34994 |
+
"grad_norm": 35.8125,
|
| 34995 |
+
"learning_rate": 9.855369019220437e-06,
|
| 34996 |
+
"loss": 18.7709,
|
| 34997 |
+
"step": 49880
|
| 34998 |
+
},
|
| 34999 |
+
{
|
| 35000 |
+
"epoch": 0.9258241184698571,
|
| 35001 |
+
"grad_norm": 38.25,
|
| 35002 |
+
"learning_rate": 9.855340023434396e-06,
|
| 35003 |
+
"loss": 19.0913,
|
| 35004 |
+
"step": 49890
|
| 35005 |
+
},
|
| 35006 |
+
{
|
| 35007 |
+
"epoch": 0.926009691554337,
|
| 35008 |
+
"grad_norm": 35.8125,
|
| 35009 |
+
"learning_rate": 9.855311027648352e-06,
|
| 35010 |
+
"loss": 18.8659,
|
| 35011 |
+
"step": 49900
|
| 35012 |
+
},
|
| 35013 |
+
{
|
| 35014 |
+
"epoch": 0.9261952646388167,
|
| 35015 |
+
"grad_norm": 37.15625,
|
| 35016 |
+
"learning_rate": 9.85528203186231e-06,
|
| 35017 |
+
"loss": 18.3361,
|
| 35018 |
+
"step": 49910
|
| 35019 |
+
},
|
| 35020 |
+
{
|
| 35021 |
+
"epoch": 0.9263808377232966,
|
| 35022 |
+
"grad_norm": 34.78125,
|
| 35023 |
+
"learning_rate": 9.855253036076268e-06,
|
| 35024 |
+
"loss": 19.2276,
|
| 35025 |
+
"step": 49920
|
| 35026 |
+
},
|
| 35027 |
+
{
|
| 35028 |
+
"epoch": 0.9265664108077765,
|
| 35029 |
+
"grad_norm": 37.34375,
|
| 35030 |
+
"learning_rate": 9.855224040290226e-06,
|
| 35031 |
+
"loss": 18.7894,
|
| 35032 |
+
"step": 49930
|
| 35033 |
+
},
|
| 35034 |
+
{
|
| 35035 |
+
"epoch": 0.9267519838922562,
|
| 35036 |
+
"grad_norm": 35.96875,
|
| 35037 |
+
"learning_rate": 9.855195044504183e-06,
|
| 35038 |
+
"loss": 18.6204,
|
| 35039 |
+
"step": 49940
|
| 35040 |
+
},
|
| 35041 |
+
{
|
| 35042 |
+
"epoch": 0.9269375569767361,
|
| 35043 |
+
"grad_norm": 36.90625,
|
| 35044 |
+
"learning_rate": 9.85516604871814e-06,
|
| 35045 |
+
"loss": 18.4182,
|
| 35046 |
+
"step": 49950
|
| 35047 |
+
},
|
| 35048 |
+
{
|
| 35049 |
+
"epoch": 0.9271231300612159,
|
| 35050 |
+
"grad_norm": 35.53125,
|
| 35051 |
+
"learning_rate": 9.855137052932098e-06,
|
| 35052 |
+
"loss": 18.6139,
|
| 35053 |
+
"step": 49960
|
| 35054 |
+
},
|
| 35055 |
+
{
|
| 35056 |
+
"epoch": 0.9273087031456958,
|
| 35057 |
+
"grad_norm": 37.53125,
|
| 35058 |
+
"learning_rate": 9.855108057146055e-06,
|
| 35059 |
+
"loss": 18.7786,
|
| 35060 |
+
"step": 49970
|
| 35061 |
+
},
|
| 35062 |
+
{
|
| 35063 |
+
"epoch": 0.9274942762301756,
|
| 35064 |
+
"grad_norm": 37.375,
|
| 35065 |
+
"learning_rate": 9.855079061360013e-06,
|
| 35066 |
+
"loss": 18.8478,
|
| 35067 |
+
"step": 49980
|
| 35068 |
+
},
|
| 35069 |
+
{
|
| 35070 |
+
"epoch": 0.9276798493146554,
|
| 35071 |
+
"grad_norm": 34.125,
|
| 35072 |
+
"learning_rate": 9.855050065573972e-06,
|
| 35073 |
+
"loss": 18.4067,
|
| 35074 |
+
"step": 49990
|
| 35075 |
+
},
|
| 35076 |
+
{
|
| 35077 |
+
"epoch": 0.9278654223991353,
|
| 35078 |
+
"grad_norm": 36.40625,
|
| 35079 |
+
"learning_rate": 9.85502106978793e-06,
|
| 35080 |
+
"loss": 18.7262,
|
| 35081 |
+
"step": 50000
|
| 35082 |
+
},
|
| 35083 |
+
{
|
| 35084 |
+
"epoch": 0.9278654223991353,
|
| 35085 |
+
"eval_loss": 2.3473334312438965,
|
| 35086 |
+
"eval_runtime": 455.3845,
|
| 35087 |
+
"eval_samples_per_second": 3188.771,
|
| 35088 |
+
"eval_steps_per_second": 49.826,
|
| 35089 |
+
"step": 50000
|
| 35090 |
}
|
| 35091 |
],
|
| 35092 |
"logging_steps": 10,
|
|
|
|
| 35106 |
"attributes": {}
|
| 35107 |
}
|
| 35108 |
},
|
| 35109 |
+
"total_flos": 8.72761379520512e+18,
|
| 35110 |
"train_batch_size": 8,
|
| 35111 |
"trial_name": null,
|
| 35112 |
"trial_params": null
|