Training in progress, step 25000, checkpoint
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3511 -3
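Not part of the commit itself, but for orientation: the `trainer_state.json` in this checkpoint folder is plain JSON, so the fields updated in the diffs below (epoch, global_step, and the appended `log_history` entries) can be inspected without loading the model. A minimal sketch, assuming the `last-checkpoint/` layout listed above and standard-library Python only; the path and variable names are illustrative, not part of the commit:

```python
import json
from pathlib import Path

# Assumed path: point it at the checkpoint folder from this commit.
state_path = Path("last-checkpoint") / "trainer_state.json"
state = json.loads(state_path.read_text())

print("epoch:", state["epoch"])              # 0.46393271119956764 at this checkpoint
print("global_step:", state["global_step"])  # 25000 at this checkpoint

# log_history holds one dict per logging step; training entries carry
# "loss", "grad_norm", "learning_rate", and "step" as shown in the diff below.
train_logs = [e for e in state["log_history"] if "loss" in e]
last = train_logs[-1]
print(f"last logged step {last['step']}: loss={last['loss']}, lr={last['learning_rate']}")
```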
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ba4ebc8d324592b24aa466cb1a17beb4eb518d5cd7415ad4d10867a1f113452a
 size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c9684e35dfc059389b032c609c2d17105dd7d52f3b875814129afa1ef90d3e36
 size 919972410
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:69ec6e3926fa071bede113523efa3dc6e630c3c7958c54a9ca321cf4d62ed145
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f6127ee4f0c13500ec5038fce65af8f7beec63c137c7d4b7c157aa6303cf5879
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:da01d1c5eb2cc3a323f97c1f590d13ccfac2a4c5b1479bd378b4e643304f5a4f
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:49a3f04d76c0d3acc7d3dd95a04215f368f35a451ae8cba8a2fdba38cda9ca0a
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:df7d2c9825dba80cb544920f8cc0c72122f96514e6cd259052a8765b034393e2
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6a20a42d44ff48cc162224010190e898fe28598ddad8cd1896d330a3bb1d8ec3
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:18ac0dc4f09f25179860561fcea7c5c8f997aabdc46a170665f9dc5a72bc27c6
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6a16fcb5411ff961b47eff7378d85105fe9837e0492d19ea5ce3b7c4b77aa3b6
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6276b39eb0a6a4f547784c30a100b3eee72c8aefbe6f0f7bb1ca7dca8f60dc4b
 size 1000
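All of the files above are stored through Git LFS, so each diff only touches the three-line pointer file (spec version, `oid sha256:...`, `size`); the new oid is the hash of the updated blob. A minimal sketch of reading such a pointer, using only the format shown in the diffs above; the function name and example variable are illustrative, not anything shipped in this repository:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer file ("key value" lines) into a dict."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

# Example: the new model.safetensors pointer from this commit.
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:ba4ebc8d324592b24aa466cb1a17beb4eb518d5cd7415ad4d10867a1f113452a
size 306619286"""

info = parse_lfs_pointer(pointer)
print(info["oid"])        # sha256:ba4ebc8d...
print(int(info["size"]))  # 306619286 bytes
```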
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.
 "eval_steps": 5000,
-"global_step":
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -14039,6 +14039,3514 @@
 "eval_samples_per_second": 3203.381,
 "eval_steps_per_second": 50.054,
 "step": 20000
 }
 ],
 "logging_steps": 10,
@@ -14058,7 +17566,7 @@
 "attributes": {}
 }
 },
-"total_flos":
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null

 {
 "best_metric": null,
 "best_model_checkpoint": null,
+"epoch": 0.46393271119956764,
 "eval_steps": 5000,
+"global_step": 25000,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "eval_samples_per_second": 3203.381,
 "eval_steps_per_second": 50.054,
 "step": 20000
| 14042 |
+
},
|
| 14043 |
+
{
|
| 14044 |
+
"epoch": 0.37133174204413394,
|
| 14045 |
+
"grad_norm": 37.40625,
|
| 14046 |
+
"learning_rate": 9.94197943212913e-06,
|
| 14047 |
+
"loss": 21.1517,
|
| 14048 |
+
"step": 20010
|
| 14049 |
+
},
|
| 14050 |
+
{
|
| 14051 |
+
"epoch": 0.37151731512861375,
|
| 14052 |
+
"grad_norm": 35.5,
|
| 14053 |
+
"learning_rate": 9.941950436343087e-06,
|
| 14054 |
+
"loss": 21.8961,
|
| 14055 |
+
"step": 20020
|
| 14056 |
+
},
|
| 14057 |
+
{
|
| 14058 |
+
"epoch": 0.37170288821309355,
|
| 14059 |
+
"grad_norm": 33.875,
|
| 14060 |
+
"learning_rate": 9.941921440557045e-06,
|
| 14061 |
+
"loss": 21.297,
|
| 14062 |
+
"step": 20030
|
| 14063 |
+
},
|
| 14064 |
+
{
|
| 14065 |
+
"epoch": 0.3718884612975734,
|
| 14066 |
+
"grad_norm": 34.4375,
|
| 14067 |
+
"learning_rate": 9.941892444771002e-06,
|
| 14068 |
+
"loss": 21.2152,
|
| 14069 |
+
"step": 20040
|
| 14070 |
+
},
|
| 14071 |
+
{
|
| 14072 |
+
"epoch": 0.3720740343820532,
|
| 14073 |
+
"grad_norm": 33.75,
|
| 14074 |
+
"learning_rate": 9.94186344898496e-06,
|
| 14075 |
+
"loss": 21.5122,
|
| 14076 |
+
"step": 20050
|
| 14077 |
+
},
|
| 14078 |
+
{
|
| 14079 |
+
"epoch": 0.372259607466533,
|
| 14080 |
+
"grad_norm": 33.375,
|
| 14081 |
+
"learning_rate": 9.941834453198917e-06,
|
| 14082 |
+
"loss": 21.7115,
|
| 14083 |
+
"step": 20060
|
| 14084 |
+
},
|
| 14085 |
+
{
|
| 14086 |
+
"epoch": 0.3724451805510129,
|
| 14087 |
+
"grad_norm": 34.90625,
|
| 14088 |
+
"learning_rate": 9.941805457412876e-06,
|
| 14089 |
+
"loss": 21.4614,
|
| 14090 |
+
"step": 20070
|
| 14091 |
+
},
|
| 14092 |
+
{
|
| 14093 |
+
"epoch": 0.3726307536354927,
|
| 14094 |
+
"grad_norm": 34.28125,
|
| 14095 |
+
"learning_rate": 9.941776461626833e-06,
|
| 14096 |
+
"loss": 21.2661,
|
| 14097 |
+
"step": 20080
|
| 14098 |
+
},
|
| 14099 |
+
{
|
| 14100 |
+
"epoch": 0.37281632671997256,
|
| 14101 |
+
"grad_norm": 32.09375,
|
| 14102 |
+
"learning_rate": 9.94174746584079e-06,
|
| 14103 |
+
"loss": 21.056,
|
| 14104 |
+
"step": 20090
|
| 14105 |
+
},
|
| 14106 |
+
{
|
| 14107 |
+
"epoch": 0.37300189980445236,
|
| 14108 |
+
"grad_norm": 34.96875,
|
| 14109 |
+
"learning_rate": 9.941718470054748e-06,
|
| 14110 |
+
"loss": 21.249,
|
| 14111 |
+
"step": 20100
|
| 14112 |
+
},
|
| 14113 |
+
{
|
| 14114 |
+
"epoch": 0.37318747288893217,
|
| 14115 |
+
"grad_norm": 34.5,
|
| 14116 |
+
"learning_rate": 9.941689474268706e-06,
|
| 14117 |
+
"loss": 20.9543,
|
| 14118 |
+
"step": 20110
|
| 14119 |
+
},
|
| 14120 |
+
{
|
| 14121 |
+
"epoch": 0.37337304597341203,
|
| 14122 |
+
"grad_norm": 35.21875,
|
| 14123 |
+
"learning_rate": 9.941660478482663e-06,
|
| 14124 |
+
"loss": 21.4361,
|
| 14125 |
+
"step": 20120
|
| 14126 |
+
},
|
| 14127 |
+
{
|
| 14128 |
+
"epoch": 0.37355861905789184,
|
| 14129 |
+
"grad_norm": 34.875,
|
| 14130 |
+
"learning_rate": 9.94163148269662e-06,
|
| 14131 |
+
"loss": 21.5762,
|
| 14132 |
+
"step": 20130
|
| 14133 |
+
},
|
| 14134 |
+
{
|
| 14135 |
+
"epoch": 0.37374419214237165,
|
| 14136 |
+
"grad_norm": 37.21875,
|
| 14137 |
+
"learning_rate": 9.941602486910578e-06,
|
| 14138 |
+
"loss": 21.4474,
|
| 14139 |
+
"step": 20140
|
| 14140 |
+
},
|
| 14141 |
+
{
|
| 14142 |
+
"epoch": 0.3739297652268515,
|
| 14143 |
+
"grad_norm": 36.28125,
|
| 14144 |
+
"learning_rate": 9.941573491124535e-06,
|
| 14145 |
+
"loss": 21.4356,
|
| 14146 |
+
"step": 20150
|
| 14147 |
+
},
|
| 14148 |
+
{
|
| 14149 |
+
"epoch": 0.3741153383113313,
|
| 14150 |
+
"grad_norm": 33.8125,
|
| 14151 |
+
"learning_rate": 9.941544495338493e-06,
|
| 14152 |
+
"loss": 21.2281,
|
| 14153 |
+
"step": 20160
|
| 14154 |
+
},
|
| 14155 |
+
{
|
| 14156 |
+
"epoch": 0.3743009113958112,
|
| 14157 |
+
"grad_norm": 32.8125,
|
| 14158 |
+
"learning_rate": 9.94151549955245e-06,
|
| 14159 |
+
"loss": 21.3117,
|
| 14160 |
+
"step": 20170
|
| 14161 |
+
},
|
| 14162 |
+
{
|
| 14163 |
+
"epoch": 0.374486484480291,
|
| 14164 |
+
"grad_norm": 35.0,
|
| 14165 |
+
"learning_rate": 9.94148650376641e-06,
|
| 14166 |
+
"loss": 21.3198,
|
| 14167 |
+
"step": 20180
|
| 14168 |
+
},
|
| 14169 |
+
{
|
| 14170 |
+
"epoch": 0.3746720575647708,
|
| 14171 |
+
"grad_norm": 37.8125,
|
| 14172 |
+
"learning_rate": 9.941457507980367e-06,
|
| 14173 |
+
"loss": 21.4875,
|
| 14174 |
+
"step": 20190
|
| 14175 |
+
},
|
| 14176 |
+
{
|
| 14177 |
+
"epoch": 0.37485763064925065,
|
| 14178 |
+
"grad_norm": 35.03125,
|
| 14179 |
+
"learning_rate": 9.941428512194324e-06,
|
| 14180 |
+
"loss": 21.4072,
|
| 14181 |
+
"step": 20200
|
| 14182 |
+
},
|
| 14183 |
+
{
|
| 14184 |
+
"epoch": 0.37504320373373046,
|
| 14185 |
+
"grad_norm": 36.40625,
|
| 14186 |
+
"learning_rate": 9.941399516408281e-06,
|
| 14187 |
+
"loss": 21.2224,
|
| 14188 |
+
"step": 20210
|
| 14189 |
+
},
|
| 14190 |
+
{
|
| 14191 |
+
"epoch": 0.37522877681821026,
|
| 14192 |
+
"grad_norm": 36.03125,
|
| 14193 |
+
"learning_rate": 9.941370520622239e-06,
|
| 14194 |
+
"loss": 21.606,
|
| 14195 |
+
"step": 20220
|
| 14196 |
+
},
|
| 14197 |
+
{
|
| 14198 |
+
"epoch": 0.3754143499026901,
|
| 14199 |
+
"grad_norm": 35.28125,
|
| 14200 |
+
"learning_rate": 9.941341524836196e-06,
|
| 14201 |
+
"loss": 21.2329,
|
| 14202 |
+
"step": 20230
|
| 14203 |
+
},
|
| 14204 |
+
{
|
| 14205 |
+
"epoch": 0.37559992298716993,
|
| 14206 |
+
"grad_norm": 36.28125,
|
| 14207 |
+
"learning_rate": 9.941312529050154e-06,
|
| 14208 |
+
"loss": 20.9903,
|
| 14209 |
+
"step": 20240
|
| 14210 |
+
},
|
| 14211 |
+
{
|
| 14212 |
+
"epoch": 0.3757854960716498,
|
| 14213 |
+
"grad_norm": 34.59375,
|
| 14214 |
+
"learning_rate": 9.941283533264111e-06,
|
| 14215 |
+
"loss": 21.4073,
|
| 14216 |
+
"step": 20250
|
| 14217 |
+
},
|
| 14218 |
+
{
|
| 14219 |
+
"epoch": 0.3759710691561296,
|
| 14220 |
+
"grad_norm": 34.9375,
|
| 14221 |
+
"learning_rate": 9.941254537478069e-06,
|
| 14222 |
+
"loss": 21.637,
|
| 14223 |
+
"step": 20260
|
| 14224 |
+
},
|
| 14225 |
+
{
|
| 14226 |
+
"epoch": 0.3761566422406094,
|
| 14227 |
+
"grad_norm": 34.15625,
|
| 14228 |
+
"learning_rate": 9.941225541692026e-06,
|
| 14229 |
+
"loss": 21.1687,
|
| 14230 |
+
"step": 20270
|
| 14231 |
+
},
|
| 14232 |
+
{
|
| 14233 |
+
"epoch": 0.37634221532508927,
|
| 14234 |
+
"grad_norm": 36.84375,
|
| 14235 |
+
"learning_rate": 9.941196545905985e-06,
|
| 14236 |
+
"loss": 21.4842,
|
| 14237 |
+
"step": 20280
|
| 14238 |
+
},
|
| 14239 |
+
{
|
| 14240 |
+
"epoch": 0.3765277884095691,
|
| 14241 |
+
"grad_norm": 36.28125,
|
| 14242 |
+
"learning_rate": 9.941167550119942e-06,
|
| 14243 |
+
"loss": 21.6633,
|
| 14244 |
+
"step": 20290
|
| 14245 |
+
},
|
| 14246 |
+
{
|
| 14247 |
+
"epoch": 0.3767133614940489,
|
| 14248 |
+
"grad_norm": 37.9375,
|
| 14249 |
+
"learning_rate": 9.941138554333898e-06,
|
| 14250 |
+
"loss": 21.217,
|
| 14251 |
+
"step": 20300
|
| 14252 |
+
},
|
| 14253 |
+
{
|
| 14254 |
+
"epoch": 0.37689893457852874,
|
| 14255 |
+
"grad_norm": 34.75,
|
| 14256 |
+
"learning_rate": 9.941109558547857e-06,
|
| 14257 |
+
"loss": 21.5193,
|
| 14258 |
+
"step": 20310
|
| 14259 |
+
},
|
| 14260 |
+
{
|
| 14261 |
+
"epoch": 0.37708450766300855,
|
| 14262 |
+
"grad_norm": 33.09375,
|
| 14263 |
+
"learning_rate": 9.941080562761815e-06,
|
| 14264 |
+
"loss": 21.1617,
|
| 14265 |
+
"step": 20320
|
| 14266 |
+
},
|
| 14267 |
+
{
|
| 14268 |
+
"epoch": 0.37727008074748836,
|
| 14269 |
+
"grad_norm": 34.53125,
|
| 14270 |
+
"learning_rate": 9.941051566975772e-06,
|
| 14271 |
+
"loss": 21.3097,
|
| 14272 |
+
"step": 20330
|
| 14273 |
+
},
|
| 14274 |
+
{
|
| 14275 |
+
"epoch": 0.3774556538319682,
|
| 14276 |
+
"grad_norm": 33.90625,
|
| 14277 |
+
"learning_rate": 9.94102257118973e-06,
|
| 14278 |
+
"loss": 21.4043,
|
| 14279 |
+
"step": 20340
|
| 14280 |
+
},
|
| 14281 |
+
{
|
| 14282 |
+
"epoch": 0.377641226916448,
|
| 14283 |
+
"grad_norm": 35.625,
|
| 14284 |
+
"learning_rate": 9.940993575403689e-06,
|
| 14285 |
+
"loss": 21.6863,
|
| 14286 |
+
"step": 20350
|
| 14287 |
+
},
|
| 14288 |
+
{
|
| 14289 |
+
"epoch": 0.3778268000009279,
|
| 14290 |
+
"grad_norm": 33.78125,
|
| 14291 |
+
"learning_rate": 9.940964579617644e-06,
|
| 14292 |
+
"loss": 21.3448,
|
| 14293 |
+
"step": 20360
|
| 14294 |
+
},
|
| 14295 |
+
{
|
| 14296 |
+
"epoch": 0.3780123730854077,
|
| 14297 |
+
"grad_norm": 36.78125,
|
| 14298 |
+
"learning_rate": 9.940935583831602e-06,
|
| 14299 |
+
"loss": 21.0266,
|
| 14300 |
+
"step": 20370
|
| 14301 |
+
},
|
| 14302 |
+
{
|
| 14303 |
+
"epoch": 0.3781979461698875,
|
| 14304 |
+
"grad_norm": 34.84375,
|
| 14305 |
+
"learning_rate": 9.94090658804556e-06,
|
| 14306 |
+
"loss": 21.4672,
|
| 14307 |
+
"step": 20380
|
| 14308 |
+
},
|
| 14309 |
+
{
|
| 14310 |
+
"epoch": 0.37838351925436736,
|
| 14311 |
+
"grad_norm": 33.78125,
|
| 14312 |
+
"learning_rate": 9.940877592259518e-06,
|
| 14313 |
+
"loss": 21.7418,
|
| 14314 |
+
"step": 20390
|
| 14315 |
+
},
|
| 14316 |
+
{
|
| 14317 |
+
"epoch": 0.37856909233884717,
|
| 14318 |
+
"grad_norm": 35.65625,
|
| 14319 |
+
"learning_rate": 9.940848596473476e-06,
|
| 14320 |
+
"loss": 21.626,
|
| 14321 |
+
"step": 20400
|
| 14322 |
+
},
|
| 14323 |
+
{
|
| 14324 |
+
"epoch": 0.378754665423327,
|
| 14325 |
+
"grad_norm": 34.5,
|
| 14326 |
+
"learning_rate": 9.940819600687433e-06,
|
| 14327 |
+
"loss": 21.6884,
|
| 14328 |
+
"step": 20410
|
| 14329 |
+
},
|
| 14330 |
+
{
|
| 14331 |
+
"epoch": 0.37894023850780684,
|
| 14332 |
+
"grad_norm": 35.6875,
|
| 14333 |
+
"learning_rate": 9.94079060490139e-06,
|
| 14334 |
+
"loss": 21.1694,
|
| 14335 |
+
"step": 20420
|
| 14336 |
+
},
|
| 14337 |
+
{
|
| 14338 |
+
"epoch": 0.37912581159228664,
|
| 14339 |
+
"grad_norm": 35.28125,
|
| 14340 |
+
"learning_rate": 9.940761609115348e-06,
|
| 14341 |
+
"loss": 21.7092,
|
| 14342 |
+
"step": 20430
|
| 14343 |
+
},
|
| 14344 |
+
{
|
| 14345 |
+
"epoch": 0.3793113846767665,
|
| 14346 |
+
"grad_norm": 36.65625,
|
| 14347 |
+
"learning_rate": 9.940732613329305e-06,
|
| 14348 |
+
"loss": 21.0703,
|
| 14349 |
+
"step": 20440
|
| 14350 |
+
},
|
| 14351 |
+
{
|
| 14352 |
+
"epoch": 0.3794969577612463,
|
| 14353 |
+
"grad_norm": 36.6875,
|
| 14354 |
+
"learning_rate": 9.940703617543264e-06,
|
| 14355 |
+
"loss": 21.3644,
|
| 14356 |
+
"step": 20450
|
| 14357 |
+
},
|
| 14358 |
+
{
|
| 14359 |
+
"epoch": 0.3796825308457261,
|
| 14360 |
+
"grad_norm": 35.34375,
|
| 14361 |
+
"learning_rate": 9.94067462175722e-06,
|
| 14362 |
+
"loss": 21.7173,
|
| 14363 |
+
"step": 20460
|
| 14364 |
+
},
|
| 14365 |
+
{
|
| 14366 |
+
"epoch": 0.379868103930206,
|
| 14367 |
+
"grad_norm": 35.03125,
|
| 14368 |
+
"learning_rate": 9.940645625971177e-06,
|
| 14369 |
+
"loss": 21.8825,
|
| 14370 |
+
"step": 20470
|
| 14371 |
+
},
|
| 14372 |
+
{
|
| 14373 |
+
"epoch": 0.3800536770146858,
|
| 14374 |
+
"grad_norm": 35.3125,
|
| 14375 |
+
"learning_rate": 9.940616630185137e-06,
|
| 14376 |
+
"loss": 21.3377,
|
| 14377 |
+
"step": 20480
|
| 14378 |
+
},
|
| 14379 |
+
{
|
| 14380 |
+
"epoch": 0.3802392500991656,
|
| 14381 |
+
"grad_norm": 33.875,
|
| 14382 |
+
"learning_rate": 9.940587634399094e-06,
|
| 14383 |
+
"loss": 21.2556,
|
| 14384 |
+
"step": 20490
|
| 14385 |
+
},
|
| 14386 |
+
{
|
| 14387 |
+
"epoch": 0.38042482318364546,
|
| 14388 |
+
"grad_norm": 35.15625,
|
| 14389 |
+
"learning_rate": 9.940558638613051e-06,
|
| 14390 |
+
"loss": 21.1311,
|
| 14391 |
+
"step": 20500
|
| 14392 |
+
},
|
| 14393 |
+
{
|
| 14394 |
+
"epoch": 0.38061039626812526,
|
| 14395 |
+
"grad_norm": 34.78125,
|
| 14396 |
+
"learning_rate": 9.940529642827009e-06,
|
| 14397 |
+
"loss": 21.4698,
|
| 14398 |
+
"step": 20510
|
| 14399 |
+
},
|
| 14400 |
+
{
|
| 14401 |
+
"epoch": 0.3807959693526051,
|
| 14402 |
+
"grad_norm": 35.8125,
|
| 14403 |
+
"learning_rate": 9.940500647040966e-06,
|
| 14404 |
+
"loss": 21.5138,
|
| 14405 |
+
"step": 20520
|
| 14406 |
+
},
|
| 14407 |
+
{
|
| 14408 |
+
"epoch": 0.38098154243708493,
|
| 14409 |
+
"grad_norm": 36.46875,
|
| 14410 |
+
"learning_rate": 9.940471651254924e-06,
|
| 14411 |
+
"loss": 21.1583,
|
| 14412 |
+
"step": 20530
|
| 14413 |
+
},
|
| 14414 |
+
{
|
| 14415 |
+
"epoch": 0.38116711552156474,
|
| 14416 |
+
"grad_norm": 33.375,
|
| 14417 |
+
"learning_rate": 9.940442655468881e-06,
|
| 14418 |
+
"loss": 21.1859,
|
| 14419 |
+
"step": 20540
|
| 14420 |
+
},
|
| 14421 |
+
{
|
| 14422 |
+
"epoch": 0.3813526886060446,
|
| 14423 |
+
"grad_norm": 35.03125,
|
| 14424 |
+
"learning_rate": 9.94041365968284e-06,
|
| 14425 |
+
"loss": 20.9751,
|
| 14426 |
+
"step": 20550
|
| 14427 |
+
},
|
| 14428 |
+
{
|
| 14429 |
+
"epoch": 0.3815382616905244,
|
| 14430 |
+
"grad_norm": 34.96875,
|
| 14431 |
+
"learning_rate": 9.940384663896798e-06,
|
| 14432 |
+
"loss": 21.1197,
|
| 14433 |
+
"step": 20560
|
| 14434 |
+
},
|
| 14435 |
+
{
|
| 14436 |
+
"epoch": 0.3817238347750042,
|
| 14437 |
+
"grad_norm": 35.9375,
|
| 14438 |
+
"learning_rate": 9.940355668110753e-06,
|
| 14439 |
+
"loss": 21.0839,
|
| 14440 |
+
"step": 20570
|
| 14441 |
+
},
|
| 14442 |
+
{
|
| 14443 |
+
"epoch": 0.3819094078594841,
|
| 14444 |
+
"grad_norm": 36.96875,
|
| 14445 |
+
"learning_rate": 9.940326672324712e-06,
|
| 14446 |
+
"loss": 21.6738,
|
| 14447 |
+
"step": 20580
|
| 14448 |
+
},
|
| 14449 |
+
{
|
| 14450 |
+
"epoch": 0.3820949809439639,
|
| 14451 |
+
"grad_norm": 35.71875,
|
| 14452 |
+
"learning_rate": 9.94029767653867e-06,
|
| 14453 |
+
"loss": 21.366,
|
| 14454 |
+
"step": 20590
|
| 14455 |
+
},
|
| 14456 |
+
{
|
| 14457 |
+
"epoch": 0.38228055402844374,
|
| 14458 |
+
"grad_norm": 36.84375,
|
| 14459 |
+
"learning_rate": 9.940268680752627e-06,
|
| 14460 |
+
"loss": 21.195,
|
| 14461 |
+
"step": 20600
|
| 14462 |
+
},
|
| 14463 |
+
{
|
| 14464 |
+
"epoch": 0.38246612711292355,
|
| 14465 |
+
"grad_norm": 34.84375,
|
| 14466 |
+
"learning_rate": 9.940239684966585e-06,
|
| 14467 |
+
"loss": 21.5918,
|
| 14468 |
+
"step": 20610
|
| 14469 |
+
},
|
| 14470 |
+
{
|
| 14471 |
+
"epoch": 0.38265170019740335,
|
| 14472 |
+
"grad_norm": 36.1875,
|
| 14473 |
+
"learning_rate": 9.940210689180542e-06,
|
| 14474 |
+
"loss": 20.7986,
|
| 14475 |
+
"step": 20620
|
| 14476 |
+
},
|
| 14477 |
+
{
|
| 14478 |
+
"epoch": 0.3828372732818832,
|
| 14479 |
+
"grad_norm": 34.09375,
|
| 14480 |
+
"learning_rate": 9.9401816933945e-06,
|
| 14481 |
+
"loss": 21.4164,
|
| 14482 |
+
"step": 20630
|
| 14483 |
+
},
|
| 14484 |
+
{
|
| 14485 |
+
"epoch": 0.383022846366363,
|
| 14486 |
+
"grad_norm": 34.96875,
|
| 14487 |
+
"learning_rate": 9.940152697608457e-06,
|
| 14488 |
+
"loss": 21.2902,
|
| 14489 |
+
"step": 20640
|
| 14490 |
+
},
|
| 14491 |
+
{
|
| 14492 |
+
"epoch": 0.38320841945084283,
|
| 14493 |
+
"grad_norm": 32.65625,
|
| 14494 |
+
"learning_rate": 9.940123701822416e-06,
|
| 14495 |
+
"loss": 21.2691,
|
| 14496 |
+
"step": 20650
|
| 14497 |
+
},
|
| 14498 |
+
{
|
| 14499 |
+
"epoch": 0.3833939925353227,
|
| 14500 |
+
"grad_norm": 36.75,
|
| 14501 |
+
"learning_rate": 9.940094706036373e-06,
|
| 14502 |
+
"loss": 20.983,
|
| 14503 |
+
"step": 20660
|
| 14504 |
+
},
|
| 14505 |
+
{
|
| 14506 |
+
"epoch": 0.3835795656198025,
|
| 14507 |
+
"grad_norm": 35.15625,
|
| 14508 |
+
"learning_rate": 9.94006571025033e-06,
|
| 14509 |
+
"loss": 20.9418,
|
| 14510 |
+
"step": 20670
|
| 14511 |
+
},
|
| 14512 |
+
{
|
| 14513 |
+
"epoch": 0.3837651387042823,
|
| 14514 |
+
"grad_norm": 37.3125,
|
| 14515 |
+
"learning_rate": 9.940036714464288e-06,
|
| 14516 |
+
"loss": 21.948,
|
| 14517 |
+
"step": 20680
|
| 14518 |
+
},
|
| 14519 |
+
{
|
| 14520 |
+
"epoch": 0.38395071178876217,
|
| 14521 |
+
"grad_norm": 34.78125,
|
| 14522 |
+
"learning_rate": 9.940007718678245e-06,
|
| 14523 |
+
"loss": 22.0029,
|
| 14524 |
+
"step": 20690
|
| 14525 |
+
},
|
| 14526 |
+
{
|
| 14527 |
+
"epoch": 0.384136284873242,
|
| 14528 |
+
"grad_norm": 34.90625,
|
| 14529 |
+
"learning_rate": 9.939978722892203e-06,
|
| 14530 |
+
"loss": 21.1508,
|
| 14531 |
+
"step": 20700
|
| 14532 |
+
},
|
| 14533 |
+
{
|
| 14534 |
+
"epoch": 0.38432185795772184,
|
| 14535 |
+
"grad_norm": 33.90625,
|
| 14536 |
+
"learning_rate": 9.93994972710616e-06,
|
| 14537 |
+
"loss": 21.2856,
|
| 14538 |
+
"step": 20710
|
| 14539 |
+
},
|
| 14540 |
+
{
|
| 14541 |
+
"epoch": 0.38450743104220164,
|
| 14542 |
+
"grad_norm": 35.0,
|
| 14543 |
+
"learning_rate": 9.939920731320118e-06,
|
| 14544 |
+
"loss": 21.1426,
|
| 14545 |
+
"step": 20720
|
| 14546 |
+
},
|
| 14547 |
+
{
|
| 14548 |
+
"epoch": 0.38469300412668145,
|
| 14549 |
+
"grad_norm": 35.28125,
|
| 14550 |
+
"learning_rate": 9.939891735534075e-06,
|
| 14551 |
+
"loss": 21.1099,
|
| 14552 |
+
"step": 20730
|
| 14553 |
+
},
|
| 14554 |
+
{
|
| 14555 |
+
"epoch": 0.3848785772111613,
|
| 14556 |
+
"grad_norm": 35.375,
|
| 14557 |
+
"learning_rate": 9.939862739748033e-06,
|
| 14558 |
+
"loss": 21.4204,
|
| 14559 |
+
"step": 20740
|
| 14560 |
+
},
|
| 14561 |
+
{
|
| 14562 |
+
"epoch": 0.3850641502956411,
|
| 14563 |
+
"grad_norm": 34.28125,
|
| 14564 |
+
"learning_rate": 9.93983374396199e-06,
|
| 14565 |
+
"loss": 21.0373,
|
| 14566 |
+
"step": 20750
|
| 14567 |
+
},
|
| 14568 |
+
{
|
| 14569 |
+
"epoch": 0.3852497233801209,
|
| 14570 |
+
"grad_norm": 34.90625,
|
| 14571 |
+
"learning_rate": 9.939804748175949e-06,
|
| 14572 |
+
"loss": 22.1613,
|
| 14573 |
+
"step": 20760
|
| 14574 |
+
},
|
| 14575 |
+
{
|
| 14576 |
+
"epoch": 0.3854352964646008,
|
| 14577 |
+
"grad_norm": 35.53125,
|
| 14578 |
+
"learning_rate": 9.939775752389906e-06,
|
| 14579 |
+
"loss": 21.1425,
|
| 14580 |
+
"step": 20770
|
| 14581 |
+
},
|
| 14582 |
+
{
|
| 14583 |
+
"epoch": 0.3856208695490806,
|
| 14584 |
+
"grad_norm": 35.3125,
|
| 14585 |
+
"learning_rate": 9.939746756603864e-06,
|
| 14586 |
+
"loss": 21.195,
|
| 14587 |
+
"step": 20780
|
| 14588 |
+
},
|
| 14589 |
+
{
|
| 14590 |
+
"epoch": 0.38580644263356045,
|
| 14591 |
+
"grad_norm": 35.8125,
|
| 14592 |
+
"learning_rate": 9.939717760817821e-06,
|
| 14593 |
+
"loss": 21.2977,
|
| 14594 |
+
"step": 20790
|
| 14595 |
+
},
|
| 14596 |
+
{
|
| 14597 |
+
"epoch": 0.38599201571804026,
|
| 14598 |
+
"grad_norm": 36.46875,
|
| 14599 |
+
"learning_rate": 9.939688765031779e-06,
|
| 14600 |
+
"loss": 21.1699,
|
| 14601 |
+
"step": 20800
|
| 14602 |
+
},
|
| 14603 |
+
{
|
| 14604 |
+
"epoch": 0.38617758880252007,
|
| 14605 |
+
"grad_norm": 35.3125,
|
| 14606 |
+
"learning_rate": 9.939659769245736e-06,
|
| 14607 |
+
"loss": 20.9866,
|
| 14608 |
+
"step": 20810
|
| 14609 |
+
},
|
| 14610 |
+
{
|
| 14611 |
+
"epoch": 0.38636316188699993,
|
| 14612 |
+
"grad_norm": 36.65625,
|
| 14613 |
+
"learning_rate": 9.939630773459693e-06,
|
| 14614 |
+
"loss": 21.6496,
|
| 14615 |
+
"step": 20820
|
| 14616 |
+
},
|
| 14617 |
+
{
|
| 14618 |
+
"epoch": 0.38654873497147973,
|
| 14619 |
+
"grad_norm": 34.71875,
|
| 14620 |
+
"learning_rate": 9.939601777673653e-06,
|
| 14621 |
+
"loss": 21.3789,
|
| 14622 |
+
"step": 20830
|
| 14623 |
+
},
|
| 14624 |
+
{
|
| 14625 |
+
"epoch": 0.38673430805595954,
|
| 14626 |
+
"grad_norm": 33.28125,
|
| 14627 |
+
"learning_rate": 9.939572781887608e-06,
|
| 14628 |
+
"loss": 21.2779,
|
| 14629 |
+
"step": 20840
|
| 14630 |
+
},
|
| 14631 |
+
{
|
| 14632 |
+
"epoch": 0.3869198811404394,
|
| 14633 |
+
"grad_norm": 34.15625,
|
| 14634 |
+
"learning_rate": 9.939543786101566e-06,
|
| 14635 |
+
"loss": 21.5408,
|
| 14636 |
+
"step": 20850
|
| 14637 |
+
},
|
| 14638 |
+
{
|
| 14639 |
+
"epoch": 0.3871054542249192,
|
| 14640 |
+
"grad_norm": 35.0,
|
| 14641 |
+
"learning_rate": 9.939514790315525e-06,
|
| 14642 |
+
"loss": 21.4127,
|
| 14643 |
+
"step": 20860
|
| 14644 |
+
},
|
| 14645 |
+
{
|
| 14646 |
+
"epoch": 0.38729102730939907,
|
| 14647 |
+
"grad_norm": 34.46875,
|
| 14648 |
+
"learning_rate": 9.939485794529482e-06,
|
| 14649 |
+
"loss": 21.4833,
|
| 14650 |
+
"step": 20870
|
| 14651 |
+
},
|
| 14652 |
+
{
|
| 14653 |
+
"epoch": 0.3874766003938789,
|
| 14654 |
+
"grad_norm": 35.625,
|
| 14655 |
+
"learning_rate": 9.93945679874344e-06,
|
| 14656 |
+
"loss": 21.41,
|
| 14657 |
+
"step": 20880
|
| 14658 |
+
},
|
| 14659 |
+
{
|
| 14660 |
+
"epoch": 0.3876621734783587,
|
| 14661 |
+
"grad_norm": 33.6875,
|
| 14662 |
+
"learning_rate": 9.939427802957397e-06,
|
| 14663 |
+
"loss": 21.1335,
|
| 14664 |
+
"step": 20890
|
| 14665 |
+
},
|
| 14666 |
+
{
|
| 14667 |
+
"epoch": 0.38784774656283855,
|
| 14668 |
+
"grad_norm": 34.8125,
|
| 14669 |
+
"learning_rate": 9.939398807171354e-06,
|
| 14670 |
+
"loss": 21.3565,
|
| 14671 |
+
"step": 20900
|
| 14672 |
+
},
|
| 14673 |
+
{
|
| 14674 |
+
"epoch": 0.38803331964731835,
|
| 14675 |
+
"grad_norm": 35.40625,
|
| 14676 |
+
"learning_rate": 9.939369811385312e-06,
|
| 14677 |
+
"loss": 20.9849,
|
| 14678 |
+
"step": 20910
|
| 14679 |
+
},
|
| 14680 |
+
{
|
| 14681 |
+
"epoch": 0.38821889273179816,
|
| 14682 |
+
"grad_norm": 35.0,
|
| 14683 |
+
"learning_rate": 9.93934081559927e-06,
|
| 14684 |
+
"loss": 20.8741,
|
| 14685 |
+
"step": 20920
|
| 14686 |
+
},
|
| 14687 |
+
{
|
| 14688 |
+
"epoch": 0.388404465816278,
|
| 14689 |
+
"grad_norm": 33.625,
|
| 14690 |
+
"learning_rate": 9.939311819813228e-06,
|
| 14691 |
+
"loss": 20.8931,
|
| 14692 |
+
"step": 20930
|
| 14693 |
+
},
|
| 14694 |
+
{
|
| 14695 |
+
"epoch": 0.38859003890075783,
|
| 14696 |
+
"grad_norm": 36.5,
|
| 14697 |
+
"learning_rate": 9.939282824027186e-06,
|
| 14698 |
+
"loss": 21.2333,
|
| 14699 |
+
"step": 20940
|
| 14700 |
+
},
|
| 14701 |
+
{
|
| 14702 |
+
"epoch": 0.38877561198523763,
|
| 14703 |
+
"grad_norm": 35.96875,
|
| 14704 |
+
"learning_rate": 9.939253828241141e-06,
|
| 14705 |
+
"loss": 21.4989,
|
| 14706 |
+
"step": 20950
|
| 14707 |
+
},
|
| 14708 |
+
{
|
| 14709 |
+
"epoch": 0.3889611850697175,
|
| 14710 |
+
"grad_norm": 34.75,
|
| 14711 |
+
"learning_rate": 9.9392248324551e-06,
|
| 14712 |
+
"loss": 20.8834,
|
| 14713 |
+
"step": 20960
|
| 14714 |
+
},
|
| 14715 |
+
{
|
| 14716 |
+
"epoch": 0.3891467581541973,
|
| 14717 |
+
"grad_norm": 33.9375,
|
| 14718 |
+
"learning_rate": 9.939195836669058e-06,
|
| 14719 |
+
"loss": 21.2821,
|
| 14720 |
+
"step": 20970
|
| 14721 |
+
},
|
| 14722 |
+
{
|
| 14723 |
+
"epoch": 0.38933233123867717,
|
| 14724 |
+
"grad_norm": 35.03125,
|
| 14725 |
+
"learning_rate": 9.939166840883015e-06,
|
| 14726 |
+
"loss": 21.2434,
|
| 14727 |
+
"step": 20980
|
| 14728 |
+
},
|
| 14729 |
+
{
|
| 14730 |
+
"epoch": 0.38951790432315697,
|
| 14731 |
+
"grad_norm": 35.71875,
|
| 14732 |
+
"learning_rate": 9.939137845096973e-06,
|
| 14733 |
+
"loss": 21.0523,
|
| 14734 |
+
"step": 20990
|
| 14735 |
+
},
|
| 14736 |
+
{
|
| 14737 |
+
"epoch": 0.3897034774076368,
|
| 14738 |
+
"grad_norm": 35.25,
|
| 14739 |
+
"learning_rate": 9.93910884931093e-06,
|
| 14740 |
+
"loss": 21.281,
|
| 14741 |
+
"step": 21000
|
| 14742 |
+
},
|
| 14743 |
+
{
|
| 14744 |
+
"epoch": 0.38988905049211664,
|
| 14745 |
+
"grad_norm": 35.46875,
|
| 14746 |
+
"learning_rate": 9.939079853524888e-06,
|
| 14747 |
+
"loss": 21.435,
|
| 14748 |
+
"step": 21010
|
| 14749 |
+
},
|
| 14750 |
+
{
|
| 14751 |
+
"epoch": 0.39007462357659645,
|
| 14752 |
+
"grad_norm": 33.46875,
|
| 14753 |
+
"learning_rate": 9.939050857738845e-06,
|
| 14754 |
+
"loss": 21.3981,
|
| 14755 |
+
"step": 21020
|
| 14756 |
+
},
|
| 14757 |
+
{
|
| 14758 |
+
"epoch": 0.39026019666107625,
|
| 14759 |
+
"grad_norm": 34.6875,
|
| 14760 |
+
"learning_rate": 9.939021861952804e-06,
|
| 14761 |
+
"loss": 21.0117,
|
| 14762 |
+
"step": 21030
|
| 14763 |
+
},
|
| 14764 |
+
{
|
| 14765 |
+
"epoch": 0.3904457697455561,
|
| 14766 |
+
"grad_norm": 35.40625,
|
| 14767 |
+
"learning_rate": 9.938992866166762e-06,
|
| 14768 |
+
"loss": 21.1832,
|
| 14769 |
+
"step": 21040
|
| 14770 |
+
},
|
| 14771 |
+
{
|
| 14772 |
+
"epoch": 0.3906313428300359,
|
| 14773 |
+
"grad_norm": 34.8125,
|
| 14774 |
+
"learning_rate": 9.938963870380717e-06,
|
| 14775 |
+
"loss": 21.2241,
|
| 14776 |
+
"step": 21050
|
| 14777 |
+
},
|
| 14778 |
+
{
|
| 14779 |
+
"epoch": 0.3908169159145158,
|
| 14780 |
+
"grad_norm": 33.0,
|
| 14781 |
+
"learning_rate": 9.938934874594676e-06,
|
| 14782 |
+
"loss": 21.5346,
|
| 14783 |
+
"step": 21060
|
| 14784 |
+
},
|
| 14785 |
+
{
|
| 14786 |
+
"epoch": 0.3910024889989956,
|
| 14787 |
+
"grad_norm": 34.6875,
|
| 14788 |
+
"learning_rate": 9.938905878808634e-06,
|
| 14789 |
+
"loss": 21.1235,
|
| 14790 |
+
"step": 21070
|
| 14791 |
+
},
|
| 14792 |
+
{
|
| 14793 |
+
"epoch": 0.3911880620834754,
|
| 14794 |
+
"grad_norm": 34.0625,
|
| 14795 |
+
"learning_rate": 9.938876883022591e-06,
|
| 14796 |
+
"loss": 21.1906,
|
| 14797 |
+
"step": 21080
|
| 14798 |
+
},
|
| 14799 |
+
{
|
| 14800 |
+
"epoch": 0.39137363516795526,
|
| 14801 |
+
"grad_norm": 33.40625,
|
| 14802 |
+
"learning_rate": 9.938847887236549e-06,
|
| 14803 |
+
"loss": 21.4451,
|
| 14804 |
+
"step": 21090
|
| 14805 |
+
},
|
| 14806 |
+
{
|
| 14807 |
+
"epoch": 0.39155920825243506,
|
| 14808 |
+
"grad_norm": 34.875,
|
| 14809 |
+
"learning_rate": 9.938818891450508e-06,
|
| 14810 |
+
"loss": 20.9447,
|
| 14811 |
+
"step": 21100
|
| 14812 |
+
},
|
| 14813 |
+
{
|
| 14814 |
+
"epoch": 0.39174478133691487,
|
| 14815 |
+
"grad_norm": 35.71875,
|
| 14816 |
+
"learning_rate": 9.938789895664463e-06,
|
| 14817 |
+
"loss": 21.0956,
|
| 14818 |
+
"step": 21110
|
| 14819 |
+
},
|
| 14820 |
+
{
|
| 14821 |
+
"epoch": 0.39193035442139473,
|
| 14822 |
+
"grad_norm": 33.125,
|
| 14823 |
+
"learning_rate": 9.93876089987842e-06,
|
| 14824 |
+
"loss": 20.8059,
|
| 14825 |
+
"step": 21120
|
| 14826 |
+
},
|
| 14827 |
+
{
|
| 14828 |
+
"epoch": 0.39211592750587454,
|
| 14829 |
+
"grad_norm": 32.8125,
|
| 14830 |
+
"learning_rate": 9.93873190409238e-06,
|
| 14831 |
+
"loss": 21.3003,
|
| 14832 |
+
"step": 21130
|
| 14833 |
+
},
|
| 14834 |
+
{
|
| 14835 |
+
"epoch": 0.3923015005903544,
|
| 14836 |
+
"grad_norm": 34.53125,
|
| 14837 |
+
"learning_rate": 9.938702908306337e-06,
|
| 14838 |
+
"loss": 21.3167,
|
| 14839 |
+
"step": 21140
|
| 14840 |
+
},
|
| 14841 |
+
{
|
| 14842 |
+
"epoch": 0.3924870736748342,
|
| 14843 |
+
"grad_norm": 35.03125,
|
| 14844 |
+
"learning_rate": 9.938673912520295e-06,
|
| 14845 |
+
"loss": 21.307,
|
| 14846 |
+
"step": 21150
|
| 14847 |
+
},
|
| 14848 |
+
{
|
| 14849 |
+
"epoch": 0.392672646759314,
|
| 14850 |
+
"grad_norm": 35.71875,
|
| 14851 |
+
"learning_rate": 9.938644916734252e-06,
|
| 14852 |
+
"loss": 21.1579,
|
| 14853 |
+
"step": 21160
|
| 14854 |
+
},
|
| 14855 |
+
{
|
| 14856 |
+
"epoch": 0.3928582198437939,
|
| 14857 |
+
"grad_norm": 34.59375,
|
| 14858 |
+
"learning_rate": 9.93861592094821e-06,
|
| 14859 |
+
"loss": 21.4428,
|
| 14860 |
+
"step": 21170
|
| 14861 |
+
},
|
| 14862 |
+
{
|
| 14863 |
+
"epoch": 0.3930437929282737,
|
| 14864 |
+
"grad_norm": 33.59375,
|
| 14865 |
+
"learning_rate": 9.938586925162167e-06,
|
| 14866 |
+
"loss": 21.4122,
|
| 14867 |
+
"step": 21180
|
| 14868 |
+
},
|
| 14869 |
+
{
|
| 14870 |
+
"epoch": 0.3932293660127535,
|
| 14871 |
+
"grad_norm": 36.0625,
|
| 14872 |
+
"learning_rate": 9.938557929376124e-06,
|
| 14873 |
+
"loss": 21.1277,
|
| 14874 |
+
"step": 21190
|
| 14875 |
+
},
|
| 14876 |
+
{
|
| 14877 |
+
"epoch": 0.39341493909723335,
|
| 14878 |
+
"grad_norm": 36.03125,
|
| 14879 |
+
"learning_rate": 9.938528933590082e-06,
|
| 14880 |
+
"loss": 21.1074,
|
| 14881 |
+
"step": 21200
|
| 14882 |
+
},
|
| 14883 |
+
{
|
| 14884 |
+      "epoch": 0.39360051218171316,
+      "grad_norm": 32.875,
+      "learning_rate": 9.938499937804039e-06,
+      "loss": 21.2574,
+      "step": 21210
+    },
+    {
+      "epoch": 0.39378608526619296,
+      "grad_norm": 33.65625,
+      "learning_rate": 9.938470942017997e-06,
+      "loss": 21.3344,
+      "step": 21220
+    },
    … (281 further log_history entries added here, one every 10 steps from 21230 through 24030; each records the same five fields. Across them epoch rises from 0.3940 to 0.4459, learning_rate decays from 9.938442e-06 to 9.930323e-06, loss fluctuates roughly between 20.4 and 21.7, and grad_norm between roughly 32.3 and 38.6.) …
+    {
+      "epoch": 0.4461176950895042,
+      "grad_norm": 34.84375,
+      "learning_rate": 9.930294130354036e-06,
+      "loss": 20.8566,
+      "step": 24040
+    },
|
| 16871 |
+
{
|
| 16872 |
+
"epoch": 0.446303268173984,
|
| 16873 |
+
"grad_norm": 34.8125,
|
| 16874 |
+
"learning_rate": 9.930265134567995e-06,
|
| 16875 |
+
"loss": 20.5984,
|
| 16876 |
+
"step": 24050
|
| 16877 |
+
},
|
| 16878 |
+
{
|
| 16879 |
+
"epoch": 0.4464888412584639,
|
| 16880 |
+
"grad_norm": 34.59375,
|
| 16881 |
+
"learning_rate": 9.930236138781951e-06,
|
| 16882 |
+
"loss": 20.4023,
|
| 16883 |
+
"step": 24060
|
| 16884 |
+
},
|
| 16885 |
+
{
|
| 16886 |
+
"epoch": 0.4466744143429437,
|
| 16887 |
+
"grad_norm": 33.5625,
|
| 16888 |
+
"learning_rate": 9.930207142995908e-06,
|
| 16889 |
+
"loss": 20.9537,
|
| 16890 |
+
"step": 24070
|
| 16891 |
+
},
|
| 16892 |
+
{
|
| 16893 |
+
"epoch": 0.4468599874274235,
|
| 16894 |
+
"grad_norm": 34.5625,
|
| 16895 |
+
"learning_rate": 9.930178147209868e-06,
|
| 16896 |
+
"loss": 20.6655,
|
| 16897 |
+
"step": 24080
|
| 16898 |
+
},
|
| 16899 |
+
{
|
| 16900 |
+
"epoch": 0.44704556051190336,
|
| 16901 |
+
"grad_norm": 34.53125,
|
| 16902 |
+
"learning_rate": 9.930149151423825e-06,
|
| 16903 |
+
"loss": 20.7582,
|
| 16904 |
+
"step": 24090
|
| 16905 |
+
},
|
| 16906 |
+
{
|
| 16907 |
+
"epoch": 0.44723113359638317,
|
| 16908 |
+
"grad_norm": 34.53125,
|
| 16909 |
+
"learning_rate": 9.930120155637782e-06,
|
| 16910 |
+
"loss": 20.7693,
|
| 16911 |
+
"step": 24100
|
| 16912 |
+
},
|
| 16913 |
+
{
|
| 16914 |
+
"epoch": 0.44741670668086303,
|
| 16915 |
+
"grad_norm": 34.28125,
|
| 16916 |
+
"learning_rate": 9.93009115985174e-06,
|
| 16917 |
+
"loss": 21.0091,
|
| 16918 |
+
"step": 24110
|
| 16919 |
+
},
|
| 16920 |
+
{
|
| 16921 |
+
"epoch": 0.44760227976534284,
|
| 16922 |
+
"grad_norm": 33.71875,
|
| 16923 |
+
"learning_rate": 9.930062164065697e-06,
|
| 16924 |
+
"loss": 20.9896,
|
| 16925 |
+
"step": 24120
|
| 16926 |
+
},
|
| 16927 |
+
{
|
| 16928 |
+
"epoch": 0.44778785284982264,
|
| 16929 |
+
"grad_norm": 36.53125,
|
| 16930 |
+
"learning_rate": 9.930033168279655e-06,
|
| 16931 |
+
"loss": 20.8672,
|
| 16932 |
+
"step": 24130
|
| 16933 |
+
},
|
| 16934 |
+
{
|
| 16935 |
+
"epoch": 0.4479734259343025,
|
| 16936 |
+
"grad_norm": 36.28125,
|
| 16937 |
+
"learning_rate": 9.930004172493612e-06,
|
| 16938 |
+
"loss": 20.6846,
|
| 16939 |
+
"step": 24140
|
| 16940 |
+
},
|
| 16941 |
+
{
|
| 16942 |
+
"epoch": 0.4481589990187823,
|
| 16943 |
+
"grad_norm": 35.71875,
|
| 16944 |
+
"learning_rate": 9.929975176707571e-06,
|
| 16945 |
+
"loss": 20.8901,
|
| 16946 |
+
"step": 24150
|
| 16947 |
+
},
|
| 16948 |
+
{
|
| 16949 |
+
"epoch": 0.4483445721032621,
|
| 16950 |
+
"grad_norm": 33.65625,
|
| 16951 |
+
"learning_rate": 9.929946180921527e-06,
|
| 16952 |
+
"loss": 21.0096,
|
| 16953 |
+
"step": 24160
|
| 16954 |
+
},
|
| 16955 |
+
{
|
| 16956 |
+
"epoch": 0.448530145187742,
|
| 16957 |
+
"grad_norm": 34.28125,
|
| 16958 |
+
"learning_rate": 9.929917185135484e-06,
|
| 16959 |
+
"loss": 20.8181,
|
| 16960 |
+
"step": 24170
|
| 16961 |
+
},
|
| 16962 |
+
{
|
| 16963 |
+
"epoch": 0.4487157182722218,
|
| 16964 |
+
"grad_norm": 35.3125,
|
| 16965 |
+
"learning_rate": 9.929888189349443e-06,
|
| 16966 |
+
"loss": 20.9108,
|
| 16967 |
+
"step": 24180
|
| 16968 |
+
},
|
| 16969 |
+
{
|
| 16970 |
+
"epoch": 0.44890129135670165,
|
| 16971 |
+
"grad_norm": 37.625,
|
| 16972 |
+
"learning_rate": 9.9298591935634e-06,
|
| 16973 |
+
"loss": 20.9324,
|
| 16974 |
+
"step": 24190
|
| 16975 |
+
},
|
| 16976 |
+
{
|
| 16977 |
+
"epoch": 0.44908686444118145,
|
| 16978 |
+
"grad_norm": 36.09375,
|
| 16979 |
+
"learning_rate": 9.929830197777358e-06,
|
| 16980 |
+
"loss": 20.987,
|
| 16981 |
+
"step": 24200
|
| 16982 |
+
},
|
| 16983 |
+
{
|
| 16984 |
+
"epoch": 0.44927243752566126,
|
| 16985 |
+
"grad_norm": 35.1875,
|
| 16986 |
+
"learning_rate": 9.929801201991315e-06,
|
| 16987 |
+
"loss": 20.7505,
|
| 16988 |
+
"step": 24210
|
| 16989 |
+
},
|
| 16990 |
+
{
|
| 16991 |
+
"epoch": 0.4494580106101411,
|
| 16992 |
+
"grad_norm": 36.125,
|
| 16993 |
+
"learning_rate": 9.929772206205273e-06,
|
| 16994 |
+
"loss": 20.9636,
|
| 16995 |
+
"step": 24220
|
| 16996 |
+
},
|
| 16997 |
+
{
|
| 16998 |
+
"epoch": 0.44964358369462093,
|
| 16999 |
+
"grad_norm": 35.28125,
|
| 17000 |
+
"learning_rate": 9.92974321041923e-06,
|
| 17001 |
+
"loss": 21.0674,
|
| 17002 |
+
"step": 24230
|
| 17003 |
+
},
|
| 17004 |
+
{
|
| 17005 |
+
"epoch": 0.44982915677910074,
|
| 17006 |
+
"grad_norm": 34.3125,
|
| 17007 |
+
"learning_rate": 9.929714214633188e-06,
|
| 17008 |
+
"loss": 20.6893,
|
| 17009 |
+
"step": 24240
|
| 17010 |
+
},
|
| 17011 |
+
{
|
| 17012 |
+
"epoch": 0.4500147298635806,
|
| 17013 |
+
"grad_norm": 36.625,
|
| 17014 |
+
"learning_rate": 9.929685218847147e-06,
|
| 17015 |
+
"loss": 20.9585,
|
| 17016 |
+
"step": 24250
|
| 17017 |
+
},
|
| 17018 |
+
{
|
| 17019 |
+
"epoch": 0.4502003029480604,
|
| 17020 |
+
"grad_norm": 35.5,
|
| 17021 |
+
"learning_rate": 9.929656223061104e-06,
|
| 17022 |
+
"loss": 20.3475,
|
| 17023 |
+
"step": 24260
|
| 17024 |
+
},
|
| 17025 |
+
{
|
| 17026 |
+
"epoch": 0.45038587603254027,
|
| 17027 |
+
"grad_norm": 34.6875,
|
| 17028 |
+
"learning_rate": 9.92962722727506e-06,
|
| 17029 |
+
"loss": 20.5898,
|
| 17030 |
+
"step": 24270
|
| 17031 |
+
},
|
| 17032 |
+
{
|
| 17033 |
+
"epoch": 0.4505714491170201,
|
| 17034 |
+
"grad_norm": 36.21875,
|
| 17035 |
+
"learning_rate": 9.929598231489019e-06,
|
| 17036 |
+
"loss": 20.5172,
|
| 17037 |
+
"step": 24280
|
| 17038 |
+
},
|
| 17039 |
+
{
|
| 17040 |
+
"epoch": 0.4507570222014999,
|
| 17041 |
+
"grad_norm": 34.53125,
|
| 17042 |
+
"learning_rate": 9.929569235702976e-06,
|
| 17043 |
+
"loss": 20.8779,
|
| 17044 |
+
"step": 24290
|
| 17045 |
+
},
|
| 17046 |
+
{
|
| 17047 |
+
"epoch": 0.45094259528597974,
|
| 17048 |
+
"grad_norm": 34.25,
|
| 17049 |
+
"learning_rate": 9.929540239916934e-06,
|
| 17050 |
+
"loss": 20.8615,
|
| 17051 |
+
"step": 24300
|
| 17052 |
+
},
|
| 17053 |
+
{
|
| 17054 |
+
"epoch": 0.45112816837045955,
|
| 17055 |
+
"grad_norm": 34.78125,
|
| 17056 |
+
"learning_rate": 9.929511244130891e-06,
|
| 17057 |
+
"loss": 21.23,
|
| 17058 |
+
"step": 24310
|
| 17059 |
+
},
|
| 17060 |
+
{
|
| 17061 |
+
"epoch": 0.45131374145493935,
|
| 17062 |
+
"grad_norm": 35.0625,
|
| 17063 |
+
"learning_rate": 9.929482248344849e-06,
|
| 17064 |
+
"loss": 21.1603,
|
| 17065 |
+
"step": 24320
|
| 17066 |
+
},
|
| 17067 |
+
{
|
| 17068 |
+
"epoch": 0.4514993145394192,
|
| 17069 |
+
"grad_norm": 34.875,
|
| 17070 |
+
"learning_rate": 9.929453252558806e-06,
|
| 17071 |
+
"loss": 21.007,
|
| 17072 |
+
"step": 24330
|
| 17073 |
+
},
|
| 17074 |
+
{
|
| 17075 |
+
"epoch": 0.451684887623899,
|
| 17076 |
+
"grad_norm": 33.5625,
|
| 17077 |
+
"learning_rate": 9.929424256772763e-06,
|
| 17078 |
+
"loss": 20.4047,
|
| 17079 |
+
"step": 24340
|
| 17080 |
+
},
|
| 17081 |
+
{
|
| 17082 |
+
"epoch": 0.45187046070837883,
|
| 17083 |
+
"grad_norm": 34.90625,
|
| 17084 |
+
"learning_rate": 9.929395260986721e-06,
|
| 17085 |
+
"loss": 21.1434,
|
| 17086 |
+
"step": 24350
|
| 17087 |
+
},
|
| 17088 |
+
{
|
| 17089 |
+
"epoch": 0.4520560337928587,
|
| 17090 |
+
"grad_norm": 34.8125,
|
| 17091 |
+
"learning_rate": 9.92936626520068e-06,
|
| 17092 |
+
"loss": 21.1945,
|
| 17093 |
+
"step": 24360
|
| 17094 |
+
},
|
| 17095 |
+
{
|
| 17096 |
+
"epoch": 0.4522416068773385,
|
| 17097 |
+
"grad_norm": 34.625,
|
| 17098 |
+
"learning_rate": 9.929337269414637e-06,
|
| 17099 |
+
"loss": 20.697,
|
| 17100 |
+
"step": 24370
|
| 17101 |
+
},
|
| 17102 |
+
{
|
| 17103 |
+
"epoch": 0.45242717996181836,
|
| 17104 |
+
"grad_norm": 34.96875,
|
| 17105 |
+
"learning_rate": 9.929308273628595e-06,
|
| 17106 |
+
"loss": 20.7853,
|
| 17107 |
+
"step": 24380
|
| 17108 |
+
},
|
| 17109 |
+
{
|
| 17110 |
+
"epoch": 0.45261275304629817,
|
| 17111 |
+
"grad_norm": 34.375,
|
| 17112 |
+
"learning_rate": 9.929279277842552e-06,
|
| 17113 |
+
"loss": 20.2789,
|
| 17114 |
+
"step": 24390
|
| 17115 |
+
},
|
| 17116 |
+
{
|
| 17117 |
+
"epoch": 0.452798326130778,
|
| 17118 |
+
"grad_norm": 35.90625,
|
| 17119 |
+
"learning_rate": 9.92925028205651e-06,
|
| 17120 |
+
"loss": 20.8062,
|
| 17121 |
+
"step": 24400
|
| 17122 |
+
},
|
| 17123 |
+
{
|
| 17124 |
+
"epoch": 0.45298389921525783,
|
| 17125 |
+
"grad_norm": 34.75,
|
| 17126 |
+
"learning_rate": 9.929221286270467e-06,
|
| 17127 |
+
"loss": 20.4341,
|
| 17128 |
+
"step": 24410
|
| 17129 |
+
},
|
| 17130 |
+
{
|
| 17131 |
+
"epoch": 0.45316947229973764,
|
| 17132 |
+
"grad_norm": 35.34375,
|
| 17133 |
+
"learning_rate": 9.929192290484424e-06,
|
| 17134 |
+
"loss": 20.4916,
|
| 17135 |
+
"step": 24420
|
| 17136 |
+
},
|
| 17137 |
+
{
|
| 17138 |
+
"epoch": 0.45335504538421745,
|
| 17139 |
+
"grad_norm": 35.53125,
|
| 17140 |
+
"learning_rate": 9.929163294698382e-06,
|
| 17141 |
+
"loss": 20.987,
|
| 17142 |
+
"step": 24430
|
| 17143 |
+
},
|
| 17144 |
+
{
|
| 17145 |
+
"epoch": 0.4535406184686973,
|
| 17146 |
+
"grad_norm": 34.03125,
|
| 17147 |
+
"learning_rate": 9.92913429891234e-06,
|
| 17148 |
+
"loss": 21.2552,
|
| 17149 |
+
"step": 24440
|
| 17150 |
+
},
|
| 17151 |
+
{
|
| 17152 |
+
"epoch": 0.4537261915531771,
|
| 17153 |
+
"grad_norm": 33.28125,
|
| 17154 |
+
"learning_rate": 9.929105303126297e-06,
|
| 17155 |
+
"loss": 20.5123,
|
| 17156 |
+
"step": 24450
|
| 17157 |
+
},
|
| 17158 |
+
{
|
| 17159 |
+
"epoch": 0.453911764637657,
|
| 17160 |
+
"grad_norm": 36.25,
|
| 17161 |
+
"learning_rate": 9.929076307340256e-06,
|
| 17162 |
+
"loss": 20.896,
|
| 17163 |
+
"step": 24460
|
| 17164 |
+
},
|
| 17165 |
+
{
|
| 17166 |
+
"epoch": 0.4540973377221368,
|
| 17167 |
+
"grad_norm": 34.5,
|
| 17168 |
+
"learning_rate": 9.929047311554213e-06,
|
| 17169 |
+
"loss": 20.9112,
|
| 17170 |
+
"step": 24470
|
| 17171 |
+
},
|
| 17172 |
+
{
|
| 17173 |
+
"epoch": 0.4542829108066166,
|
| 17174 |
+
"grad_norm": 36.375,
|
| 17175 |
+
"learning_rate": 9.92901831576817e-06,
|
| 17176 |
+
"loss": 20.9512,
|
| 17177 |
+
"step": 24480
|
| 17178 |
+
},
|
| 17179 |
+
{
|
| 17180 |
+
"epoch": 0.45446848389109645,
|
| 17181 |
+
"grad_norm": 35.21875,
|
| 17182 |
+
"learning_rate": 9.928989319982128e-06,
|
| 17183 |
+
"loss": 20.469,
|
| 17184 |
+
"step": 24490
|
| 17185 |
+
},
|
| 17186 |
+
{
|
| 17187 |
+
"epoch": 0.45465405697557626,
|
| 17188 |
+
"grad_norm": 37.09375,
|
| 17189 |
+
"learning_rate": 9.928960324196085e-06,
|
| 17190 |
+
"loss": 20.8936,
|
| 17191 |
+
"step": 24500
|
| 17192 |
+
},
|
| 17193 |
+
{
|
| 17194 |
+
"epoch": 0.45483963006005607,
|
| 17195 |
+
"grad_norm": 35.125,
|
| 17196 |
+
"learning_rate": 9.928931328410043e-06,
|
| 17197 |
+
"loss": 20.4707,
|
| 17198 |
+
"step": 24510
|
| 17199 |
+
},
|
| 17200 |
+
{
|
| 17201 |
+
"epoch": 0.45502520314453593,
|
| 17202 |
+
"grad_norm": 35.46875,
|
| 17203 |
+
"learning_rate": 9.928902332624e-06,
|
| 17204 |
+
"loss": 20.5323,
|
| 17205 |
+
"step": 24520
|
| 17206 |
+
},
|
| 17207 |
+
{
|
| 17208 |
+
"epoch": 0.45521077622901573,
|
| 17209 |
+
"grad_norm": 33.90625,
|
| 17210 |
+
"learning_rate": 9.92887333683796e-06,
|
| 17211 |
+
"loss": 21.0152,
|
| 17212 |
+
"step": 24530
|
| 17213 |
+
},
|
| 17214 |
+
{
|
| 17215 |
+
"epoch": 0.4553963493134956,
|
| 17216 |
+
"grad_norm": 34.65625,
|
| 17217 |
+
"learning_rate": 9.928844341051915e-06,
|
| 17218 |
+
"loss": 20.7327,
|
| 17219 |
+
"step": 24540
|
| 17220 |
+
},
|
| 17221 |
+
{
|
| 17222 |
+
"epoch": 0.4555819223979754,
|
| 17223 |
+
"grad_norm": 35.34375,
|
| 17224 |
+
"learning_rate": 9.928815345265872e-06,
|
| 17225 |
+
"loss": 20.7267,
|
| 17226 |
+
"step": 24550
|
| 17227 |
+
},
|
| 17228 |
+
{
|
| 17229 |
+
"epoch": 0.4557674954824552,
|
| 17230 |
+
"grad_norm": 33.34375,
|
| 17231 |
+
"learning_rate": 9.928786349479832e-06,
|
| 17232 |
+
"loss": 20.4997,
|
| 17233 |
+
"step": 24560
|
| 17234 |
+
},
|
| 17235 |
+
{
|
| 17236 |
+
"epoch": 0.45595306856693507,
|
| 17237 |
+
"grad_norm": 37.34375,
|
| 17238 |
+
"learning_rate": 9.928757353693789e-06,
|
| 17239 |
+
"loss": 20.8639,
|
| 17240 |
+
"step": 24570
|
| 17241 |
+
},
|
| 17242 |
+
{
|
| 17243 |
+
"epoch": 0.4561386416514149,
|
| 17244 |
+
"grad_norm": 34.15625,
|
| 17245 |
+
"learning_rate": 9.928728357907746e-06,
|
| 17246 |
+
"loss": 21.0693,
|
| 17247 |
+
"step": 24580
|
| 17248 |
+
},
|
| 17249 |
+
{
|
| 17250 |
+
"epoch": 0.4563242147358947,
|
| 17251 |
+
"grad_norm": 33.75,
|
| 17252 |
+
"learning_rate": 9.928699362121704e-06,
|
| 17253 |
+
"loss": 21.0186,
|
| 17254 |
+
"step": 24590
|
| 17255 |
+
},
|
| 17256 |
+
{
|
| 17257 |
+
"epoch": 0.45650978782037455,
|
| 17258 |
+
"grad_norm": 37.59375,
|
| 17259 |
+
"learning_rate": 9.928670366335661e-06,
|
| 17260 |
+
"loss": 21.1377,
|
| 17261 |
+
"step": 24600
|
| 17262 |
+
},
|
| 17263 |
+
{
|
| 17264 |
+
"epoch": 0.45669536090485435,
|
| 17265 |
+
"grad_norm": 35.125,
|
| 17266 |
+
"learning_rate": 9.928641370549619e-06,
|
| 17267 |
+
"loss": 20.7898,
|
| 17268 |
+
"step": 24610
|
| 17269 |
+
},
|
| 17270 |
+
{
|
| 17271 |
+
"epoch": 0.45688093398933416,
|
| 17272 |
+
"grad_norm": 35.0,
|
| 17273 |
+
"learning_rate": 9.928612374763576e-06,
|
| 17274 |
+
"loss": 20.8915,
|
| 17275 |
+
"step": 24620
|
| 17276 |
+
},
|
| 17277 |
+
{
|
| 17278 |
+
"epoch": 0.457066507073814,
|
| 17279 |
+
"grad_norm": 34.96875,
|
| 17280 |
+
"learning_rate": 9.928583378977535e-06,
|
| 17281 |
+
"loss": 20.677,
|
| 17282 |
+
"step": 24630
|
| 17283 |
+
},
|
| 17284 |
+
{
|
| 17285 |
+
"epoch": 0.4572520801582938,
|
| 17286 |
+
"grad_norm": 34.78125,
|
| 17287 |
+
"learning_rate": 9.928554383191492e-06,
|
| 17288 |
+
"loss": 20.5464,
|
| 17289 |
+
"step": 24640
|
| 17290 |
+
},
|
| 17291 |
+
{
|
| 17292 |
+
"epoch": 0.4574376532427737,
|
| 17293 |
+
"grad_norm": 35.0625,
|
| 17294 |
+
"learning_rate": 9.928525387405448e-06,
|
| 17295 |
+
"loss": 20.7197,
|
| 17296 |
+
"step": 24650
|
| 17297 |
+
},
|
| 17298 |
+
{
|
| 17299 |
+
"epoch": 0.4576232263272535,
|
| 17300 |
+
"grad_norm": 35.28125,
|
| 17301 |
+
"learning_rate": 9.928496391619407e-06,
|
| 17302 |
+
"loss": 21.186,
|
| 17303 |
+
"step": 24660
|
| 17304 |
+
},
|
| 17305 |
+
{
|
| 17306 |
+
"epoch": 0.4578087994117333,
|
| 17307 |
+
"grad_norm": 35.71875,
|
| 17308 |
+
"learning_rate": 9.928467395833365e-06,
|
| 17309 |
+
"loss": 21.1732,
|
| 17310 |
+
"step": 24670
|
| 17311 |
+
},
|
| 17312 |
+
{
|
| 17313 |
+
"epoch": 0.45799437249621316,
|
| 17314 |
+
"grad_norm": 34.3125,
|
| 17315 |
+
"learning_rate": 9.928438400047322e-06,
|
| 17316 |
+
"loss": 20.2372,
|
| 17317 |
+
"step": 24680
|
| 17318 |
+
},
|
| 17319 |
+
{
|
| 17320 |
+
"epoch": 0.45817994558069297,
|
| 17321 |
+
"grad_norm": 34.1875,
|
| 17322 |
+
"learning_rate": 9.92840940426128e-06,
|
| 17323 |
+
"loss": 20.5822,
|
| 17324 |
+
"step": 24690
|
| 17325 |
+
},
|
| 17326 |
+
{
|
| 17327 |
+
"epoch": 0.4583655186651728,
|
| 17328 |
+
"grad_norm": 34.34375,
|
| 17329 |
+
"learning_rate": 9.928380408475237e-06,
|
| 17330 |
+
"loss": 20.9295,
|
| 17331 |
+
"step": 24700
|
| 17332 |
+
},
|
| 17333 |
+
{
|
| 17334 |
+
"epoch": 0.45855109174965264,
|
| 17335 |
+
"grad_norm": 34.6875,
|
| 17336 |
+
"learning_rate": 9.928351412689194e-06,
|
| 17337 |
+
"loss": 20.7293,
|
| 17338 |
+
"step": 24710
|
| 17339 |
+
},
|
| 17340 |
+
{
|
| 17341 |
+
"epoch": 0.45873666483413245,
|
| 17342 |
+
"grad_norm": 34.40625,
|
| 17343 |
+
"learning_rate": 9.928322416903152e-06,
|
| 17344 |
+
"loss": 21.0629,
|
| 17345 |
+
"step": 24720
|
| 17346 |
+
},
|
| 17347 |
+
{
|
| 17348 |
+
"epoch": 0.4589222379186123,
|
| 17349 |
+
"grad_norm": 37.03125,
|
| 17350 |
+
"learning_rate": 9.92829342111711e-06,
|
| 17351 |
+
"loss": 20.6274,
|
| 17352 |
+
"step": 24730
|
| 17353 |
+
},
|
| 17354 |
+
{
|
| 17355 |
+
"epoch": 0.4591078110030921,
|
| 17356 |
+
"grad_norm": 34.625,
|
| 17357 |
+
"learning_rate": 9.928264425331068e-06,
|
| 17358 |
+
"loss": 20.7442,
|
| 17359 |
+
"step": 24740
|
| 17360 |
+
},
|
| 17361 |
+
{
|
| 17362 |
+
"epoch": 0.4592933840875719,
|
| 17363 |
+
"grad_norm": 34.28125,
|
| 17364 |
+
"learning_rate": 9.928235429545024e-06,
|
| 17365 |
+
"loss": 20.7463,
|
| 17366 |
+
"step": 24750
|
| 17367 |
+
},
|
| 17368 |
+
{
|
| 17369 |
+
"epoch": 0.4594789571720518,
|
| 17370 |
+
"grad_norm": 36.5625,
|
| 17371 |
+
"learning_rate": 9.928206433758983e-06,
|
| 17372 |
+
"loss": 21.1036,
|
| 17373 |
+
"step": 24760
|
| 17374 |
+
},
|
| 17375 |
+
{
|
| 17376 |
+
"epoch": 0.4596645302565316,
|
| 17377 |
+
"grad_norm": 35.25,
|
| 17378 |
+
"learning_rate": 9.92817743797294e-06,
|
| 17379 |
+
"loss": 20.6891,
|
| 17380 |
+
"step": 24770
|
| 17381 |
+
},
|
| 17382 |
+
{
|
| 17383 |
+
"epoch": 0.4598501033410114,
|
| 17384 |
+
"grad_norm": 33.6875,
|
| 17385 |
+
"learning_rate": 9.928148442186898e-06,
|
| 17386 |
+
"loss": 20.3999,
|
| 17387 |
+
"step": 24780
|
| 17388 |
+
},
|
| 17389 |
+
{
|
| 17390 |
+
"epoch": 0.46003567642549126,
|
| 17391 |
+
"grad_norm": 36.6875,
|
| 17392 |
+
"learning_rate": 9.928119446400855e-06,
|
| 17393 |
+
"loss": 21.0503,
|
| 17394 |
+
"step": 24790
|
| 17395 |
+
},
|
| 17396 |
+
{
|
| 17397 |
+
"epoch": 0.46022124950997106,
|
| 17398 |
+
"grad_norm": 36.15625,
|
| 17399 |
+
"learning_rate": 9.928090450614813e-06,
|
| 17400 |
+
"loss": 20.7396,
|
| 17401 |
+
"step": 24800
|
| 17402 |
+
},
|
| 17403 |
+
{
|
| 17404 |
+
"epoch": 0.4604068225944509,
|
| 17405 |
+
"grad_norm": 37.03125,
|
| 17406 |
+
"learning_rate": 9.92806145482877e-06,
|
| 17407 |
+
"loss": 20.8848,
|
| 17408 |
+
"step": 24810
|
| 17409 |
+
},
|
| 17410 |
+
{
|
| 17411 |
+
"epoch": 0.46059239567893073,
|
| 17412 |
+
"grad_norm": 33.375,
|
| 17413 |
+
"learning_rate": 9.928032459042727e-06,
|
| 17414 |
+
"loss": 20.9374,
|
| 17415 |
+
"step": 24820
|
| 17416 |
+
},
|
| 17417 |
+
{
|
| 17418 |
+
"epoch": 0.46077796876341054,
|
| 17419 |
+
"grad_norm": 35.75,
|
| 17420 |
+
"learning_rate": 9.928003463256685e-06,
|
| 17421 |
+
"loss": 20.8332,
|
| 17422 |
+
"step": 24830
|
| 17423 |
+
},
|
| 17424 |
+
{
|
| 17425 |
+
"epoch": 0.4609635418478904,
|
| 17426 |
+
"grad_norm": 35.09375,
|
| 17427 |
+
"learning_rate": 9.927974467470644e-06,
|
| 17428 |
+
"loss": 20.6518,
|
| 17429 |
+
"step": 24840
|
| 17430 |
+
},
|
| 17431 |
+
{
|
| 17432 |
+
"epoch": 0.4611491149323702,
|
| 17433 |
+
"grad_norm": 35.78125,
|
| 17434 |
+
"learning_rate": 9.927945471684601e-06,
|
| 17435 |
+
"loss": 20.633,
|
| 17436 |
+
"step": 24850
|
| 17437 |
+
},
|
| 17438 |
+
{
|
| 17439 |
+
"epoch": 0.46133468801685,
|
| 17440 |
+
"grad_norm": 34.3125,
|
| 17441 |
+
"learning_rate": 9.927916475898559e-06,
|
| 17442 |
+
"loss": 21.1518,
|
| 17443 |
+
"step": 24860
|
| 17444 |
+
},
|
| 17445 |
+
{
|
| 17446 |
+
"epoch": 0.4615202611013299,
|
| 17447 |
+
"grad_norm": 34.625,
|
| 17448 |
+
"learning_rate": 9.927887480112516e-06,
|
| 17449 |
+
"loss": 20.6709,
|
| 17450 |
+
"step": 24870
|
| 17451 |
+
},
|
| 17452 |
+
{
|
| 17453 |
+
"epoch": 0.4617058341858097,
|
| 17454 |
+
"grad_norm": 35.34375,
|
| 17455 |
+
"learning_rate": 9.927858484326474e-06,
|
| 17456 |
+
"loss": 20.86,
|
| 17457 |
+
"step": 24880
|
| 17458 |
+
},
|
| 17459 |
+
{
|
| 17460 |
+
"epoch": 0.46189140727028954,
|
| 17461 |
+
"grad_norm": 34.875,
|
| 17462 |
+
"learning_rate": 9.927829488540431e-06,
|
| 17463 |
+
"loss": 21.1381,
|
| 17464 |
+
"step": 24890
|
| 17465 |
+
},
|
| 17466 |
+
{
|
| 17467 |
+
"epoch": 0.46207698035476935,
|
| 17468 |
+
"grad_norm": 32.96875,
|
| 17469 |
+
"learning_rate": 9.927800492754388e-06,
|
| 17470 |
+
"loss": 20.7075,
|
| 17471 |
+
"step": 24900
|
| 17472 |
+
},
|
| 17473 |
+
{
|
| 17474 |
+
"epoch": 0.46226255343924916,
|
| 17475 |
+
"grad_norm": 35.59375,
|
| 17476 |
+
"learning_rate": 9.927771496968346e-06,
|
| 17477 |
+
"loss": 20.5517,
|
| 17478 |
+
"step": 24910
|
| 17479 |
+
},
|
| 17480 |
+
{
|
| 17481 |
+
"epoch": 0.462448126523729,
|
| 17482 |
+
"grad_norm": 36.0625,
|
| 17483 |
+
"learning_rate": 9.927742501182303e-06,
|
| 17484 |
+
"loss": 20.7381,
|
| 17485 |
+
"step": 24920
|
| 17486 |
+
},
|
| 17487 |
+
{
|
| 17488 |
+
"epoch": 0.4626336996082088,
|
| 17489 |
+
"grad_norm": 37.375,
|
| 17490 |
+
"learning_rate": 9.92771350539626e-06,
|
| 17491 |
+
"loss": 20.8368,
|
| 17492 |
+
"step": 24930
|
| 17493 |
+
},
|
| 17494 |
+
{
|
| 17495 |
+
"epoch": 0.46281927269268863,
|
| 17496 |
+
"grad_norm": 35.25,
|
| 17497 |
+
"learning_rate": 9.92768450961022e-06,
|
| 17498 |
+
"loss": 21.0123,
|
| 17499 |
+
"step": 24940
|
| 17500 |
+
},
|
| 17501 |
+
{
|
| 17502 |
+
"epoch": 0.4630048457771685,
|
| 17503 |
+
"grad_norm": 35.03125,
|
| 17504 |
+
"learning_rate": 9.927655513824177e-06,
|
| 17505 |
+
"loss": 20.7466,
|
| 17506 |
+
"step": 24950
|
| 17507 |
+
},
|
| 17508 |
+
{
|
| 17509 |
+
"epoch": 0.4631904188616483,
|
| 17510 |
+
"grad_norm": 35.90625,
|
| 17511 |
+
"learning_rate": 9.927626518038135e-06,
|
| 17512 |
+
"loss": 20.4304,
|
| 17513 |
+
"step": 24960
|
| 17514 |
+
},
|
| 17515 |
+
{
|
| 17516 |
+
"epoch": 0.4633759919461281,
|
| 17517 |
+
"grad_norm": 34.46875,
|
| 17518 |
+
"learning_rate": 9.927597522252092e-06,
|
| 17519 |
+
"loss": 20.482,
|
| 17520 |
+
"step": 24970
|
| 17521 |
+
},
|
| 17522 |
+
{
|
| 17523 |
+
"epoch": 0.46356156503060797,
|
| 17524 |
+
"grad_norm": 37.25,
|
| 17525 |
+
"learning_rate": 9.92756852646605e-06,
|
| 17526 |
+
"loss": 20.8669,
|
| 17527 |
+
"step": 24980
|
| 17528 |
+
},
|
| 17529 |
+
{
|
| 17530 |
+
"epoch": 0.4637471381150878,
|
| 17531 |
+
"grad_norm": 36.25,
|
| 17532 |
+
"learning_rate": 9.927539530680007e-06,
|
| 17533 |
+
"loss": 20.7218,
|
| 17534 |
+
"step": 24990
|
| 17535 |
+
},
|
| 17536 |
+
{
|
| 17537 |
+
"epoch": 0.46393271119956764,
|
| 17538 |
+
"grad_norm": 33.96875,
|
| 17539 |
+
"learning_rate": 9.927510534893964e-06,
|
| 17540 |
+
"loss": 20.7564,
|
| 17541 |
+
"step": 25000
|
| 17542 |
+
},
|
| 17543 |
+
{
|
| 17544 |
+
"epoch": 0.46393271119956764,
|
| 17545 |
+
"eval_loss": 2.5967071056365967,
|
| 17546 |
+
"eval_runtime": 453.4834,
|
| 17547 |
+
"eval_samples_per_second": 3202.139,
|
| 17548 |
+
"eval_steps_per_second": 50.035,
|
| 17549 |
+
"step": 25000
|
    }
  ],
  "logging_steps": 10,
  ...
      "attributes": {}
    }
  },
+  "total_flos": 4.36380689760256e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
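For quick inspection of the updated trainer state, a minimal sketch (the `last-checkpoint/trainer_state.json` path is assumed from this commit; the snippet is illustrative, not part of the checkpoint) that reads `log_history` and reports the latest train and eval losses:

```python
import json

# Minimal sketch: load the saved trainer state and report the most recent
# train/eval losses recorded in its log_history (path assumed from this commit).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

if train_logs:
    last = train_logs[-1]
    print(f"train loss {last['loss']} at step {last['step']}")
if eval_logs:
    last = eval_logs[-1]
    print(f"eval loss {last['eval_loss']} at step {last['step']}")
```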