Training in progress, step 11000, checkpoint
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d97a67c436d0aac8c51ef6e3c27fb63e9c89e825da3da26d0db7316a8b043cb5
 size 1856040378

last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0bfe4534d71a931e71c77546b6b1db5867a3a90fa4307542a6e49a6954fa531e
 size 928000378

last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cfed6543f5d672f101f42c0ba9d247e90cad9ca642dfb74b70cd76cc56d808d0
 size 14244

last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1e74c51e6198addd9d761a50ec2f9cc276c644808893fb0ce5a8bf14675cb23a
 size 1000

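All four binaries above are tracked with Git LFS, so the repository stores only a three-line pointer file per blob (version, oid sha256, size); this commit rewrites each pointer's hash while the sizes stay unchanged. A minimal sketch, assuming nothing beyond the pointer format shown above, for parsing a pointer and verifying a downloaded blob against it (the paths are illustrative, not part of this commit):

import hashlib
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    """Split each 'key value' line of a Git LFS pointer file."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields  # {"version": ..., "oid": "sha256:<hex>", "size": "<bytes>"}

def verify_blob(pointer: dict, blob_path: str) -> bool:
    """Check a blob's sha256 digest and byte size against its pointer."""
    digest = hashlib.sha256(Path(blob_path).read_bytes()).hexdigest()
    expected = pointer["oid"].split(":", 1)[1]
    size_ok = Path(blob_path).stat().st_size == int(pointer["size"])
    return digest == expected and size_ok
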
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.
-  "best_model_checkpoint": "model/chessformer-3/checkpoint-
-  "epoch": 0.
+  "best_metric": 1.4820914268493652,
+  "best_model_checkpoint": "model/chessformer-3/checkpoint-11000",
+  "epoch": 0.4890845226979681,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 11000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,

@@ -35167,6 +35167,3522 @@
       "eval_samples_per_second": 557.83,
       "eval_steps_per_second": 69.749,
       "step": 10000
+    },
+    {
+      "epoch": 0.4447112178204615,
+      "grad_norm": 0.07122799009084702,
+      "learning_rate": 0.0006260291775788085,
+      "loss": 1.5277,
+      "step": 10002
+    },
+    {
+      "epoch": 0.4448001422791339,
+      "grad_norm": 0.06492965668439865,
+      "learning_rate": 0.0006258877037424691,
+      "loss": 1.5261,
+      "step": 10004
+    },
+    {
+      "epoch": 0.44488906673780626,
+      "grad_norm": 0.06462486833333969,
+      "learning_rate": 0.0006257462191446941,
+      "loss": 1.5268,
+      "step": 10006
+    },
+    {
+      "epoch": 0.4449779911964786,
+      "grad_norm": 0.06561804562807083,
+      "learning_rate": 0.0006256047237975782,
+      "loss": 1.5237,
+      "step": 10008
+    },
+    {
+      "epoch": 0.44506691565515094,
+      "grad_norm": 0.06555454432964325,
+      "learning_rate": 0.0006254632177132169,
+      "loss": 1.5244,
+      "step": 10010
+    },
+    {
+      "epoch": 0.4451558401138233,
+      "grad_norm": 0.06676924228668213,
+      "learning_rate": 0.0006253217009037068,
+      "loss": 1.5251,
+      "step": 10012
+    },
+    {
+      "epoch": 0.4452447645724957,
+      "grad_norm": 0.06575804203748703,
+      "learning_rate": 0.0006251801733811455,
+      "loss": 1.5222,
+      "step": 10014
+    },
+    {
+      "epoch": 0.44533368903116805,
+      "grad_norm": 0.06349354237318039,
+      "learning_rate": 0.0006250386351576314,
+      "loss": 1.5211,
+      "step": 10016
+    },
+    {
+      "epoch": 0.44542261348984036,
+      "grad_norm": 0.06419903039932251,
+      "learning_rate": 0.0006248970862452637,
+      "loss": 1.5216,
+      "step": 10018
+    },
+    {
+      "epoch": 0.4455115379485127,
+      "grad_norm": 0.06334590911865234,
+      "learning_rate": 0.0006247555266561425,
+      "loss": 1.5178,
+      "step": 10020
+    },
+    {
+      "epoch": 0.4456004624071851,
+      "grad_norm": 0.06277309358119965,
+      "learning_rate": 0.0006246139564023693,
+      "loss": 1.5201,
+      "step": 10022
+    },
+    {
+      "epoch": 0.44568938686585746,
+      "grad_norm": 0.06414992362260818,
+      "learning_rate": 0.0006244723754960459,
+      "loss": 1.5213,
+      "step": 10024
+    },
+    {
+      "epoch": 0.44577831132452983,
+      "grad_norm": 0.06549572199583054,
+      "learning_rate": 0.0006243307839492752,
+      "loss": 1.5174,
+      "step": 10026
+    },
+    {
+      "epoch": 0.44586723578320214,
+      "grad_norm": 0.0640779510140419,
+      "learning_rate": 0.0006241891817741613,
+      "loss": 1.5167,
+      "step": 10028
+    },
+    {
+      "epoch": 0.4459561602418745,
+      "grad_norm": 0.06481896340847015,
+      "learning_rate": 0.0006240475689828086,
+      "loss": 1.5147,
+      "step": 10030
+    },
+    {
+      "epoch": 0.4460450847005469,
+      "grad_norm": 0.06547030061483383,
+      "learning_rate": 0.0006239059455873235,
+      "loss": 1.5214,
+      "step": 10032
+    },
+    {
+      "epoch": 0.44613400915921925,
+      "grad_norm": 0.061458081007003784,
+      "learning_rate": 0.0006237643115998119,
+      "loss": 1.5146,
+      "step": 10034
+    },
+    {
+      "epoch": 0.4462229336178916,
+      "grad_norm": 0.06149187311530113,
+      "learning_rate": 0.0006236226670323816,
+      "loss": 1.5123,
+      "step": 10036
+    },
+    {
+      "epoch": 0.446311858076564,
+      "grad_norm": 0.06510701775550842,
+      "learning_rate": 0.0006234810118971408,
+      "loss": 1.5149,
+      "step": 10038
+    },
+    {
+      "epoch": 0.4464007825352363,
+      "grad_norm": 0.061700914055109024,
+      "learning_rate": 0.0006233393462061989,
+      "loss": 1.5157,
+      "step": 10040
+    },
+    {
+      "epoch": 0.44648970699390866,
+      "grad_norm": 0.06388545781373978,
+      "learning_rate": 0.0006231976699716664,
+      "loss": 1.5221,
+      "step": 10042
+    },
+    {
+      "epoch": 0.44657863145258103,
+      "grad_norm": 0.06275127828121185,
+      "learning_rate": 0.0006230559832056539,
+      "loss": 1.5054,
+      "step": 10044
+    },
+    {
+      "epoch": 0.4466675559112534,
+      "grad_norm": 0.06381896138191223,
+      "learning_rate": 0.0006229142859202739,
+      "loss": 1.5148,
+      "step": 10046
+    },
+    {
+      "epoch": 0.44675648036992577,
+      "grad_norm": 0.06251713633537292,
+      "learning_rate": 0.0006227725781276389,
+      "loss": 1.5142,
+      "step": 10048
+    },
+    {
+      "epoch": 0.4468454048285981,
+      "grad_norm": 0.06425853818655014,
+      "learning_rate": 0.000622630859839863,
+      "loss": 1.5165,
+      "step": 10050
+    },
+    {
+      "epoch": 0.44693432928727045,
+      "grad_norm": 0.06350740045309067,
+      "learning_rate": 0.0006224891310690606,
+      "loss": 1.5139,
+      "step": 10052
+    },
+    {
+      "epoch": 0.4470232537459428,
+      "grad_norm": 0.06478399783372879,
+      "learning_rate": 0.0006223473918273477,
+      "loss": 1.5146,
+      "step": 10054
+    },
+    {
+      "epoch": 0.4471121782046152,
+      "grad_norm": 0.06344152241945267,
+      "learning_rate": 0.0006222056421268405,
+      "loss": 1.5167,
+      "step": 10056
+    },
+    {
+      "epoch": 0.44720110266328755,
+      "grad_norm": 0.06286389380693436,
+      "learning_rate": 0.0006220638819796565,
+      "loss": 1.5089,
+      "step": 10058
+    },
+    {
+      "epoch": 0.4472900271219599,
+      "grad_norm": 0.06424534320831299,
+      "learning_rate": 0.0006219221113979138,
+      "loss": 1.5131,
+      "step": 10060
+    },
+    {
+      "epoch": 0.44737895158063223,
+      "grad_norm": 0.061146993190050125,
+      "learning_rate": 0.0006217803303937319,
+      "loss": 1.5113,
+      "step": 10062
+    },
+    {
+      "epoch": 0.4474678760393046,
+      "grad_norm": 0.06360126286745071,
+      "learning_rate": 0.0006216385389792306,
+      "loss": 1.5176,
+      "step": 10064
+    },
+    {
+      "epoch": 0.44755680049797697,
+      "grad_norm": 0.06215475872159004,
+      "learning_rate": 0.0006214967371665309,
+      "loss": 1.5132,
+      "step": 10066
+    },
+    {
+      "epoch": 0.44764572495664934,
+      "grad_norm": 0.06413223594427109,
+      "learning_rate": 0.0006213549249677548,
+      "loss": 1.5158,
+      "step": 10068
+    },
+    {
+      "epoch": 0.4477346494153217,
+      "grad_norm": 0.06347139924764633,
+      "learning_rate": 0.000621213102395025,
+      "loss": 1.5089,
+      "step": 10070
+    },
+    {
+      "epoch": 0.447823573873994,
+      "grad_norm": 0.06259430944919586,
+      "learning_rate": 0.0006210712694604647,
+      "loss": 1.5121,
+      "step": 10072
+    },
+    {
+      "epoch": 0.4479124983326664,
+      "grad_norm": 0.06153608858585358,
+      "learning_rate": 0.0006209294261761989,
+      "loss": 1.5149,
+      "step": 10074
+    },
+    {
+      "epoch": 0.44800142279133875,
+      "grad_norm": 0.06388126313686371,
+      "learning_rate": 0.000620787572554353,
+      "loss": 1.5123,
+      "step": 10076
+    },
+    {
+      "epoch": 0.4480903472500111,
+      "grad_norm": 0.06113690882921219,
+      "learning_rate": 0.0006206457086070531,
+      "loss": 1.5124,
+      "step": 10078
+    },
+    {
+      "epoch": 0.4481792717086835,
+      "grad_norm": 0.06311555206775665,
+      "learning_rate": 0.000620503834346426,
+      "loss": 1.5159,
+      "step": 10080
+    },
+    {
+      "epoch": 0.4482681961673558,
+      "grad_norm": 0.06283438205718994,
+      "learning_rate": 0.0006203619497846005,
+      "loss": 1.5146,
+      "step": 10082
+    },
+    {
+      "epoch": 0.44835712062602817,
+      "grad_norm": 0.06206725165247917,
+      "learning_rate": 0.0006202200549337048,
+      "loss": 1.5096,
+      "step": 10084
+    },
+    {
+      "epoch": 0.44844604508470054,
+      "grad_norm": 0.062284741550683975,
+      "learning_rate": 0.0006200781498058695,
+      "loss": 1.5088,
+      "step": 10086
+    },
+    {
+      "epoch": 0.4485349695433729,
+      "grad_norm": 0.06355787068605423,
+      "learning_rate": 0.0006199362344132243,
+      "loss": 1.5075,
+      "step": 10088
+    },
+    {
+      "epoch": 0.4486238940020453,
+      "grad_norm": 0.06461120396852493,
+      "learning_rate": 0.0006197943087679013,
+      "loss": 1.5123,
+      "step": 10090
+    },
+    {
+      "epoch": 0.44871281846071764,
+      "grad_norm": 0.06290825456380844,
+      "learning_rate": 0.0006196523728820329,
+      "loss": 1.5114,
+      "step": 10092
+    },
+    {
+      "epoch": 0.44880174291938996,
+      "grad_norm": 0.06232891604304314,
+      "learning_rate": 0.0006195104267677525,
+      "loss": 1.5073,
+      "step": 10094
+    },
+    {
+      "epoch": 0.4488906673780623,
+      "grad_norm": 0.06516814976930618,
+      "learning_rate": 0.0006193684704371941,
+      "loss": 1.519,
+      "step": 10096
+    },
+    {
+      "epoch": 0.4489795918367347,
+      "grad_norm": 0.06370670348405838,
+      "learning_rate": 0.0006192265039024928,
+      "loss": 1.5129,
+      "step": 10098
+    },
+    {
+      "epoch": 0.44906851629540706,
+      "grad_norm": 0.0628281906247139,
+      "learning_rate": 0.0006190845271757846,
+      "loss": 1.5132,
+      "step": 10100
+    },
+    {
+      "epoch": 0.44915744075407943,
+      "grad_norm": 0.06411248445510864,
+      "learning_rate": 0.0006189425402692061,
+      "loss": 1.5139,
+      "step": 10102
+    },
+    {
+      "epoch": 0.44924636521275174,
+      "grad_norm": 0.06423705816268921,
+      "learning_rate": 0.0006188005431948953,
+      "loss": 1.5087,
+      "step": 10104
+    },
+    {
+      "epoch": 0.4493352896714241,
+      "grad_norm": 0.06506875157356262,
+      "learning_rate": 0.0006186585359649903,
+      "loss": 1.5149,
+      "step": 10106
+    },
+    {
+      "epoch": 0.4494242141300965,
+      "grad_norm": 0.06523150950670242,
+      "learning_rate": 0.0006185165185916308,
+      "loss": 1.5154,
+      "step": 10108
+    },
+    {
+      "epoch": 0.44951313858876885,
+      "grad_norm": 0.06264711171388626,
+      "learning_rate": 0.0006183744910869571,
+      "loss": 1.511,
+      "step": 10110
+    },
+    {
+      "epoch": 0.4496020630474412,
+      "grad_norm": 0.06609227508306503,
+      "learning_rate": 0.0006182324534631102,
+      "loss": 1.5134,
+      "step": 10112
+    },
+    {
+      "epoch": 0.4496909875061136,
+      "grad_norm": 0.06463871151208878,
+      "learning_rate": 0.0006180904057322321,
+      "loss": 1.5089,
+      "step": 10114
+    },
+    {
+      "epoch": 0.4497799119647859,
+      "grad_norm": 0.06578138470649719,
+      "learning_rate": 0.0006179483479064657,
+      "loss": 1.5116,
+      "step": 10116
+    },
+    {
+      "epoch": 0.44986883642345826,
+      "grad_norm": 0.0638619214296341,
+      "learning_rate": 0.0006178062799979548,
+      "loss": 1.5139,
+      "step": 10118
+    },
+    {
+      "epoch": 0.44995776088213063,
+      "grad_norm": 0.06302259117364883,
+      "learning_rate": 0.0006176642020188439,
+      "loss": 1.5093,
+      "step": 10120
+    },
+    {
+      "epoch": 0.450046685340803,
+      "grad_norm": 0.06259556859731674,
+      "learning_rate": 0.0006175221139812784,
+      "loss": 1.51,
+      "step": 10122
+    },
+    {
+      "epoch": 0.45013560979947537,
+      "grad_norm": 0.06390849500894547,
+      "learning_rate": 0.0006173800158974048,
+      "loss": 1.5142,
+      "step": 10124
+    },
+    {
+      "epoch": 0.4502245342581477,
+      "grad_norm": 0.06265709549188614,
+      "learning_rate": 0.0006172379077793702,
+      "loss": 1.5049,
+      "step": 10126
+    },
+    {
+      "epoch": 0.45031345871682005,
+      "grad_norm": 0.061674658209085464,
+      "learning_rate": 0.0006170957896393225,
+      "loss": 1.5133,
+      "step": 10128
+    },
+    {
+      "epoch": 0.4504023831754924,
+      "grad_norm": 0.06264129281044006,
+      "learning_rate": 0.0006169536614894107,
+      "loss": 1.5137,
+      "step": 10130
+    },
+    {
+      "epoch": 0.4504913076341648,
+      "grad_norm": 0.06164534017443657,
+      "learning_rate": 0.0006168115233417846,
+      "loss": 1.5115,
+      "step": 10132
+    },
+    {
+      "epoch": 0.45058023209283715,
+      "grad_norm": 0.06206073611974716,
+      "learning_rate": 0.0006166693752085946,
+      "loss": 1.5174,
+      "step": 10134
+    },
+    {
+      "epoch": 0.4506691565515095,
+      "grad_norm": 0.06333418935537338,
+      "learning_rate": 0.0006165272171019923,
+      "loss": 1.5157,
+      "step": 10136
+    },
+    {
+      "epoch": 0.45075808101018183,
+      "grad_norm": 0.06554005295038223,
+      "learning_rate": 0.0006163850490341298,
+      "loss": 1.5043,
+      "step": 10138
+    },
+    {
+      "epoch": 0.4508470054688542,
+      "grad_norm": 0.06292907893657684,
+      "learning_rate": 0.0006162428710171608,
+      "loss": 1.5117,
+      "step": 10140
+    },
+    {
+      "epoch": 0.45093592992752657,
+      "grad_norm": 0.06288234144449234,
+      "learning_rate": 0.0006161006830632386,
+      "loss": 1.5138,
+      "step": 10142
+    },
+    {
+      "epoch": 0.45102485438619894,
+      "grad_norm": 0.06246441975235939,
+      "learning_rate": 0.0006159584851845184,
+      "loss": 1.5104,
+      "step": 10144
+    },
+    {
+      "epoch": 0.4511137788448713,
+      "grad_norm": 0.062472037971019745,
+      "learning_rate": 0.0006158162773931559,
+      "loss": 1.5149,
+      "step": 10146
+    },
+    {
+      "epoch": 0.4512027033035436,
+      "grad_norm": 0.06365121155977249,
+      "learning_rate": 0.0006156740597013079,
+      "loss": 1.5114,
+      "step": 10148
+    },
+    {
+      "epoch": 0.451291627762216,
+      "grad_norm": 0.06463265419006348,
+      "learning_rate": 0.0006155318321211312,
+      "loss": 1.5189,
+      "step": 10150
+    },
+    {
+      "epoch": 0.45138055222088835,
+      "grad_norm": 0.06455686688423157,
+      "learning_rate": 0.0006153895946647845,
+      "loss": 1.5085,
+      "step": 10152
+    },
+    {
+      "epoch": 0.4514694766795607,
+      "grad_norm": 0.06434636563062668,
+      "learning_rate": 0.0006152473473444265,
+      "loss": 1.5093,
+      "step": 10154
+    },
+    {
+      "epoch": 0.4515584011382331,
+      "grad_norm": 0.06507983803749084,
+      "learning_rate": 0.0006151050901722177,
+      "loss": 1.5084,
+      "step": 10156
+    },
+    {
+      "epoch": 0.4516473255969054,
+      "grad_norm": 0.0638333186507225,
+      "learning_rate": 0.0006149628231603184,
+      "loss": 1.5092,
+      "step": 10158
+    },
+    {
+      "epoch": 0.45173625005557777,
+      "grad_norm": 0.0641988068819046,
+      "learning_rate": 0.0006148205463208902,
+      "loss": 1.5138,
+      "step": 10160
+    },
+    {
+      "epoch": 0.45182517451425014,
+      "grad_norm": 0.06309515982866287,
+      "learning_rate": 0.000614678259666096,
+      "loss": 1.5057,
+      "step": 10162
+    },
+    {
+      "epoch": 0.4519140989729225,
+      "grad_norm": 0.06427089869976044,
+      "learning_rate": 0.0006145359632080987,
+      "loss": 1.5137,
+      "step": 10164
+    },
+    {
+      "epoch": 0.4520030234315949,
+      "grad_norm": 0.06765829026699066,
+      "learning_rate": 0.0006143936569590624,
+      "loss": 1.5093,
+      "step": 10166
+    },
+    {
+      "epoch": 0.45209194789026724,
+      "grad_norm": 0.0644116923213005,
+      "learning_rate": 0.0006142513409311522,
+      "loss": 1.5123,
+      "step": 10168
+    },
+    {
+      "epoch": 0.45218087234893956,
+      "grad_norm": 0.06532014906406403,
+      "learning_rate": 0.000614109015136534,
+      "loss": 1.5055,
+      "step": 10170
+    },
+    {
+      "epoch": 0.4522697968076119,
+      "grad_norm": 0.06273654848337173,
+      "learning_rate": 0.0006139666795873743,
+      "loss": 1.5117,
+      "step": 10172
+    },
+    {
+      "epoch": 0.4523587212662843,
+      "grad_norm": 0.0660717710852623,
+      "learning_rate": 0.0006138243342958405,
+      "loss": 1.5065,
+      "step": 10174
+    },
+    {
+      "epoch": 0.45244764572495666,
+      "grad_norm": 0.06457041203975677,
+      "learning_rate": 0.0006136819792741011,
+      "loss": 1.5117,
+      "step": 10176
+    },
+    {
+      "epoch": 0.452536570183629,
+      "grad_norm": 0.06340939551591873,
+      "learning_rate": 0.000613539614534325,
+      "loss": 1.507,
+      "step": 10178
+    },
+    {
+      "epoch": 0.45262549464230134,
+      "grad_norm": 0.06222638487815857,
+      "learning_rate": 0.0006133972400886825,
+      "loss": 1.5065,
+      "step": 10180
+    },
+    {
+      "epoch": 0.4527144191009737,
+      "grad_norm": 0.06319104135036469,
+      "learning_rate": 0.0006132548559493441,
+      "loss": 1.5134,
+      "step": 10182
+    },
+    {
+      "epoch": 0.4528033435596461,
+      "grad_norm": 0.06425220519304276,
+      "learning_rate": 0.0006131124621284815,
+      "loss": 1.509,
+      "step": 10184
+    },
+    {
+      "epoch": 0.45289226801831844,
+      "grad_norm": 0.06511306017637253,
+      "learning_rate": 0.0006129700586382671,
+      "loss": 1.5106,
+      "step": 10186
+    },
+    {
+      "epoch": 0.4529811924769908,
+      "grad_norm": 0.06421872228384018,
+      "learning_rate": 0.0006128276454908742,
+      "loss": 1.5101,
+      "step": 10188
+    },
+    {
+      "epoch": 0.4530701169356632,
+      "grad_norm": 0.06392832100391388,
+      "learning_rate": 0.0006126852226984771,
+      "loss": 1.5032,
+      "step": 10190
+    },
+    {
+      "epoch": 0.4531590413943355,
+      "grad_norm": 0.06504455208778381,
+      "learning_rate": 0.0006125427902732506,
+      "loss": 1.5128,
+      "step": 10192
+    },
+    {
+      "epoch": 0.45324796585300786,
+      "grad_norm": 0.06360786408185959,
+      "learning_rate": 0.0006124003482273704,
+      "loss": 1.5038,
+      "step": 10194
+    },
+    {
+      "epoch": 0.45333689031168023,
+      "grad_norm": 0.06323164701461792,
+      "learning_rate": 0.000612257896573013,
+      "loss": 1.5068,
+      "step": 10196
+    },
+    {
+      "epoch": 0.4534258147703526,
+      "grad_norm": 0.06563688069581985,
+      "learning_rate": 0.000612115435322356,
+      "loss": 1.5096,
+      "step": 10198
+    },
+    {
+      "epoch": 0.45351473922902497,
+      "grad_norm": 0.06295774132013321,
+      "learning_rate": 0.0006119729644875774,
+      "loss": 1.5103,
+      "step": 10200
+    },
+    {
+      "epoch": 0.4536036636876973,
+      "grad_norm": 0.06223352998495102,
+      "learning_rate": 0.0006118304840808565,
+      "loss": 1.5064,
+      "step": 10202
+    },
+    {
+      "epoch": 0.45369258814636965,
+      "grad_norm": 0.06273999065160751,
+      "learning_rate": 0.0006116879941143728,
+      "loss": 1.5096,
+      "step": 10204
+    },
+    {
+      "epoch": 0.453781512605042,
+      "grad_norm": 0.06447035819292068,
+      "learning_rate": 0.0006115454946003074,
+      "loss": 1.5094,
+      "step": 10206
+    },
+    {
+      "epoch": 0.4538704370637144,
+      "grad_norm": 0.06401224434375763,
+      "learning_rate": 0.0006114029855508413,
+      "loss": 1.5158,
+      "step": 10208
+    },
+    {
+      "epoch": 0.45395936152238675,
+      "grad_norm": 0.06190183758735657,
+      "learning_rate": 0.0006112604669781572,
+      "loss": 1.5101,
+      "step": 10210
+    },
+    {
+      "epoch": 0.45404828598105906,
+      "grad_norm": 0.06359678506851196,
+      "learning_rate": 0.0006111179388944381,
+      "loss": 1.5114,
+      "step": 10212
+    },
+    {
+      "epoch": 0.45413721043973143,
+      "grad_norm": 0.06362069398164749,
+      "learning_rate": 0.0006109754013118678,
+      "loss": 1.5103,
+      "step": 10214
+    },
+    {
+      "epoch": 0.4542261348984038,
+      "grad_norm": 0.06391779333353043,
+      "learning_rate": 0.0006108328542426312,
+      "loss": 1.5183,
+      "step": 10216
+    },
+    {
+      "epoch": 0.45431505935707617,
+      "grad_norm": 0.06379327178001404,
+      "learning_rate": 0.0006106902976989139,
+      "loss": 1.5107,
+      "step": 10218
+    },
+    {
+      "epoch": 0.45440398381574854,
+      "grad_norm": 0.06601396203041077,
+      "learning_rate": 0.0006105477316929021,
+      "loss": 1.5121,
+      "step": 10220
+    },
+    {
+      "epoch": 0.4544929082744209,
+      "grad_norm": 0.0628276839852333,
+      "learning_rate": 0.0006104051562367829,
+      "loss": 1.5103,
+      "step": 10222
+    },
+    {
+      "epoch": 0.4545818327330932,
+      "grad_norm": 0.06758300215005875,
+      "learning_rate": 0.0006102625713427446,
+      "loss": 1.5088,
+      "step": 10224
+    },
+    {
+      "epoch": 0.4546707571917656,
+      "grad_norm": 0.06313972175121307,
+      "learning_rate": 0.0006101199770229758,
+      "loss": 1.5101,
+      "step": 10226
+    },
+    {
+      "epoch": 0.45475968165043795,
+      "grad_norm": 0.06574665755033493,
+      "learning_rate": 0.0006099773732896658,
+      "loss": 1.5103,
+      "step": 10228
+    },
+    {
+      "epoch": 0.4548486061091103,
+      "grad_norm": 0.06595081835985184,
+      "learning_rate": 0.0006098347601550055,
+      "loss": 1.5099,
+      "step": 10230
+    },
+    {
+      "epoch": 0.4549375305677827,
+      "grad_norm": 0.06619875133037567,
+      "learning_rate": 0.0006096921376311857,
+      "loss": 1.5086,
+      "step": 10232
+    },
+    {
+      "epoch": 0.455026455026455,
+      "grad_norm": 0.06375854462385178,
+      "learning_rate": 0.0006095495057303988,
+      "loss": 1.5088,
+      "step": 10234
+    },
+    {
+      "epoch": 0.45511537948512737,
+      "grad_norm": 0.06172388046979904,
+      "learning_rate": 0.0006094068644648373,
+      "loss": 1.5109,
+      "step": 10236
+    },
+    {
+      "epoch": 0.45520430394379974,
+      "grad_norm": 0.06474345177412033,
+      "learning_rate": 0.0006092642138466948,
+      "loss": 1.5136,
+      "step": 10238
+    },
+    {
+      "epoch": 0.4552932284024721,
+      "grad_norm": 0.06749559938907623,
+      "learning_rate": 0.0006091215538881658,
+      "loss": 1.5045,
+      "step": 10240
+    },
+    {
+      "epoch": 0.4553821528611445,
+      "grad_norm": 0.062397945672273636,
+      "learning_rate": 0.0006089788846014457,
+      "loss": 1.5162,
+      "step": 10242
+    },
+    {
+      "epoch": 0.45547107731981684,
+      "grad_norm": 0.06550050526857376,
+      "learning_rate": 0.0006088362059987301,
+      "loss": 1.5102,
+      "step": 10244
+    },
+    {
+      "epoch": 0.45556000177848915,
+      "grad_norm": 0.0633438304066658,
+      "learning_rate": 0.0006086935180922159,
+      "loss": 1.506,
+      "step": 10246
+    },
+    {
+      "epoch": 0.4556489262371615,
+      "grad_norm": 0.06535664945840836,
+      "learning_rate": 0.000608550820894101,
+      "loss": 1.5092,
+      "step": 10248
+    },
+    {
+      "epoch": 0.4557378506958339,
+      "grad_norm": 0.0659709945321083,
+      "learning_rate": 0.0006084081144165835,
+      "loss": 1.518,
+      "step": 10250
+    },
+    {
+      "epoch": 0.45582677515450626,
+      "grad_norm": 0.06421976536512375,
+      "learning_rate": 0.0006082653986718626,
+      "loss": 1.5138,
+      "step": 10252
+    },
+    {
+      "epoch": 0.4559156996131786,
+      "grad_norm": 0.06513391435146332,
+      "learning_rate": 0.0006081226736721383,
+      "loss": 1.5079,
+      "step": 10254
+    },
+    {
+      "epoch": 0.45600462407185094,
+      "grad_norm": 0.06458871066570282,
+      "learning_rate": 0.0006079799394296115,
+      "loss": 1.5101,
+      "step": 10256
+    },
+    {
+      "epoch": 0.4560935485305233,
+      "grad_norm": 0.06421530246734619,
+      "learning_rate": 0.0006078371959564833,
+      "loss": 1.5049,
+      "step": 10258
+    },
+    {
+      "epoch": 0.4561824729891957,
+      "grad_norm": 0.0646352618932724,
+      "learning_rate": 0.0006076944432649567,
+      "loss": 1.504,
+      "step": 10260
+    },
+    {
+      "epoch": 0.45627139744786804,
+      "grad_norm": 0.06522826105356216,
+      "learning_rate": 0.0006075516813672342,
+      "loss": 1.5081,
+      "step": 10262
+    },
+    {
+      "epoch": 0.4563603219065404,
+      "grad_norm": 0.06355036050081253,
+      "learning_rate": 0.0006074089102755204,
+      "loss": 1.5131,
+      "step": 10264
+    },
+    {
+      "epoch": 0.4564492463652127,
+      "grad_norm": 0.06306944042444229,
+      "learning_rate": 0.0006072661300020193,
+      "loss": 1.5033,
+      "step": 10266
+    },
+    {
+      "epoch": 0.4565381708238851,
+      "grad_norm": 0.0642002746462822,
+      "learning_rate": 0.0006071233405589368,
+      "loss": 1.5074,
+      "step": 10268
+    },
+    {
+      "epoch": 0.45662709528255746,
+      "grad_norm": 0.06410963833332062,
+      "learning_rate": 0.0006069805419584791,
+      "loss": 1.5062,
+      "step": 10270
+    },
+    {
+      "epoch": 0.45671601974122983,
+      "grad_norm": 0.06610678881406784,
+      "learning_rate": 0.0006068377342128532,
+      "loss": 1.5125,
+      "step": 10272
+    },
+    {
+      "epoch": 0.4568049441999022,
+      "grad_norm": 0.06421998143196106,
+      "learning_rate": 0.000606694917334267,
+      "loss": 1.5103,
+      "step": 10274
+    },
+    {
+      "epoch": 0.45689386865857456,
+      "grad_norm": 0.0632401779294014,
+      "learning_rate": 0.000606552091334929,
+      "loss": 1.5144,
+      "step": 10276
+    },
+    {
+      "epoch": 0.4569827931172469,
+      "grad_norm": 0.06443547457456589,
+      "learning_rate": 0.0006064092562270487,
+      "loss": 1.5061,
+      "step": 10278
+    },
+    {
+      "epoch": 0.45707171757591925,
+      "grad_norm": 0.06429964303970337,
+      "learning_rate": 0.0006062664120228363,
+      "loss": 1.5122,
+      "step": 10280
+    },
+    {
+      "epoch": 0.4571606420345916,
+      "grad_norm": 0.06369158625602722,
+      "learning_rate": 0.0006061235587345025,
+      "loss": 1.5081,
+      "step": 10282
+    },
+    {
+      "epoch": 0.457249566493264,
+      "grad_norm": 0.062136199325323105,
+      "learning_rate": 0.0006059806963742595,
+      "loss": 1.5106,
+      "step": 10284
+    },
+    {
+      "epoch": 0.45733849095193635,
+      "grad_norm": 0.06280120462179184,
+      "learning_rate": 0.0006058378249543193,
+      "loss": 1.5096,
+      "step": 10286
+    },
+    {
+      "epoch": 0.45742741541060866,
+      "grad_norm": 0.06295254826545715,
+      "learning_rate": 0.0006056949444868956,
+      "loss": 1.5038,
+      "step": 10288
+    },
+    {
+      "epoch": 0.45751633986928103,
+      "grad_norm": 0.0640212669968605,
+      "learning_rate": 0.0006055520549842022,
+      "loss": 1.5059,
+      "step": 10290
+    },
+    {
+      "epoch": 0.4576052643279534,
+      "grad_norm": 0.06613568961620331,
+      "learning_rate": 0.000605409156458454,
+      "loss": 1.5122,
+      "step": 10292
+    },
+    {
+      "epoch": 0.45769418878662577,
+      "grad_norm": 0.06338582932949066,
+      "learning_rate": 0.0006052662489218665,
+      "loss": 1.5069,
+      "step": 10294
+    },
+    {
+      "epoch": 0.45778311324529813,
+      "grad_norm": 0.06454145908355713,
+      "learning_rate": 0.0006051233323866563,
+      "loss": 1.506,
+      "step": 10296
+    },
+    {
+      "epoch": 0.4578720377039705,
+      "grad_norm": 0.06602118164300919,
+      "learning_rate": 0.0006049804068650403,
+      "loss": 1.5075,
+      "step": 10298
+    },
+    {
+      "epoch": 0.4579609621626428,
+      "grad_norm": 0.06418369710445404,
+      "learning_rate": 0.0006048374723692365,
+      "loss": 1.508,
+      "step": 10300
+    },
+    {
+      "epoch": 0.4580498866213152,
+      "grad_norm": 0.063139408826828,
+      "learning_rate": 0.0006046945289114634,
+      "loss": 1.5068,
+      "step": 10302
+    },
+    {
+      "epoch": 0.45813881107998755,
+      "grad_norm": 0.06343439221382141,
+      "learning_rate": 0.0006045515765039408,
+      "loss": 1.5078,
+      "step": 10304
+    },
+    {
+      "epoch": 0.4582277355386599,
+      "grad_norm": 0.06396979093551636,
+      "learning_rate": 0.0006044086151588886,
+      "loss": 1.5098,
+      "step": 10306
+    },
+    {
+      "epoch": 0.4583166599973323,
+      "grad_norm": 0.06393872201442719,
+      "learning_rate": 0.0006042656448885279,
+      "loss": 1.507,
+      "step": 10308
+    },
+    {
+      "epoch": 0.4584055844560046,
+      "grad_norm": 0.06459838151931763,
+      "learning_rate": 0.0006041226657050804,
+      "loss": 1.5107,
+      "step": 10310
+    },
+    {
+      "epoch": 0.45849450891467697,
+      "grad_norm": 0.06489048898220062,
+      "learning_rate": 0.0006039796776207686,

     }
   ],
   "logging_steps": 2,

@@ -35186,7 +38702,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
   "train_batch_size": 768,
   "trial_name": null,
   "trial_params": null
|
| 36260 |
+
"loss": 1.5094,
|
| 36261 |
+
"step": 10312
|
| 36262 |
+
},
|
| 36263 |
+
{
|
| 36264 |
+
"epoch": 0.45858343337334934,
|
| 36265 |
+
"grad_norm": 0.06409396976232529,
|
| 36266 |
+
"learning_rate": 0.0006038366806478157,
|
| 36267 |
+
"loss": 1.5047,
|
| 36268 |
+
"step": 10314
|
| 36269 |
+
},
|
| 36270 |
+
{
|
| 36271 |
+
"epoch": 0.4586723578320217,
|
| 36272 |
+
"grad_norm": 0.06495364010334015,
|
| 36273 |
+
"learning_rate": 0.0006036936747984456,
|
| 36274 |
+
"loss": 1.4996,
|
| 36275 |
+
"step": 10316
|
| 36276 |
+
},
|
| 36277 |
+
{
|
| 36278 |
+
"epoch": 0.4587612822906941,
|
| 36279 |
+
"grad_norm": 0.06571628898382187,
|
| 36280 |
+
"learning_rate": 0.0006035506600848835,
|
| 36281 |
+
"loss": 1.5113,
|
| 36282 |
+
"step": 10318
|
| 36283 |
+
},
|
| 36284 |
+
{
|
| 36285 |
+
"epoch": 0.45885020674936644,
|
| 36286 |
+
"grad_norm": 0.06504350155591965,
|
| 36287 |
+
"learning_rate": 0.0006034076365193545,
|
| 36288 |
+
"loss": 1.4964,
|
| 36289 |
+
"step": 10320
|
| 36290 |
+
},
|
| 36291 |
+
{
|
| 36292 |
+
"epoch": 0.45893913120803875,
|
| 36293 |
+
"grad_norm": 0.06698577105998993,
|
| 36294 |
+
"learning_rate": 0.0006032646041140849,
|
| 36295 |
+
"loss": 1.5051,
|
| 36296 |
+
"step": 10322
|
| 36297 |
+
},
|
| 36298 |
+
{
|
| 36299 |
+
"epoch": 0.4590280556667111,
|
| 36300 |
+
"grad_norm": 0.06189613789319992,
|
| 36301 |
+
"learning_rate": 0.0006031215628813021,
|
| 36302 |
+
"loss": 1.5097,
|
| 36303 |
+
"step": 10324
|
| 36304 |
+
},
|
| 36305 |
+
{
|
| 36306 |
+
"epoch": 0.4591169801253835,
|
| 36307 |
+
"grad_norm": 0.06384344398975372,
|
| 36308 |
+
"learning_rate": 0.0006029785128332336,
|
| 36309 |
+
"loss": 1.512,
|
| 36310 |
+
"step": 10326
|
| 36311 |
+
},
|
| 36312 |
+
{
|
| 36313 |
+
"epoch": 0.45920590458405586,
|
| 36314 |
+
"grad_norm": 0.06703834980726242,
|
| 36315 |
+
"learning_rate": 0.0006028354539821079,
|
| 36316 |
+
"loss": 1.5041,
|
| 36317 |
+
"step": 10328
|
| 36318 |
+
},
|
| 36319 |
+
{
|
| 36320 |
+
"epoch": 0.4592948290427282,
|
| 36321 |
+
"grad_norm": 0.06500376015901566,
|
| 36322 |
+
"learning_rate": 0.0006026923863401545,
|
| 36323 |
+
"loss": 1.5061,
|
| 36324 |
+
"step": 10330
|
| 36325 |
+
},
|
| 36326 |
+
{
|
| 36327 |
+
"epoch": 0.45938375350140054,
|
| 36328 |
+
"grad_norm": 0.06535261869430542,
|
| 36329 |
+
"learning_rate": 0.0006025493099196033,
|
| 36330 |
+
"loss": 1.5052,
|
| 36331 |
+
"step": 10332
|
| 36332 |
+
},
|
| 36333 |
+
{
|
| 36334 |
+
"epoch": 0.4594726779600729,
|
| 36335 |
+
"grad_norm": 0.062189262360334396,
|
| 36336 |
+
"learning_rate": 0.0006024062247326854,
|
| 36337 |
+
"loss": 1.5009,
|
| 36338 |
+
"step": 10334
|
| 36339 |
+
},
|
| 36340 |
+
{
|
| 36341 |
+
"epoch": 0.4595616024187453,
|
| 36342 |
+
"grad_norm": 0.06319093704223633,
|
| 36343 |
+
"learning_rate": 0.0006022631307916318,
|
| 36344 |
+
"loss": 1.5052,
|
| 36345 |
+
"step": 10336
|
| 36346 |
+
},
|
| 36347 |
+
{
|
| 36348 |
+
"epoch": 0.45965052687741764,
|
| 36349 |
+
"grad_norm": 0.0611843504011631,
|
| 36350 |
+
"learning_rate": 0.0006021200281086753,
|
| 36351 |
+
"loss": 1.5051,
|
| 36352 |
+
"step": 10338
|
| 36353 |
+
},
|
| 36354 |
+
{
|
| 36355 |
+
"epoch": 0.45973945133609,
|
| 36356 |
+
"grad_norm": 0.06422754377126694,
|
| 36357 |
+
"learning_rate": 0.0006019769166960485,
|
| 36358 |
+
"loss": 1.5042,
|
| 36359 |
+
"step": 10340
|
| 36360 |
+
},
|
| 36361 |
+
{
|
| 36362 |
+
"epoch": 0.4598283757947623,
|
| 36363 |
+
"grad_norm": 0.06372174620628357,
|
| 36364 |
+
"learning_rate": 0.0006018337965659859,
|
| 36365 |
+
"loss": 1.513,
|
| 36366 |
+
"step": 10342
|
| 36367 |
+
},
|
| 36368 |
+
{
|
| 36369 |
+
"epoch": 0.4599173002534347,
|
| 36370 |
+
"grad_norm": 0.06303902715444565,
|
| 36371 |
+
"learning_rate": 0.0006016906677307213,
|
| 36372 |
+
"loss": 1.5053,
|
| 36373 |
+
"step": 10344
|
| 36374 |
+
},
|
| 36375 |
+
{
|
| 36376 |
+
"epoch": 0.46000622471210706,
|
| 36377 |
+
"grad_norm": 0.0631803497672081,
|
| 36378 |
+
"learning_rate": 0.0006015475302024904,
|
| 36379 |
+
"loss": 1.5102,
|
| 36380 |
+
"step": 10346
|
| 36381 |
+
},
|
| 36382 |
+
{
|
| 36383 |
+
"epoch": 0.4600951491707794,
|
| 36384 |
+
"grad_norm": 0.06362450867891312,
|
| 36385 |
+
"learning_rate": 0.0006014043839935291,
|
| 36386 |
+
"loss": 1.5115,
|
| 36387 |
+
"step": 10348
|
| 36388 |
+
},
|
| 36389 |
+
{
|
| 36390 |
+
"epoch": 0.4601840736294518,
|
| 36391 |
+
"grad_norm": 0.06407329440116882,
|
| 36392 |
+
"learning_rate": 0.0006012612291160743,
|
| 36393 |
+
"loss": 1.5065,
|
| 36394 |
+
"step": 10350
|
| 36395 |
+
},
|
| 36396 |
+
{
|
| 36397 |
+
"epoch": 0.46027299808812416,
|
| 36398 |
+
"grad_norm": 0.061808858066797256,
|
| 36399 |
+
"learning_rate": 0.0006011180655823632,
|
| 36400 |
+
"loss": 1.5125,
|
| 36401 |
+
"step": 10352
|
| 36402 |
+
},
|
| 36403 |
+
{
|
| 36404 |
+
"epoch": 0.4603619225467965,
|
| 36405 |
+
"grad_norm": 0.0652756616473198,
|
| 36406 |
+
"learning_rate": 0.0006009748934046343,
|
| 36407 |
+
"loss": 1.5064,
|
| 36408 |
+
"step": 10354
|
| 36409 |
+
},
|
| 36410 |
+
{
|
| 36411 |
+
"epoch": 0.46045084700546884,
|
| 36412 |
+
"grad_norm": 0.0631522387266159,
|
| 36413 |
+
"learning_rate": 0.0006008317125951265,
|
| 36414 |
+
"loss": 1.504,
|
| 36415 |
+
"step": 10356
|
| 36416 |
+
},
|
| 36417 |
+
{
|
| 36418 |
+
"epoch": 0.4605397714641412,
|
| 36419 |
+
"grad_norm": 0.0668981596827507,
|
| 36420 |
+
"learning_rate": 0.0006006885231660796,
|
| 36421 |
+
"loss": 1.5145,
|
| 36422 |
+
"step": 10358
|
| 36423 |
+
},
|
| 36424 |
+
{
|
| 36425 |
+
"epoch": 0.4606286959228136,
|
| 36426 |
+
"grad_norm": 0.06222165748476982,
|
| 36427 |
+
"learning_rate": 0.0006005453251297341,
|
| 36428 |
+
"loss": 1.5067,
|
| 36429 |
+
"step": 10360
|
| 36430 |
+
},
|
| 36431 |
+
{
|
| 36432 |
+
"epoch": 0.46071762038148595,
|
| 36433 |
+
"grad_norm": 0.06397759169340134,
|
| 36434 |
+
"learning_rate": 0.0006004021184983309,
|
| 36435 |
+
"loss": 1.5067,
|
| 36436 |
+
"step": 10362
|
| 36437 |
+
},
|
| 36438 |
+
{
|
| 36439 |
+
"epoch": 0.46080654484015826,
|
| 36440 |
+
"grad_norm": 0.060776520520448685,
|
| 36441 |
+
"learning_rate": 0.0006002589032841122,
|
| 36442 |
+
"loss": 1.5025,
|
| 36443 |
+
"step": 10364
|
| 36444 |
+
},
|
| 36445 |
+
{
|
| 36446 |
+
"epoch": 0.46089546929883063,
|
| 36447 |
+
"grad_norm": 0.06292383372783661,
|
| 36448 |
+
"learning_rate": 0.0006001156794993208,
|
| 36449 |
+
"loss": 1.4995,
|
| 36450 |
+
"step": 10366
|
| 36451 |
+
},
|
| 36452 |
+
{
|
| 36453 |
+
"epoch": 0.460984393757503,
|
| 36454 |
+
"grad_norm": 0.06334930658340454,
|
| 36455 |
+
"learning_rate": 0.0005999724471561999,
|
| 36456 |
+
"loss": 1.5038,
|
| 36457 |
+
"step": 10368
|
| 36458 |
+
},
|
| 36459 |
+
{
|
| 36460 |
+
"epoch": 0.46107331821617537,
|
| 36461 |
+
"grad_norm": 0.062282536178827286,
|
| 36462 |
+
"learning_rate": 0.0005998292062669935,
|
| 36463 |
+
"loss": 1.5019,
|
| 36464 |
+
"step": 10370
|
| 36465 |
+
},
|
| 36466 |
+
{
|
| 36467 |
+
"epoch": 0.46116224267484773,
|
| 36468 |
+
"grad_norm": 0.06332574039697647,
|
| 36469 |
+
"learning_rate": 0.0005996859568439468,
|
| 36470 |
+
"loss": 1.5022,
|
| 36471 |
+
"step": 10372
|
| 36472 |
+
},
|
| 36473 |
+
{
|
| 36474 |
+
"epoch": 0.4612511671335201,
|
| 36475 |
+
"grad_norm": 0.06254351884126663,
|
| 36476 |
+
"learning_rate": 0.000599542698899305,
|
| 36477 |
+
"loss": 1.513,
|
| 36478 |
+
"step": 10374
|
| 36479 |
+
},
|
| 36480 |
+
{
|
| 36481 |
+
"epoch": 0.4613400915921924,
|
| 36482 |
+
"grad_norm": 0.06307613104581833,
|
| 36483 |
+
"learning_rate": 0.0005993994324453147,
|
| 36484 |
+
"loss": 1.5088,
|
| 36485 |
+
"step": 10376
|
| 36486 |
+
},
|
| 36487 |
+
{
|
| 36488 |
+
"epoch": 0.4614290160508648,
|
| 36489 |
+
"grad_norm": 0.062051285058259964,
|
| 36490 |
+
"learning_rate": 0.0005992561574942229,
|
| 36491 |
+
"loss": 1.4984,
|
| 36492 |
+
"step": 10378
|
| 36493 |
+
},
|
| 36494 |
+
{
|
| 36495 |
+
"epoch": 0.46151794050953715,
|
| 36496 |
+
"grad_norm": 0.06389043480157852,
|
| 36497 |
+
"learning_rate": 0.0005991128740582773,
|
| 36498 |
+
"loss": 1.5078,
|
| 36499 |
+
"step": 10380
|
| 36500 |
+
},
|
| 36501 |
+
{
|
| 36502 |
+
"epoch": 0.4616068649682095,
|
| 36503 |
+
"grad_norm": 0.06286501884460449,
|
| 36504 |
+
"learning_rate": 0.0005989695821497266,
|
| 36505 |
+
"loss": 1.5044,
|
| 36506 |
+
"step": 10382
|
| 36507 |
+
},
|
| 36508 |
+
{
|
| 36509 |
+
"epoch": 0.4616957894268819,
|
| 36510 |
+
"grad_norm": 0.0667327269911766,
|
| 36511 |
+
"learning_rate": 0.0005988262817808198,
|
| 36512 |
+
"loss": 1.5093,
|
| 36513 |
+
"step": 10384
|
| 36514 |
+
},
|
| 36515 |
+
{
|
| 36516 |
+
"epoch": 0.4617847138855542,
|
| 36517 |
+
"grad_norm": 0.06277990341186523,
|
| 36518 |
+
"learning_rate": 0.0005986829729638069,
|
| 36519 |
+
"loss": 1.5009,
|
| 36520 |
+
"step": 10386
|
| 36521 |
+
},
|
| 36522 |
+
{
|
| 36523 |
+
"epoch": 0.46187363834422657,
|
| 36524 |
+
"grad_norm": 0.06250549107789993,
|
| 36525 |
+
"learning_rate": 0.0005985396557109386,
|
| 36526 |
+
"loss": 1.509,
|
| 36527 |
+
"step": 10388
|
| 36528 |
+
},
|
| 36529 |
+
{
|
| 36530 |
+
"epoch": 0.46196256280289894,
|
| 36531 |
+
"grad_norm": 0.06798355281352997,
|
| 36532 |
+
"learning_rate": 0.0005983963300344662,
|
| 36533 |
+
"loss": 1.5055,
|
| 36534 |
+
"step": 10390
|
| 36535 |
+
},
|
| 36536 |
+
{
|
| 36537 |
+
"epoch": 0.4620514872615713,
|
| 36538 |
+
"grad_norm": 0.06521153450012207,
|
| 36539 |
+
"learning_rate": 0.000598252995946642,
|
| 36540 |
+
"loss": 1.5108,
|
| 36541 |
+
"step": 10392
|
| 36542 |
+
},
|
| 36543 |
+
{
|
| 36544 |
+
"epoch": 0.46214041172024367,
|
| 36545 |
+
"grad_norm": 0.06334955245256424,
|
| 36546 |
+
"learning_rate": 0.0005981096534597186,
|
| 36547 |
+
"loss": 1.5062,
|
| 36548 |
+
"step": 10394
|
| 36549 |
+
},
|
| 36550 |
+
{
|
| 36551 |
+
"epoch": 0.462229336178916,
|
| 36552 |
+
"grad_norm": 0.06671233475208282,
|
| 36553 |
+
"learning_rate": 0.0005979663025859499,
|
| 36554 |
+
"loss": 1.508,
|
| 36555 |
+
"step": 10396
|
| 36556 |
+
},
|
| 36557 |
+
{
|
| 36558 |
+
"epoch": 0.46231826063758835,
|
| 36559 |
+
"grad_norm": 0.06669692695140839,
|
| 36560 |
+
"learning_rate": 0.0005978229433375897,
|
| 36561 |
+
"loss": 1.5016,
|
| 36562 |
+
"step": 10398
|
| 36563 |
+
},
|
| 36564 |
+
{
|
| 36565 |
+
"epoch": 0.4624071850962607,
|
| 36566 |
+
"grad_norm": 0.06515517830848694,
|
| 36567 |
+
"learning_rate": 0.0005976795757268933,
|
| 36568 |
+
"loss": 1.5029,
|
| 36569 |
+
"step": 10400
|
| 36570 |
+
},
|
| 36571 |
+
{
|
| 36572 |
+
"epoch": 0.4624961095549331,
|
| 36573 |
+
"grad_norm": 0.06504440307617188,
|
| 36574 |
+
"learning_rate": 0.0005975361997661162,
|
| 36575 |
+
"loss": 1.5052,
|
| 36576 |
+
"step": 10402
|
| 36577 |
+
},
|
| 36578 |
+
{
|
| 36579 |
+
"epoch": 0.46258503401360546,
|
| 36580 |
+
"grad_norm": 0.06195230036973953,
|
| 36581 |
+
"learning_rate": 0.0005973928154675151,
|
| 36582 |
+
"loss": 1.5033,
|
| 36583 |
+
"step": 10404
|
| 36584 |
+
},
|
| 36585 |
+
{
|
| 36586 |
+
"epoch": 0.4626739584722778,
|
| 36587 |
+
"grad_norm": 0.0632019117474556,
|
| 36588 |
+
"learning_rate": 0.0005972494228433468,
|
| 36589 |
+
"loss": 1.5057,
|
| 36590 |
+
"step": 10406
|
| 36591 |
+
},
|
| 36592 |
+
{
|
| 36593 |
+
"epoch": 0.46276288293095014,
|
| 36594 |
+
"grad_norm": 0.06470786780118942,
|
| 36595 |
+
"learning_rate": 0.0005971060219058694,
|
| 36596 |
+
"loss": 1.5045,
|
| 36597 |
+
"step": 10408
|
| 36598 |
+
},
|
| 36599 |
+
{
|
| 36600 |
+
"epoch": 0.4628518073896225,
|
| 36601 |
+
"grad_norm": 0.06392474472522736,
|
| 36602 |
+
"learning_rate": 0.000596962612667341,
|
| 36603 |
+
"loss": 1.506,
|
| 36604 |
+
"step": 10410
|
| 36605 |
+
},
|
| 36606 |
+
{
|
| 36607 |
+
"epoch": 0.4629407318482949,
|
| 36608 |
+
"grad_norm": 0.06552916020154953,
|
| 36609 |
+
"learning_rate": 0.0005968191951400215,
|
| 36610 |
+
"loss": 1.5066,
|
| 36611 |
+
"step": 10412
|
| 36612 |
+
},
|
| 36613 |
+
{
|
| 36614 |
+
"epoch": 0.46302965630696724,
|
| 36615 |
+
"grad_norm": 0.06476908922195435,
|
| 36616 |
+
"learning_rate": 0.0005966757693361705,
|
| 36617 |
+
"loss": 1.5028,
|
| 36618 |
+
"step": 10414
|
| 36619 |
+
},
|
| 36620 |
+
{
|
| 36621 |
+
"epoch": 0.4631185807656396,
|
| 36622 |
+
"grad_norm": 0.06308183819055557,
|
| 36623 |
+
"learning_rate": 0.0005965323352680486,
|
| 36624 |
+
"loss": 1.5073,
|
| 36625 |
+
"step": 10416
|
| 36626 |
+
},
|
| 36627 |
+
{
|
| 36628 |
+
"epoch": 0.4632075052243119,
|
| 36629 |
+
"grad_norm": 0.06603847444057465,
|
| 36630 |
+
"learning_rate": 0.0005963888929479173,
|
| 36631 |
+
"loss": 1.516,
|
| 36632 |
+
"step": 10418
|
| 36633 |
+
},
|
| 36634 |
+
{
|
| 36635 |
+
"epoch": 0.4632964296829843,
|
| 36636 |
+
"grad_norm": 0.06707409769296646,
|
| 36637 |
+
"learning_rate": 0.0005962454423880387,
|
| 36638 |
+
"loss": 1.5026,
|
| 36639 |
+
"step": 10420
|
| 36640 |
+
},
|
| 36641 |
+
{
|
| 36642 |
+
"epoch": 0.46338535414165666,
|
| 36643 |
+
"grad_norm": 0.06557576358318329,
|
| 36644 |
+
"learning_rate": 0.0005961019836006755,
|
| 36645 |
+
"loss": 1.5101,
|
| 36646 |
+
"step": 10422
|
| 36647 |
+
},
|
| 36648 |
+
{
|
| 36649 |
+
"epoch": 0.463474278600329,
|
| 36650 |
+
"grad_norm": 0.06399369239807129,
|
| 36651 |
+
"learning_rate": 0.0005959585165980912,
|
| 36652 |
+
"loss": 1.5085,
|
| 36653 |
+
"step": 10424
|
| 36654 |
+
},
|
| 36655 |
+
{
|
| 36656 |
+
"epoch": 0.4635632030590014,
|
| 36657 |
+
"grad_norm": 0.06383886933326721,
|
| 36658 |
+
"learning_rate": 0.0005958150413925501,
|
| 36659 |
+
"loss": 1.5059,
|
| 36660 |
+
"step": 10426
|
| 36661 |
+
},
|
| 36662 |
+
{
|
| 36663 |
+
"epoch": 0.46365212751767376,
|
| 36664 |
+
"grad_norm": 0.0634358674287796,
|
| 36665 |
+
"learning_rate": 0.000595671557996317,
|
| 36666 |
+
"loss": 1.5054,
|
| 36667 |
+
"step": 10428
|
| 36668 |
+
},
|
| 36669 |
+
{
|
| 36670 |
+
"epoch": 0.4637410519763461,
|
| 36671 |
+
"grad_norm": 0.06545853614807129,
|
| 36672 |
+
"learning_rate": 0.0005955280664216575,
|
| 36673 |
+
"loss": 1.513,
|
| 36674 |
+
"step": 10430
|
| 36675 |
+
},
|
| 36676 |
+
{
|
| 36677 |
+
"epoch": 0.46382997643501844,
|
| 36678 |
+
"grad_norm": 0.06597691774368286,
|
| 36679 |
+
"learning_rate": 0.0005953845666808378,
|
| 36680 |
+
"loss": 1.5063,
|
| 36681 |
+
"step": 10432
|
| 36682 |
+
},
|
| 36683 |
+
{
|
| 36684 |
+
"epoch": 0.4639189008936908,
|
| 36685 |
+
"grad_norm": 0.06610873341560364,
|
| 36686 |
+
"learning_rate": 0.0005952410587861251,
|
| 36687 |
+
"loss": 1.5096,
|
| 36688 |
+
"step": 10434
|
| 36689 |
+
},
|
| 36690 |
+
{
|
| 36691 |
+
"epoch": 0.4640078253523632,
|
| 36692 |
+
"grad_norm": 0.06371376663446426,
|
| 36693 |
+
"learning_rate": 0.0005950975427497871,
|
| 36694 |
+
"loss": 1.5077,
|
| 36695 |
+
"step": 10436
|
| 36696 |
+
},
|
| 36697 |
+
{
|
| 36698 |
+
"epoch": 0.46409674981103555,
|
| 36699 |
+
"grad_norm": 0.06398597359657288,
|
| 36700 |
+
"learning_rate": 0.0005949540185840919,
|
| 36701 |
+
"loss": 1.5092,
|
| 36702 |
+
"step": 10438
|
| 36703 |
+
},
|
| 36704 |
+
{
|
| 36705 |
+
"epoch": 0.46418567426970786,
|
| 36706 |
+
"grad_norm": 0.06583784520626068,
|
| 36707 |
+
"learning_rate": 0.0005948104863013089,
|
| 36708 |
+
"loss": 1.5122,
|
| 36709 |
+
"step": 10440
|
| 36710 |
+
},
|
| 36711 |
+
{
|
| 36712 |
+
"epoch": 0.46427459872838023,
|
| 36713 |
+
"grad_norm": 0.06568284332752228,
|
| 36714 |
+
"learning_rate": 0.0005946669459137075,
|
| 36715 |
+
"loss": 1.5076,
|
| 36716 |
+
"step": 10442
|
| 36717 |
+
},
|
| 36718 |
+
{
|
| 36719 |
+
"epoch": 0.4643635231870526,
|
| 36720 |
+
"grad_norm": 0.06291704624891281,
|
| 36721 |
+
"learning_rate": 0.0005945233974335585,
|
| 36722 |
+
"loss": 1.5103,
|
| 36723 |
+
"step": 10444
|
| 36724 |
+
},
|
| 36725 |
+
{
|
| 36726 |
+
"epoch": 0.46445244764572496,
|
| 36727 |
+
"grad_norm": 0.06666183471679688,
|
| 36728 |
+
"learning_rate": 0.0005943798408731329,
|
| 36729 |
+
"loss": 1.5065,
|
| 36730 |
+
"step": 10446
|
| 36731 |
+
},
|
| 36732 |
+
{
|
| 36733 |
+
"epoch": 0.46454137210439733,
|
| 36734 |
+
"grad_norm": 0.06522826105356216,
|
| 36735 |
+
"learning_rate": 0.0005942362762447026,
|
| 36736 |
+
"loss": 1.5054,
|
| 36737 |
+
"step": 10448
|
| 36738 |
+
},
|
| 36739 |
+
{
|
| 36740 |
+
"epoch": 0.46463029656306964,
|
| 36741 |
+
"grad_norm": 0.06623510271310806,
|
| 36742 |
+
"learning_rate": 0.0005940927035605403,
|
| 36743 |
+
"loss": 1.5098,
|
| 36744 |
+
"step": 10450
|
| 36745 |
+
},
|
| 36746 |
+
{
|
| 36747 |
+
"epoch": 0.464719221021742,
|
| 36748 |
+
"grad_norm": 0.06403439491987228,
|
| 36749 |
+
"learning_rate": 0.0005939491228329187,
|
| 36750 |
+
"loss": 1.5,
|
| 36751 |
+
"step": 10452
|
| 36752 |
+
},
|
| 36753 |
+
{
|
| 36754 |
+
"epoch": 0.4648081454804144,
|
| 36755 |
+
"grad_norm": 0.0637822076678276,
|
| 36756 |
+
"learning_rate": 0.0005938055340741123,
|
| 36757 |
+
"loss": 1.5119,
|
| 36758 |
+
"step": 10454
|
| 36759 |
+
},
|
| 36760 |
+
{
|
| 36761 |
+
"epoch": 0.46489706993908675,
|
| 36762 |
+
"grad_norm": 0.0661793127655983,
|
| 36763 |
+
"learning_rate": 0.0005936619372963953,
|
| 36764 |
+
"loss": 1.5033,
|
| 36765 |
+
"step": 10456
|
| 36766 |
+
},
|
| 36767 |
+
{
|
| 36768 |
+
"epoch": 0.4649859943977591,
|
| 36769 |
+
"grad_norm": 0.06596322357654572,
|
| 36770 |
+
"learning_rate": 0.0005935183325120433,
|
| 36771 |
+
"loss": 1.5056,
|
| 36772 |
+
"step": 10458
|
| 36773 |
+
},
|
| 36774 |
+
{
|
| 36775 |
+
"epoch": 0.4650749188564315,
|
| 36776 |
+
"grad_norm": 0.06263718008995056,
|
| 36777 |
+
"learning_rate": 0.000593374719733332,
|
| 36778 |
+
"loss": 1.5065,
|
| 36779 |
+
"step": 10460
|
| 36780 |
+
},
|
| 36781 |
+
{
|
| 36782 |
+
"epoch": 0.4651638433151038,
|
| 36783 |
+
"grad_norm": 0.06337321549654007,
|
| 36784 |
+
"learning_rate": 0.0005932310989725382,
|
| 36785 |
+
"loss": 1.5115,
|
| 36786 |
+
"step": 10462
|
| 36787 |
+
},
|
| 36788 |
+
{
|
| 36789 |
+
"epoch": 0.46525276777377617,
|
| 36790 |
+
"grad_norm": 0.0630149319767952,
|
| 36791 |
+
"learning_rate": 0.0005930874702419392,
|
| 36792 |
+
"loss": 1.5084,
|
| 36793 |
+
"step": 10464
|
| 36794 |
+
},
|
| 36795 |
+
{
|
| 36796 |
+
"epoch": 0.46534169223244853,
|
| 36797 |
+
"grad_norm": 0.06514789909124374,
|
| 36798 |
+
"learning_rate": 0.0005929438335538131,
|
| 36799 |
+
"loss": 1.5035,
|
| 36800 |
+
"step": 10466
|
| 36801 |
+
},
|
| 36802 |
+
{
|
| 36803 |
+
"epoch": 0.4654306166911209,
|
| 36804 |
+
"grad_norm": 0.061315376311540604,
|
| 36805 |
+
"learning_rate": 0.0005928001889204385,
|
| 36806 |
+
"loss": 1.5073,
|
| 36807 |
+
"step": 10468
|
| 36808 |
+
},
|
| 36809 |
+
{
|
| 36810 |
+
"epoch": 0.46551954114979327,
|
| 36811 |
+
"grad_norm": 0.06364026665687561,
|
| 36812 |
+
"learning_rate": 0.0005926565363540947,
|
| 36813 |
+
"loss": 1.5027,
|
| 36814 |
+
"step": 10470
|
| 36815 |
+
},
|
| 36816 |
+
{
|
| 36817 |
+
"epoch": 0.4656084656084656,
|
| 36818 |
+
"grad_norm": 0.06301172822713852,
|
| 36819 |
+
"learning_rate": 0.0005925128758670619,
|
| 36820 |
+
"loss": 1.5064,
|
| 36821 |
+
"step": 10472
|
| 36822 |
+
},
|
| 36823 |
+
{
|
| 36824 |
+
"epoch": 0.46569739006713795,
|
| 36825 |
+
"grad_norm": 0.06264245510101318,
|
| 36826 |
+
"learning_rate": 0.0005923692074716209,
|
| 36827 |
+
"loss": 1.5051,
|
| 36828 |
+
"step": 10474
|
| 36829 |
+
},
|
| 36830 |
+
{
|
| 36831 |
+
"epoch": 0.4657863145258103,
|
| 36832 |
+
"grad_norm": 0.06521310657262802,
|
| 36833 |
+
"learning_rate": 0.0005922255311800529,
|
| 36834 |
+
"loss": 1.5008,
|
| 36835 |
+
"step": 10476
|
| 36836 |
+
},
|
| 36837 |
+
{
|
| 36838 |
+
"epoch": 0.4658752389844827,
|
| 36839 |
+
"grad_norm": 0.06265758723020554,
|
| 36840 |
+
"learning_rate": 0.0005920818470046399,
|
| 36841 |
+
"loss": 1.5011,
|
| 36842 |
+
"step": 10478
|
| 36843 |
+
},
|
| 36844 |
+
{
|
| 36845 |
+
"epoch": 0.46596416344315505,
|
| 36846 |
+
"grad_norm": 0.06298228353261948,
|
| 36847 |
+
"learning_rate": 0.000591938154957665,
|
| 36848 |
+
"loss": 1.5046,
|
| 36849 |
+
"step": 10480
|
| 36850 |
+
},
|
| 36851 |
+
{
|
| 36852 |
+
"epoch": 0.4660530879018274,
|
| 36853 |
+
"grad_norm": 0.06504108011722565,
|
| 36854 |
+
"learning_rate": 0.0005917944550514114,
|
| 36855 |
+
"loss": 1.5035,
|
| 36856 |
+
"step": 10482
|
| 36857 |
+
},
|
| 36858 |
+
{
|
| 36859 |
+
"epoch": 0.46614201236049974,
|
| 36860 |
+
"grad_norm": 0.0628167986869812,
|
| 36861 |
+
"learning_rate": 0.0005916507472981632,
|
| 36862 |
+
"loss": 1.5018,
|
| 36863 |
+
"step": 10484
|
| 36864 |
+
},
|
| 36865 |
+
{
|
| 36866 |
+
"epoch": 0.4662309368191721,
|
| 36867 |
+
"grad_norm": 0.06394867599010468,
|
| 36868 |
+
"learning_rate": 0.0005915070317102053,
|
| 36869 |
+
"loss": 1.5055,
|
| 36870 |
+
"step": 10486
|
| 36871 |
+
},
|
| 36872 |
+
{
|
| 36873 |
+
"epoch": 0.46631986127784447,
|
| 36874 |
+
"grad_norm": 0.06346921622753143,
|
| 36875 |
+
"learning_rate": 0.0005913633082998231,
|
| 36876 |
+
"loss": 1.5132,
|
| 36877 |
+
"step": 10488
|
| 36878 |
+
},
|
| 36879 |
+
{
|
| 36880 |
+
"epoch": 0.46640878573651684,
|
| 36881 |
+
"grad_norm": 0.06185340881347656,
|
| 36882 |
+
"learning_rate": 0.0005912195770793028,
|
| 36883 |
+
"loss": 1.5051,
|
| 36884 |
+
"step": 10490
|
| 36885 |
+
},
|
| 36886 |
+
{
|
| 36887 |
+
"epoch": 0.4664977101951892,
|
| 36888 |
+
"grad_norm": 0.06247565150260925,
|
| 36889 |
+
"learning_rate": 0.0005910758380609308,
|
| 36890 |
+
"loss": 1.5057,
|
| 36891 |
+
"step": 10492
|
| 36892 |
+
},
|
| 36893 |
+
{
|
| 36894 |
+
"epoch": 0.4665866346538615,
|
| 36895 |
+
"grad_norm": 0.061248041689395905,
|
| 36896 |
+
"learning_rate": 0.000590932091256995,
|
| 36897 |
+
"loss": 1.4972,
|
| 36898 |
+
"step": 10494
|
| 36899 |
+
},
|
| 36900 |
+
{
|
| 36901 |
+
"epoch": 0.4666755591125339,
|
| 36902 |
+
"grad_norm": 0.062036920338869095,
|
| 36903 |
+
"learning_rate": 0.0005907883366797832,
|
| 36904 |
+
"loss": 1.5055,
|
| 36905 |
+
"step": 10496
|
| 36906 |
+
},
|
| 36907 |
+
{
|
| 36908 |
+
"epoch": 0.46676448357120626,
|
| 36909 |
+
"grad_norm": 0.0646434798836708,
|
| 36910 |
+
"learning_rate": 0.0005906445743415845,
|
| 36911 |
+
"loss": 1.5068,
|
| 36912 |
+
"step": 10498
|
| 36913 |
+
},
|
| 36914 |
+
{
|
| 36915 |
+
"epoch": 0.4668534080298786,
|
| 36916 |
+
"grad_norm": 0.06403973698616028,
|
| 36917 |
+
"learning_rate": 0.0005905008042546878,
|
| 36918 |
+
"loss": 1.5053,
|
| 36919 |
+
"step": 10500
|
| 36920 |
+
},
|
| 36921 |
+
{
|
| 36922 |
+
"epoch": 0.4668534080298786,
|
| 36923 |
+
"eval_loss": 1.4856921434402466,
|
| 36924 |
+
"eval_runtime": 12.4102,
|
| 36925 |
+
"eval_samples_per_second": 556.799,
|
| 36926 |
+
"eval_steps_per_second": 69.62,
|
| 36927 |
+
"step": 10500
|
| 36928 |
+
},
|
| 36929 |
+
{
|
| 36930 |
+
"epoch": 0.466942332488551,
|
| 36931 |
+
"grad_norm": 0.06568461656570435,
|
| 36932 |
+
"learning_rate": 0.0005903570264313837,
|
| 36933 |
+
"loss": 1.5073,
|
| 36934 |
+
"step": 10502
|
| 36935 |
+
},
|
| 36936 |
+
{
|
| 36937 |
+
"epoch": 0.46703125694722336,
|
| 36938 |
+
"grad_norm": 0.06413400918245316,
|
| 36939 |
+
"learning_rate": 0.0005902132408839626,
|
| 36940 |
+
"loss": 1.5047,
|
| 36941 |
+
"step": 10504
|
| 36942 |
+
},
|
| 36943 |
+
{
|
| 36944 |
+
"epoch": 0.4671201814058957,
|
| 36945 |
+
"grad_norm": 0.06379328668117523,
|
| 36946 |
+
"learning_rate": 0.0005900694476247164,
|
| 36947 |
+
"loss": 1.5017,
|
| 36948 |
+
"step": 10506
|
| 36949 |
+
},
|
| 36950 |
+
{
|
| 36951 |
+
"epoch": 0.46720910586456804,
|
| 36952 |
+
"grad_norm": 0.06230514496564865,
|
| 36953 |
+
"learning_rate": 0.0005899256466659369,
|
| 36954 |
+
"loss": 1.5085,
|
| 36955 |
+
"step": 10508
|
| 36956 |
+
},
|
| 36957 |
+
{
|
| 36958 |
+
"epoch": 0.4672980303232404,
|
| 36959 |
+
"grad_norm": 0.06407498568296432,
|
| 36960 |
+
"learning_rate": 0.0005897818380199165,
|
| 36961 |
+
"loss": 1.5085,
|
| 36962 |
+
"step": 10510
|
| 36963 |
+
},
|
| 36964 |
+
{
|
| 36965 |
+
"epoch": 0.4673869547819128,
|
| 36966 |
+
"grad_norm": 0.06471338123083115,
|
| 36967 |
+
"learning_rate": 0.0005896380216989495,
|
| 36968 |
+
"loss": 1.5057,
|
| 36969 |
+
"step": 10512
|
| 36970 |
+
},
|
| 36971 |
+
{
|
| 36972 |
+
"epoch": 0.46747587924058515,
|
| 36973 |
+
"grad_norm": 0.06504790484905243,
|
| 36974 |
+
"learning_rate": 0.0005894941977153289,
|
| 36975 |
+
"loss": 1.5093,
|
| 36976 |
+
"step": 10514
|
| 36977 |
+
},
|
| 36978 |
+
{
|
| 36979 |
+
"epoch": 0.46756480369925746,
|
| 36980 |
+
"grad_norm": 0.06497068703174591,
|
| 36981 |
+
"learning_rate": 0.0005893503660813499,
|
| 36982 |
+
"loss": 1.5046,
|
| 36983 |
+
"step": 10516
|
| 36984 |
+
},
|
| 36985 |
+
{
|
| 36986 |
+
"epoch": 0.4676537281579298,
|
| 36987 |
+
"grad_norm": 0.06451025605201721,
|
| 36988 |
+
"learning_rate": 0.000589206526809308,
|
| 36989 |
+
"loss": 1.504,
|
| 36990 |
+
"step": 10518
|
| 36991 |
+
},
|
| 36992 |
+
{
|
| 36993 |
+
"epoch": 0.4677426526166022,
|
| 36994 |
+
"grad_norm": 0.062198251485824585,
|
| 36995 |
+
"learning_rate": 0.0005890626799114991,
|
| 36996 |
+
"loss": 1.5068,
|
| 36997 |
+
"step": 10520
|
| 36998 |
+
},
|
| 36999 |
+
{
|
| 37000 |
+
"epoch": 0.46783157707527456,
|
| 37001 |
+
"grad_norm": 0.0641157478094101,
|
| 37002 |
+
"learning_rate": 0.0005889188254002198,
|
| 37003 |
+
"loss": 1.5027,
|
| 37004 |
+
"step": 10522
|
| 37005 |
+
},
|
| 37006 |
+
{
|
| 37007 |
+
"epoch": 0.46792050153394693,
|
| 37008 |
+
"grad_norm": 0.06388696283102036,
|
| 37009 |
+
"learning_rate": 0.0005887749632877673,
|
| 37010 |
+
"loss": 1.5103,
|
| 37011 |
+
"step": 10524
|
| 37012 |
+
},
|
| 37013 |
+
{
|
| 37014 |
+
"epoch": 0.46800942599261924,
|
| 37015 |
+
"grad_norm": 0.06389610469341278,
|
| 37016 |
+
"learning_rate": 0.0005886310935864399,
|
| 37017 |
+
"loss": 1.5049,
|
| 37018 |
+
"step": 10526
|
| 37019 |
+
},
|
| 37020 |
+
{
|
| 37021 |
+
"epoch": 0.4680983504512916,
|
| 37022 |
+
"grad_norm": 0.06436503678560257,
|
| 37023 |
+
"learning_rate": 0.0005884872163085359,
|
| 37024 |
+
"loss": 1.5033,
|
| 37025 |
+
"step": 10528
|
| 37026 |
+
},
|
| 37027 |
+
{
|
| 37028 |
+
"epoch": 0.468187274909964,
|
| 37029 |
+
"grad_norm": 0.06388412415981293,
|
| 37030 |
+
"learning_rate": 0.0005883433314663549,
|
| 37031 |
+
"loss": 1.5082,
|
| 37032 |
+
"step": 10530
|
| 37033 |
+
},
|
| 37034 |
+
{
|
| 37035 |
+
"epoch": 0.46827619936863635,
|
| 37036 |
+
"grad_norm": 0.06415783613920212,
|
| 37037 |
+
"learning_rate": 0.0005881994390721964,
|
| 37038 |
+
"loss": 1.5031,
|
| 37039 |
+
"step": 10532
|
| 37040 |
+
},
|
| 37041 |
+
{
|
| 37042 |
+
"epoch": 0.4683651238273087,
|
| 37043 |
+
"grad_norm": 0.06618549674749374,
|
| 37044 |
+
"learning_rate": 0.0005880555391383613,
|
| 37045 |
+
"loss": 1.4983,
|
| 37046 |
+
"step": 10534
|
| 37047 |
+
},
|
| 37048 |
+
{
|
| 37049 |
+
"epoch": 0.4684540482859811,
|
| 37050 |
+
"grad_norm": 0.06373777985572815,
|
| 37051 |
+
"learning_rate": 0.0005879116316771507,
|
| 37052 |
+
"loss": 1.5058,
|
| 37053 |
+
"step": 10536
|
| 37054 |
+
},
|
| 37055 |
+
{
|
| 37056 |
+
"epoch": 0.4685429727446534,
|
| 37057 |
+
"grad_norm": 0.06715410202741623,
|
| 37058 |
+
"learning_rate": 0.0005877677167008663,
|
| 37059 |
+
"loss": 1.5013,
|
| 37060 |
+
"step": 10538
|
| 37061 |
+
},
|
| 37062 |
+
{
|
| 37063 |
+
"epoch": 0.46863189720332576,
|
| 37064 |
+
"grad_norm": 0.0642244890332222,
|
| 37065 |
+
"learning_rate": 0.0005876237942218107,
|
| 37066 |
+
"loss": 1.5073,
|
| 37067 |
+
"step": 10540
|
| 37068 |
+
},
|
| 37069 |
+
{
|
| 37070 |
+
"epoch": 0.46872082166199813,
|
| 37071 |
+
"grad_norm": 0.06601016223430634,
|
| 37072 |
+
"learning_rate": 0.0005874798642522869,
|
| 37073 |
+
"loss": 1.4981,
|
| 37074 |
+
"step": 10542
|
| 37075 |
+
},
|
| 37076 |
+
{
|
| 37077 |
+
"epoch": 0.4688097461206705,
|
| 37078 |
+
"grad_norm": 0.06458527594804764,
|
| 37079 |
+
"learning_rate": 0.0005873359268045991,
|
| 37080 |
+
"loss": 1.5065,
|
| 37081 |
+
"step": 10544
|
| 37082 |
+
},
|
| 37083 |
+
{
|
| 37084 |
+
"epoch": 0.46889867057934287,
|
| 37085 |
+
"grad_norm": 0.06326668709516525,
|
| 37086 |
+
"learning_rate": 0.0005871919818910511,
|
| 37087 |
+
"loss": 1.5067,
|
| 37088 |
+
"step": 10546
|
| 37089 |
+
},
|
| 37090 |
+
{
|
| 37091 |
+
"epoch": 0.4689875950380152,
|
| 37092 |
+
"grad_norm": 0.06331662088632584,
|
| 37093 |
+
"learning_rate": 0.0005870480295239486,
|
| 37094 |
+
"loss": 1.5053,
|
| 37095 |
+
"step": 10548
|
| 37096 |
+
},
|
| 37097 |
+
{
|
| 37098 |
+
"epoch": 0.46907651949668755,
|
| 37099 |
+
"grad_norm": 0.0645529255270958,
|
| 37100 |
+
"learning_rate": 0.0005869040697155966,
|
| 37101 |
+
"loss": 1.5113,
|
| 37102 |
+
"step": 10550
|
| 37103 |
+
},
|
| 37104 |
+
{
|
| 37105 |
+
"epoch": 0.4691654439553599,
|
| 37106 |
+
"grad_norm": 0.0635153278708458,
|
| 37107 |
+
"learning_rate": 0.0005867601024783021,
|
| 37108 |
+
"loss": 1.503,
|
| 37109 |
+
"step": 10552
|
| 37110 |
+
},
|
| 37111 |
+
{
|
| 37112 |
+
"epoch": 0.4692543684140323,
|
| 37113 |
+
"grad_norm": 0.06458617746829987,
|
| 37114 |
+
"learning_rate": 0.0005866161278243713,
|
| 37115 |
+
"loss": 1.5021,
|
| 37116 |
+
"step": 10554
|
| 37117 |
+
},
|
| 37118 |
+
{
|
| 37119 |
+
"epoch": 0.46934329287270465,
|
| 37120 |
+
"grad_norm": 0.0654539167881012,
|
| 37121 |
+
"learning_rate": 0.0005864721457661124,
|
| 37122 |
+
"loss": 1.5025,
|
| 37123 |
+
"step": 10556
|
| 37124 |
+
},
|
| 37125 |
+
{
|
| 37126 |
+
"epoch": 0.469432217331377,
|
| 37127 |
+
"grad_norm": 0.06510742008686066,
|
| 37128 |
+
"learning_rate": 0.0005863281563158332,
|
| 37129 |
+
"loss": 1.5045,
|
| 37130 |
+
"step": 10558
|
| 37131 |
+
},
|
| 37132 |
+
{
|
| 37133 |
+
"epoch": 0.46952114179004933,
|
| 37134 |
+
"grad_norm": 0.0627632662653923,
|
| 37135 |
+
"learning_rate": 0.000586184159485843,
|
| 37136 |
+
"loss": 1.5048,
|
| 37137 |
+
"step": 10560
|
| 37138 |
+
},
|
| 37139 |
+
{
|
| 37140 |
+
"epoch": 0.4696100662487217,
|
| 37141 |
+
"grad_norm": 0.06467565894126892,
|
| 37142 |
+
"learning_rate": 0.000586040155288451,
|
| 37143 |
+
"loss": 1.5027,
|
| 37144 |
+
"step": 10562
|
| 37145 |
+
},
|
| 37146 |
+
{
|
| 37147 |
+
"epoch": 0.46969899070739407,
|
| 37148 |
+
"grad_norm": 0.06286469101905823,
|
| 37149 |
+
"learning_rate": 0.0005858961437359674,
|
| 37150 |
+
"loss": 1.4974,
|
| 37151 |
+
"step": 10564
|
| 37152 |
+
},
|
| 37153 |
+
{
|
| 37154 |
+
"epoch": 0.46978791516606644,
|
| 37155 |
+
"grad_norm": 0.06415469199419022,
|
| 37156 |
+
"learning_rate": 0.0005857521248407027,
|
| 37157 |
+
"loss": 1.5036,
|
| 37158 |
+
"step": 10566
|
| 37159 |
+
},
|
| 37160 |
+
{
|
| 37161 |
+
"epoch": 0.4698768396247388,
|
| 37162 |
+
"grad_norm": 0.06374349445104599,
|
| 37163 |
+
"learning_rate": 0.0005856080986149687,
|
| 37164 |
+
"loss": 1.4962,
|
| 37165 |
+
"step": 10568
|
| 37166 |
+
},
|
| 37167 |
+
{
|
| 37168 |
+
"epoch": 0.4699657640834111,
|
| 37169 |
+
"grad_norm": 0.0641632080078125,
|
| 37170 |
+
"learning_rate": 0.0005854640650710771,
|
| 37171 |
+
"loss": 1.5011,
|
| 37172 |
+
"step": 10570
|
| 37173 |
+
},
|
| 37174 |
+
{
|
| 37175 |
+
"epoch": 0.4700546885420835,
|
| 37176 |
+
"grad_norm": 0.06440167129039764,
|
| 37177 |
+
"learning_rate": 0.0005853200242213405,
|
| 37178 |
+
"loss": 1.5109,
|
| 37179 |
+
"step": 10572
|
| 37180 |
+
},
|
| 37181 |
+
{
|
| 37182 |
+
"epoch": 0.47014361300075586,
|
| 37183 |
+
"grad_norm": 0.06403553485870361,
|
| 37184 |
+
"learning_rate": 0.0005851759760780724,
|
| 37185 |
+
"loss": 1.5112,
|
| 37186 |
+
"step": 10574
|
| 37187 |
+
},
|
| 37188 |
+
{
|
| 37189 |
+
"epoch": 0.4702325374594282,
|
| 37190 |
+
"grad_norm": 0.06359109282493591,
|
| 37191 |
+
"learning_rate": 0.0005850319206535863,
|
| 37192 |
+
"loss": 1.5012,
|
| 37193 |
+
"step": 10576
|
| 37194 |
+
},
|
| 37195 |
+
{
|
| 37196 |
+
"epoch": 0.4703214619181006,
|
| 37197 |
+
"grad_norm": 0.06405449658632278,
|
| 37198 |
+
"learning_rate": 0.0005848878579601971,
|
| 37199 |
+
"loss": 1.5061,
|
| 37200 |
+
"step": 10578
|
| 37201 |
+
},
|
| 37202 |
+
{
|
| 37203 |
+
"epoch": 0.4704103863767729,
|
| 37204 |
+
"grad_norm": 0.06662164628505707,
|
| 37205 |
+
"learning_rate": 0.0005847437880102196,
|
| 37206 |
+
"loss": 1.5053,
|
| 37207 |
+
"step": 10580
|
| 37208 |
+
},
|
| 37209 |
+
{
|
| 37210 |
+
"epoch": 0.4704993108354453,
|
| 37211 |
+
"grad_norm": 0.06323253363370895,
|
| 37212 |
+
"learning_rate": 0.0005845997108159697,
|
| 37213 |
+
"loss": 1.5118,
|
| 37214 |
+
"step": 10582
|
| 37215 |
+
},
|
| 37216 |
+
{
|
| 37217 |
+
"epoch": 0.47058823529411764,
|
| 37218 |
+
"grad_norm": 0.06517712771892548,
|
| 37219 |
+
"learning_rate": 0.0005844556263897637,
|
| 37220 |
+
"loss": 1.5091,
|
| 37221 |
+
"step": 10584
|
| 37222 |
+
},
|
| 37223 |
+
{
|
| 37224 |
+
"epoch": 0.47067715975279,
|
| 37225 |
+
"grad_norm": 0.0638645812869072,
|
| 37226 |
+
"learning_rate": 0.0005843115347439184,
|
| 37227 |
+
"loss": 1.5093,
|
| 37228 |
+
"step": 10586
|
| 37229 |
+
},
|
| 37230 |
+
{
|
| 37231 |
+
"epoch": 0.4707660842114624,
|
| 37232 |
+
"grad_norm": 0.06771344691514969,
|
| 37233 |
+
"learning_rate": 0.0005841674358907517,
|
| 37234 |
+
"loss": 1.512,
|
| 37235 |
+
"step": 10588
|
| 37236 |
+
},
|
| 37237 |
+
{
|
| 37238 |
+
"epoch": 0.47085500867013474,
|
| 37239 |
+
"grad_norm": 0.06667336076498032,
|
| 37240 |
+
"learning_rate": 0.0005840233298425818,
|
| 37241 |
+
"loss": 1.5051,
|
| 37242 |
+
"step": 10590
|
| 37243 |
+
},
|
| 37244 |
+
{
|
| 37245 |
+
"epoch": 0.47094393312880706,
|
| 37246 |
+
"grad_norm": 0.06689155846834183,
|
| 37247 |
+
"learning_rate": 0.0005838792166117273,
|
| 37248 |
+
"loss": 1.5107,
|
| 37249 |
+
"step": 10592
|
| 37250 |
+
},
|
| 37251 |
+
{
|
| 37252 |
+
"epoch": 0.4710328575874794,
|
| 37253 |
+
"grad_norm": 0.06521068513393402,
|
| 37254 |
+
"learning_rate": 0.0005837350962105076,
|
| 37255 |
+
"loss": 1.508,
|
| 37256 |
+
"step": 10594
|
| 37257 |
+
},
|
| 37258 |
+
{
|
| 37259 |
+
"epoch": 0.4711217820461518,
|
| 37260 |
+
"grad_norm": 0.06260724365711212,
|
| 37261 |
+
"learning_rate": 0.0005835909686512429,
|
| 37262 |
+
"loss": 1.4977,
|
| 37263 |
+
"step": 10596
|
| 37264 |
+
},
|
| 37265 |
+
{
|
| 37266 |
+
"epoch": 0.47121070650482416,
|
| 37267 |
+
"grad_norm": 0.06671774387359619,
|
| 37268 |
+
"learning_rate": 0.0005834468339462539,
|
| 37269 |
+
"loss": 1.5029,
|
| 37270 |
+
"step": 10598
|
| 37271 |
+
},
|
| 37272 |
+
{
|
| 37273 |
+
"epoch": 0.47129963096349653,
|
| 37274 |
+
"grad_norm": 0.0636298730969429,
|
| 37275 |
+
"learning_rate": 0.0005833026921078616,
|
| 37276 |
+
"loss": 1.5078,
|
| 37277 |
+
"step": 10600
|
| 37278 |
+
},
|
| 37279 |
+
{
|
| 37280 |
+
"epoch": 0.47138855542216884,
|
| 37281 |
+
"grad_norm": 0.0641683042049408,
|
| 37282 |
+
"learning_rate": 0.0005831585431483883,
|
| 37283 |
+
"loss": 1.4961,
|
| 37284 |
+
"step": 10602
|
| 37285 |
+
},
|
| 37286 |
+
{
|
| 37287 |
+
"epoch": 0.4714774798808412,
|
| 37288 |
+
"grad_norm": 0.06526412814855576,
|
| 37289 |
+
"learning_rate": 0.0005830143870801562,
|
| 37290 |
+
"loss": 1.5074,
|
| 37291 |
+
"step": 10604
|
| 37292 |
+
},
|
| 37293 |
+
{
|
| 37294 |
+
"epoch": 0.4715664043395136,
|
| 37295 |
+
"grad_norm": 0.06388209015130997,
|
| 37296 |
+
"learning_rate": 0.0005828702239154886,
|
| 37297 |
+
"loss": 1.5082,
|
| 37298 |
+
"step": 10606
|
| 37299 |
+
},
|
| 37300 |
+
{
|
| 37301 |
+
"epoch": 0.47165532879818595,
|
| 37302 |
+
"grad_norm": 0.0643208771944046,
|
| 37303 |
+
"learning_rate": 0.0005827260536667089,
|
| 37304 |
+
"loss": 1.5034,
|
| 37305 |
+
"step": 10608
|
| 37306 |
+
},
|
| 37307 |
+
{
|
| 37308 |
+
"epoch": 0.4717442532568583,
|
| 37309 |
+
"grad_norm": 0.0622149333357811,
|
| 37310 |
+
"learning_rate": 0.0005825818763461416,
|
| 37311 |
+
"loss": 1.5036,
|
| 37312 |
+
"step": 10610
|
| 37313 |
+
},
|
| 37314 |
+
{
|
| 37315 |
+
"epoch": 0.4718331777155307,
|
| 37316 |
+
"grad_norm": 0.06327205151319504,
|
| 37317 |
+
"learning_rate": 0.0005824376919661114,
|
| 37318 |
+
"loss": 1.5018,
|
| 37319 |
+
"step": 10612
|
| 37320 |
+
},
|
| 37321 |
+
{
|
| 37322 |
+
"epoch": 0.471922102174203,
|
| 37323 |
+
"grad_norm": 0.06469540297985077,
|
| 37324 |
+
"learning_rate": 0.0005822935005389443,
|
| 37325 |
+
"loss": 1.5091,
|
| 37326 |
+
"step": 10614
|
| 37327 |
+
},
|
| 37328 |
+
{
|
| 37329 |
+
"epoch": 0.47201102663287536,
|
| 37330 |
+
"grad_norm": 0.06485003978013992,
|
| 37331 |
+
"learning_rate": 0.000582149302076966,
|
| 37332 |
+
"loss": 1.5048,
|
| 37333 |
+
"step": 10616
|
| 37334 |
+
},
|
| 37335 |
+
{
|
| 37336 |
+
"epoch": 0.47209995109154773,
|
| 37337 |
+
"grad_norm": 0.06667590141296387,
|
| 37338 |
+
"learning_rate": 0.0005820050965925032,
|
| 37339 |
+
"loss": 1.5053,
|
| 37340 |
+
"step": 10618
|
| 37341 |
+
},
|
| 37342 |
+
{
|
| 37343 |
+
"epoch": 0.4721888755502201,
|
| 37344 |
+
"grad_norm": 0.0658281147480011,
|
| 37345 |
+
"learning_rate": 0.0005818608840978837,
|
| 37346 |
+
"loss": 1.5085,
|
| 37347 |
+
"step": 10620
|
| 37348 |
+
},
|
| 37349 |
+
{
|
| 37350 |
+
"epoch": 0.47227780000889247,
|
| 37351 |
+
"grad_norm": 0.06506504863500595,
|
| 37352 |
+
"learning_rate": 0.0005817166646054348,
|
| 37353 |
+
"loss": 1.5044,
|
| 37354 |
+
"step": 10622
|
| 37355 |
+
},
|
| 37356 |
+
{
|
| 37357 |
+
"epoch": 0.4723667244675648,
|
| 37358 |
+
"grad_norm": 0.06641970574855804,
|
| 37359 |
+
"learning_rate": 0.0005815724381274854,
|
| 37360 |
+
"loss": 1.507,
|
| 37361 |
+
"step": 10624
|
| 37362 |
+
},
|
| 37363 |
+
{
|
| 37364 |
+
"epoch": 0.47245564892623715,
|
| 37365 |
+
"grad_norm": 0.06248960644006729,
|
| 37366 |
+
"learning_rate": 0.0005814282046763643,
|
| 37367 |
+
"loss": 1.505,
|
| 37368 |
+
"step": 10626
|
| 37369 |
+
},
|
| 37370 |
+
{
|
| 37371 |
+
"epoch": 0.4725445733849095,
|
| 37372 |
+
"grad_norm": 0.06606055051088333,
|
| 37373 |
+
"learning_rate": 0.0005812839642644017,
|
| 37374 |
+
"loss": 1.5084,
|
| 37375 |
+
"step": 10628
|
| 37376 |
+
},
|
| 37377 |
+
{
|
| 37378 |
+
"epoch": 0.4726334978435819,
|
| 37379 |
+
"grad_norm": 0.06419503688812256,
|
| 37380 |
+
"learning_rate": 0.0005811397169039277,
|
| 37381 |
+
"loss": 1.5038,
|
| 37382 |
+
"step": 10630
|
| 37383 |
+
},
|
| 37384 |
+
{
|
| 37385 |
+
"epoch": 0.47272242230225425,
|
| 37386 |
+
"grad_norm": 0.06354578584432602,
|
| 37387 |
+
"learning_rate": 0.0005809954626072728,
|
| 37388 |
+
"loss": 1.5025,
|
| 37389 |
+
"step": 10632
|
| 37390 |
+
},
|
| 37391 |
+
{
|
| 37392 |
+
"epoch": 0.47281134676092657,
|
| 37393 |
+
"grad_norm": 0.06533468514680862,
|
| 37394 |
+
"learning_rate": 0.000580851201386769,
|
| 37395 |
+
"loss": 1.5118,
|
| 37396 |
+
"step": 10634
|
| 37397 |
+
},
|
| 37398 |
+
{
|
| 37399 |
+
"epoch": 0.47290027121959893,
|
| 37400 |
+
"grad_norm": 0.06488578021526337,
|
| 37401 |
+
"learning_rate": 0.0005807069332547482,
|
| 37402 |
+
"loss": 1.5051,
|
| 37403 |
+
"step": 10636
|
| 37404 |
+
},
|
| 37405 |
+
{
|
| 37406 |
+
"epoch": 0.4729891956782713,
|
| 37407 |
+
"grad_norm": 0.061041004955768585,
|
| 37408 |
+
"learning_rate": 0.000580562658223543,
|
| 37409 |
+
"loss": 1.5046,
|
| 37410 |
+
"step": 10638
|
| 37411 |
+
},
|
| 37412 |
+
{
|
| 37413 |
+
"epoch": 0.47307812013694367,
|
| 37414 |
+
"grad_norm": 0.06147739291191101,
|
| 37415 |
+
"learning_rate": 0.0005804183763054869,
|
| 37416 |
+
"loss": 1.5003,
|
| 37417 |
+
"step": 10640
|
| 37418 |
+
},
|
| 37419 |
+
{
|
| 37420 |
+
"epoch": 0.47316704459561604,
|
| 37421 |
+
"grad_norm": 0.06590231508016586,
|
| 37422 |
+
"learning_rate": 0.0005802740875129135,
|
| 37423 |
+
"loss": 1.5015,
|
| 37424 |
+
"step": 10642
|
| 37425 |
+
},
|
| 37426 |
+
{
|
| 37427 |
+
"epoch": 0.4732559690542884,
|
| 37428 |
+
"grad_norm": 0.06333669275045395,
|
| 37429 |
+
"learning_rate": 0.0005801297918581574,
|
| 37430 |
+
"loss": 1.5054,
|
| 37431 |
+
"step": 10644
|
| 37432 |
+
},
|
| 37433 |
+
{
|
| 37434 |
+
"epoch": 0.4733448935129607,
|
| 37435 |
+
"grad_norm": 0.06394929438829422,
|
| 37436 |
+
"learning_rate": 0.0005799854893535535,
|
| 37437 |
+
"loss": 1.4967,
|
| 37438 |
+
"step": 10646
|
| 37439 |
+
},
|
| 37440 |
+
{
|
| 37441 |
+
"epoch": 0.4734338179716331,
|
| 37442 |
+
"grad_norm": 0.06518962234258652,
|
| 37443 |
+
"learning_rate": 0.0005798411800114375,
|
| 37444 |
+
"loss": 1.5029,
|
| 37445 |
+
"step": 10648
|
| 37446 |
+
},
|
| 37447 |
+
{
|
| 37448 |
+
"epoch": 0.47352274243030545,
|
| 37449 |
+
"grad_norm": 0.06461624801158905,
|
| 37450 |
+
"learning_rate": 0.0005796968638441455,
|
| 37451 |
+
"loss": 1.5038,
|
| 37452 |
+
"step": 10650
|
| 37453 |
+
},
|
| 37454 |
+
{
|
| 37455 |
+
"epoch": 0.4736116668889778,
|
| 37456 |
+
"grad_norm": 0.06398481875658035,
|
| 37457 |
+
"learning_rate": 0.0005795525408640146,
|
| 37458 |
+
"loss": 1.501,
|
| 37459 |
+
"step": 10652
|
| 37460 |
+
},
|
| 37461 |
+
{
|
| 37462 |
+
"epoch": 0.4737005913476502,
|
| 37463 |
+
"grad_norm": 0.06461846828460693,
|
| 37464 |
+
"learning_rate": 0.0005794082110833817,
|
| 37465 |
+
"loss": 1.5096,
|
| 37466 |
+
"step": 10654
|
| 37467 |
+
},
|
| 37468 |
+
{
|
| 37469 |
+
"epoch": 0.4737895158063225,
|
| 37470 |
+
"grad_norm": 0.0636335164308548,
|
| 37471 |
+
"learning_rate": 0.0005792638745145851,
|
| 37472 |
+
"loss": 1.504,
|
| 37473 |
+
"step": 10656
|
| 37474 |
+
},
|
| 37475 |
+
{
|
| 37476 |
+
"epoch": 0.47387844026499487,
|
| 37477 |
+
"grad_norm": 0.0646030604839325,
|
| 37478 |
+
"learning_rate": 0.0005791195311699631,
|
| 37479 |
+
"loss": 1.5081,
|
| 37480 |
+
"step": 10658
|
| 37481 |
+
},
|
| 37482 |
+
{
|
| 37483 |
+
"epoch": 0.47396736472366724,
|
| 37484 |
+
"grad_norm": 0.06314581632614136,
|
| 37485 |
+
"learning_rate": 0.0005789751810618551,
|
| 37486 |
+
"loss": 1.5029,
|
| 37487 |
+
"step": 10660
|
| 37488 |
+
},
|
| 37489 |
+
{
|
| 37490 |
+
"epoch": 0.4740562891823396,
|
| 37491 |
+
"grad_norm": 0.06447373330593109,
|
| 37492 |
+
"learning_rate": 0.0005788308242026004,
|
| 37493 |
+
"loss": 1.5029,
|
| 37494 |
+
"step": 10662
|
| 37495 |
+
},
|
| 37496 |
+
{
|
| 37497 |
+
"epoch": 0.474145213641012,
|
| 37498 |
+
"grad_norm": 0.06374399363994598,
|
| 37499 |
+
"learning_rate": 0.0005786864606045396,
|
| 37500 |
+
"loss": 1.5069,
|
| 37501 |
+
"step": 10664
|
| 37502 |
+
},
|
| 37503 |
+
{
|
| 37504 |
+
"epoch": 0.47423413809968434,
|
| 37505 |
+
"grad_norm": 0.06367414444684982,
|
| 37506 |
+
"learning_rate": 0.0005785420902800131,
|
| 37507 |
+
"loss": 1.4976,
|
| 37508 |
+
"step": 10666
|
| 37509 |
+
},
|
| 37510 |
+
{
|
| 37511 |
+
"epoch": 0.47432306255835666,
|
| 37512 |
+
"grad_norm": 0.06507609784603119,
|
| 37513 |
+
"learning_rate": 0.0005783977132413629,
|
| 37514 |
+
"loss": 1.5004,
|
| 37515 |
+
"step": 10668
|
| 37516 |
+
},
|
| 37517 |
+
{
|
| 37518 |
+
"epoch": 0.474411987017029,
|
| 37519 |
+
"grad_norm": 0.06401117891073227,
|
| 37520 |
+
"learning_rate": 0.0005782533295009307,
|
| 37521 |
+
"loss": 1.5017,
|
| 37522 |
+
"step": 10670
|
| 37523 |
+
},
|
| 37524 |
+
{
|
| 37525 |
+
"epoch": 0.4745009114757014,
|
| 37526 |
+
"grad_norm": 0.06449148803949356,
|
| 37527 |
+
"learning_rate": 0.0005781089390710588,
|
| 37528 |
+
"loss": 1.5078,
|
| 37529 |
+
"step": 10672
|
| 37530 |
+
},
|
| 37531 |
+
{
|
| 37532 |
+
"epoch": 0.47458983593437376,
|
| 37533 |
+
"grad_norm": 0.06876698136329651,
|
| 37534 |
+
"learning_rate": 0.0005779645419640907,
|
| 37535 |
+
"loss": 1.5033,
|
| 37536 |
+
"step": 10674
|
| 37537 |
+
},
|
| 37538 |
+
{
|
| 37539 |
+
"epoch": 0.47467876039304613,
|
| 37540 |
+
"grad_norm": 0.06464815884828568,
|
| 37541 |
+
"learning_rate": 0.00057782013819237,
|
| 37542 |
+
"loss": 1.5032,
|
| 37543 |
+
"step": 10676
|
| 37544 |
+
},
|
| 37545 |
+
{
|
| 37546 |
+
"epoch": 0.47476768485171844,
|
| 37547 |
+
"grad_norm": 0.06513024866580963,
|
| 37548 |
+
"learning_rate": 0.000577675727768241,
|
| 37549 |
+
"loss": 1.5039,
|
| 37550 |
+
"step": 10678
|
| 37551 |
+
},
|
| 37552 |
+
{
|
| 37553 |
+
"epoch": 0.4748566093103908,
|
| 37554 |
+
"grad_norm": 0.06479975581169128,
|
| 37555 |
+
"learning_rate": 0.0005775313107040483,
|
| 37556 |
+
"loss": 1.5033,
|
| 37557 |
+
"step": 10680
|
| 37558 |
+
},
|
| 37559 |
+
{
|
| 37560 |
+
"epoch": 0.4749455337690632,
|
| 37561 |
+
"grad_norm": 0.06562959402799606,
|
| 37562 |
+
"learning_rate": 0.0005773868870121377,
|
| 37563 |
+
"loss": 1.5035,
|
| 37564 |
+
"step": 10682
|
| 37565 |
+
},
|
| 37566 |
+
{
|
| 37567 |
+
"epoch": 0.47503445822773555,
|
| 37568 |
+
"grad_norm": 0.06198382005095482,
|
| 37569 |
+
"learning_rate": 0.0005772424567048549,
|
| 37570 |
+
"loss": 1.502,
|
| 37571 |
+
"step": 10684
|
| 37572 |
+
},
|
| 37573 |
+
{
|
| 37574 |
+
"epoch": 0.4751233826864079,
|
| 37575 |
+
"grad_norm": 0.0658661350607872,
|
| 37576 |
+
"learning_rate": 0.0005770980197945464,
|
| 37577 |
+
"loss": 1.5029,
|
| 37578 |
+
"step": 10686
|
| 37579 |
+
},
|
| 37580 |
+
{
|
| 37581 |
+
"epoch": 0.4752123071450803,
|
| 37582 |
+
"grad_norm": 0.06593360006809235,
|
| 37583 |
+
"learning_rate": 0.0005769535762935595,
|
| 37584 |
+
"loss": 1.5024,
|
| 37585 |
+
"step": 10688
|
| 37586 |
+
},
|
| 37587 |
+
{
|
| 37588 |
+
"epoch": 0.4753012316037526,
|
| 37589 |
+
"grad_norm": 0.06711971014738083,
|
| 37590 |
+
"learning_rate": 0.0005768091262142416,
|
| 37591 |
+
"loss": 1.5028,
|
| 37592 |
+
"step": 10690
|
| 37593 |
+
},
|
| 37594 |
+
{
|
| 37595 |
+
"epoch": 0.47539015606242496,
|
| 37596 |
+
"grad_norm": 0.06662214547395706,
|
| 37597 |
+
"learning_rate": 0.0005766646695689415,
|
| 37598 |
+
"loss": 1.4979,
|
| 37599 |
+
"step": 10692
|
| 37600 |
+
},
|
| 37601 |
+
{
|
| 37602 |
+
"epoch": 0.47547908052109733,
|
| 37603 |
+
"grad_norm": 0.062251124531030655,
|
| 37604 |
+
"learning_rate": 0.0005765202063700072,
|
| 37605 |
+
"loss": 1.5074,
|
| 37606 |
+
"step": 10694
|
| 37607 |
+
},
|
| 37608 |
+
{
|
| 37609 |
+
"epoch": 0.4755680049797697,
|
| 37610 |
+
"grad_norm": 0.0647764578461647,
|
| 37611 |
+
"learning_rate": 0.0005763757366297886,
|
| 37612 |
+
"loss": 1.5038,
|
| 37613 |
+
"step": 10696
|
| 37614 |
+
},
|
| 37615 |
+
{
|
| 37616 |
+
"epoch": 0.47565692943844207,
|
| 37617 |
+
"grad_norm": 0.06302308291196823,
|
| 37618 |
+
"learning_rate": 0.0005762312603606355,
|
| 37619 |
+
"loss": 1.4976,
|
| 37620 |
+
"step": 10698
|
| 37621 |
+
},
|
| 37622 |
+
{
|
| 37623 |
+
"epoch": 0.4757458538971144,
|
| 37624 |
+
"grad_norm": 0.06450463831424713,
|
| 37625 |
+
"learning_rate": 0.0005760867775748983,
|
| 37626 |
+
"loss": 1.502,
|
| 37627 |
+
"step": 10700
|
| 37628 |
+
},
|
| 37629 |
+
{
|
| 37630 |
+
"epoch": 0.47583477835578675,
|
| 37631 |
+
"grad_norm": 0.06613396853208542,
|
| 37632 |
+
"learning_rate": 0.000575942288284928,
|
| 37633 |
+
"loss": 1.4991,
|
| 37634 |
+
"step": 10702
|
| 37635 |
+
},
|
| 37636 |
+
{
|
| 37637 |
+
"epoch": 0.4759237028144591,
|
| 37638 |
+
"grad_norm": 0.06332564353942871,
|
| 37639 |
+
"learning_rate": 0.0005757977925030763,
|
| 37640 |
+
"loss": 1.5066,
|
| 37641 |
+
"step": 10704
|
| 37642 |
+
},
|
| 37643 |
+
{
|
| 37644 |
+
"epoch": 0.4760126272731315,
|
| 37645 |
+
"grad_norm": 0.06272386014461517,
|
| 37646 |
+
"learning_rate": 0.0005756532902416952,
|
| 37647 |
+
"loss": 1.5016,
|
| 37648 |
+
"step": 10706
|
| 37649 |
+
},
|
| 37650 |
+
{
|
| 37651 |
+
"epoch": 0.47610155173180385,
|
| 37652 |
+
"grad_norm": 0.06387588381767273,
|
| 37653 |
+
"learning_rate": 0.0005755087815131375,
|
| 37654 |
+
"loss": 1.501,
|
| 37655 |
+
"step": 10708
|
| 37656 |
+
},
|
| 37657 |
+
{
|
| 37658 |
+
"epoch": 0.47619047619047616,
|
| 37659 |
+
"grad_norm": 0.0668317899107933,
|
| 37660 |
+
"learning_rate": 0.0005753642663297564,
|
| 37661 |
+
"loss": 1.5068,
|
| 37662 |
+
"step": 10710
|
| 37663 |
+
},
|
| 37664 |
+
{
|
| 37665 |
+
"epoch": 0.47627940064914853,
|
| 37666 |
+
"grad_norm": 0.06176994368433952,
|
| 37667 |
+
"learning_rate": 0.0005752197447039056,
|
| 37668 |
+
"loss": 1.5082,
|
| 37669 |
+
"step": 10712
|
| 37670 |
+
},
|
| 37671 |
+
{
|
| 37672 |
+
"epoch": 0.4763683251078209,
|
| 37673 |
+
"grad_norm": 0.06459102034568787,
|
| 37674 |
+
"learning_rate": 0.0005750752166479397,
|
| 37675 |
+
"loss": 1.5063,
|
| 37676 |
+
"step": 10714
|
| 37677 |
+
},
|
| 37678 |
+
{
|
| 37679 |
+
"epoch": 0.47645724956649327,
|
| 37680 |
+
"grad_norm": 0.0622757263481617,
|
| 37681 |
+
"learning_rate": 0.0005749306821742132,
|
| 37682 |
+
"loss": 1.5021,
|
| 37683 |
+
"step": 10716
|
| 37684 |
+
},
|
| 37685 |
+
{
|
| 37686 |
+
"epoch": 0.47654617402516564,
|
| 37687 |
+
"grad_norm": 0.06583724915981293,
|
| 37688 |
+
"learning_rate": 0.0005747861412950821,
|
| 37689 |
+
"loss": 1.5038,
|
| 37690 |
+
"step": 10718
|
| 37691 |
+
},
|
| 37692 |
+
{
|
| 37693 |
+
"epoch": 0.476635098483838,
|
| 37694 |
+
"grad_norm": 0.0620274692773819,
|
| 37695 |
+
"learning_rate": 0.0005746415940229018,
|
| 37696 |
+
"loss": 1.5039,
|
| 37697 |
+
"step": 10720
|
| 37698 |
+
},
|
| 37699 |
+
{
|
| 37700 |
+
"epoch": 0.4767240229425103,
|
| 37701 |
+
"grad_norm": 0.06492220610380173,
|
| 37702 |
+
"learning_rate": 0.0005744970403700292,
|
| 37703 |
+
"loss": 1.5058,
|
| 37704 |
+
"step": 10722
|
| 37705 |
+
},
|
| 37706 |
+
{
|
| 37707 |
+
"epoch": 0.4768129474011827,
|
| 37708 |
+
"grad_norm": 0.06156022846698761,
|
| 37709 |
+
"learning_rate": 0.0005743524803488214,
|
| 37710 |
+
"loss": 1.5004,
|
| 37711 |
+
"step": 10724
|
| 37712 |
+
},
|
| 37713 |
+
{
|
| 37714 |
+
"epoch": 0.47690187185985505,
|
| 37715 |
+
"grad_norm": 0.06396898627281189,
|
| 37716 |
+
"learning_rate": 0.0005742079139716358,
|
| 37717 |
+
"loss": 1.502,
|
| 37718 |
+
"step": 10726
|
| 37719 |
+
},
|
| 37720 |
+
{
|
| 37721 |
+
"epoch": 0.4769907963185274,
|
| 37722 |
+
"grad_norm": 0.06404668837785721,
|
| 37723 |
+
"learning_rate": 0.0005740633412508307,
|
| 37724 |
+
"loss": 1.5032,
|
| 37725 |
+
"step": 10728
|
| 37726 |
+
},
|
| 37727 |
+
{
|
| 37728 |
+
"epoch": 0.4770797207771998,
|
| 37729 |
+
"grad_norm": 0.06537303328514099,
|
| 37730 |
+
"learning_rate": 0.0005739187621987648,
|
| 37731 |
+
"loss": 1.5036,
|
| 37732 |
+
"step": 10730
|
| 37733 |
+
},
|
| 37734 |
+
{
|
| 37735 |
+
"epoch": 0.4771686452358721,
|
| 37736 |
+
"grad_norm": 0.0657230019569397,
|
| 37737 |
+
"learning_rate": 0.0005737741768277974,
|
| 37738 |
+
"loss": 1.505,
|
| 37739 |
+
"step": 10732
|
| 37740 |
+
},
|
| 37741 |
+
{
|
| 37742 |
+
"epoch": 0.47725756969454447,
|
| 37743 |
+
"grad_norm": 0.06339870393276215,
|
| 37744 |
+
"learning_rate": 0.0005736295851502882,
|
| 37745 |
+
"loss": 1.5034,
|
| 37746 |
+
"step": 10734
|
| 37747 |
+
},
|
| 37748 |
+
{
|
| 37749 |
+
"epoch": 0.47734649415321684,
|
| 37750 |
+
"grad_norm": 0.06523014605045319,
|
| 37751 |
+
"learning_rate": 0.0005734849871785976,
|
| 37752 |
+
"loss": 1.5058,
|
| 37753 |
+
"step": 10736
|
| 37754 |
+
},
|
| 37755 |
+
{
|
| 37756 |
+
"epoch": 0.4774354186118892,
|
| 37757 |
+
"grad_norm": 0.06503810733556747,
|
| 37758 |
+
"learning_rate": 0.0005733403829250865,
|
| 37759 |
+
"loss": 1.5031,
|
| 37760 |
+
"step": 10738
|
| 37761 |
+
},
|
| 37762 |
+
{
|
| 37763 |
+
"epoch": 0.4775243430705616,
|
| 37764 |
+
"grad_norm": 0.0636671707034111,
|
| 37765 |
+
"learning_rate": 0.0005731957724021163,
|
| 37766 |
+
"loss": 1.5059,
|
| 37767 |
+
"step": 10740
|
| 37768 |
+
},
|
| 37769 |
+
{
|
| 37770 |
+
"epoch": 0.47761326752923394,
|
| 37771 |
+
"grad_norm": 0.06321322917938232,
|
| 37772 |
+
"learning_rate": 0.0005730511556220488,
|
| 37773 |
+
"loss": 1.503,
|
| 37774 |
+
"step": 10742
|
| 37775 |
+
},
|
| 37776 |
+
{
|
| 37777 |
+
"epoch": 0.47770219198790626,
|
| 37778 |
+
"grad_norm": 0.06607749313116074,
|
| 37779 |
+
"learning_rate": 0.0005729065325972467,
|
| 37780 |
+
"loss": 1.5087,
|
| 37781 |
+
"step": 10744
|
| 37782 |
+
},
|
| 37783 |
+
{
|
| 37784 |
+
"epoch": 0.4777911164465786,
|
| 37785 |
+
"grad_norm": 0.06520562618970871,
|
| 37786 |
+
"learning_rate": 0.0005727619033400729,
|
| 37787 |
+
"loss": 1.5022,
|
| 37788 |
+
"step": 10746
|
| 37789 |
+
},
|
| 37790 |
+
{
|
| 37791 |
+
"epoch": 0.477880040905251,
|
| 37792 |
+
"grad_norm": 0.06609973311424255,
|
| 37793 |
+
"learning_rate": 0.0005726172678628907,
|
| 37794 |
+
"loss": 1.5012,
|
| 37795 |
+
"step": 10748
|
| 37796 |
+
},
|
| 37797 |
+
{
|
| 37798 |
+
"epoch": 0.47796896536392336,
|
| 37799 |
+
"grad_norm": 0.06533106416463852,
|
| 37800 |
+
"learning_rate": 0.0005724726261780648,
|
| 37801 |
+
"loss": 1.4998,
|
| 37802 |
+
"step": 10750
|
| 37803 |
+
},
|
| 37804 |
+
{
|
| 37805 |
+
"epoch": 0.4780578898225957,
|
| 37806 |
+
"grad_norm": 0.06694469600915909,
|
| 37807 |
+
"learning_rate": 0.0005723279782979591,
|
| 37808 |
+
"loss": 1.5035,
|
| 37809 |
+
"step": 10752
|
| 37810 |
+
},
|
| 37811 |
+
{
|
| 37812 |
+
"epoch": 0.47814681428126804,
|
| 37813 |
+
"grad_norm": 0.06440529227256775,
|
| 37814 |
+
"learning_rate": 0.0005721833242349393,
|
| 37815 |
+
"loss": 1.5098,
|
| 37816 |
+
"step": 10754
|
| 37817 |
+
},
|
| 37818 |
+
{
|
| 37819 |
+
"epoch": 0.4782357387399404,
|
| 37820 |
+
"grad_norm": 0.06357917189598083,
|
| 37821 |
+
"learning_rate": 0.0005720386640013707,
|
| 37822 |
+
"loss": 1.5016,
|
| 37823 |
+
"step": 10756
|
| 37824 |
+
},
|
| 37825 |
+
{
|
| 37826 |
+
"epoch": 0.4783246631986128,
|
| 37827 |
+
"grad_norm": 0.06445612013339996,
|
| 37828 |
+
"learning_rate": 0.0005718939976096198,
|
| 37829 |
+
"loss": 1.5005,
|
| 37830 |
+
"step": 10758
|
| 37831 |
+
},
|
| 37832 |
+
{
|
| 37833 |
+
"epoch": 0.47841358765728514,
|
| 37834 |
+
"grad_norm": 0.062472015619277954,
|
| 37835 |
+
"learning_rate": 0.000571749325072053,
|
| 37836 |
+
"loss": 1.5029,
|
| 37837 |
+
"step": 10760
|
| 37838 |
+
},
|
| 37839 |
+
{
|
| 37840 |
+
"epoch": 0.4785025121159575,
|
| 37841 |
+
"grad_norm": 0.06607341021299362,
|
| 37842 |
+
"learning_rate": 0.0005716046464010378,
|
| 37843 |
+
"loss": 1.5072,
|
| 37844 |
+
"step": 10762
|
| 37845 |
+
},
|
| 37846 |
+
{
|
| 37847 |
+
"epoch": 0.4785914365746298,
|
| 37848 |
+
"grad_norm": 0.06257757544517517,
|
| 37849 |
+
"learning_rate": 0.0005714599616089419,
|
| 37850 |
+
"loss": 1.5015,
|
| 37851 |
+
"step": 10764
|
| 37852 |
+
},
|
| 37853 |
+
{
|
| 37854 |
+
"epoch": 0.4786803610333022,
|
| 37855 |
+
"grad_norm": 0.06556899100542068,
|
| 37856 |
+
"learning_rate": 0.0005713152707081335,
|
| 37857 |
+
"loss": 1.5052,
|
| 37858 |
+
"step": 10766
|
| 37859 |
+
},
|
| 37860 |
+
{
|
| 37861 |
+
"epoch": 0.47876928549197456,
|
| 37862 |
+
"grad_norm": 0.06689021736383438,
|
| 37863 |
+
"learning_rate": 0.0005711705737109816,
|
| 37864 |
+
"loss": 1.5016,
|
| 37865 |
+
"step": 10768
|
| 37866 |
+
},
|
| 37867 |
+
{
|
| 37868 |
+
"epoch": 0.47885820995064693,
|
| 37869 |
+
"grad_norm": 0.06304630637168884,
|
| 37870 |
+
"learning_rate": 0.0005710258706298553,
|
| 37871 |
+
"loss": 1.5011,
|
| 37872 |
+
"step": 10770
|
| 37873 |
+
},
|
| 37874 |
+
{
|
| 37875 |
+
"epoch": 0.4789471344093193,
|
| 37876 |
+
"grad_norm": 0.06447041779756546,
|
| 37877 |
+
"learning_rate": 0.0005708811614771245,
|
| 37878 |
+
"loss": 1.5073,
|
| 37879 |
+
"step": 10772
|
| 37880 |
+
},
|
| 37881 |
+
{
|
| 37882 |
+
"epoch": 0.47903605886799167,
|
| 37883 |
+
"grad_norm": 0.06245449185371399,
|
| 37884 |
+
"learning_rate": 0.0005707364462651598,
|
| 37885 |
+
"loss": 1.4974,
|
| 37886 |
+
"step": 10774
|
| 37887 |
+
},
|
| 37888 |
+
{
|
| 37889 |
+
"epoch": 0.479124983326664,
|
| 37890 |
+
"grad_norm": 0.063130222260952,
|
| 37891 |
+
"learning_rate": 0.000570591725006332,
|
| 37892 |
+
"loss": 1.4997,
|
| 37893 |
+
"step": 10776
|
| 37894 |
+
},
|
| 37895 |
+
{
|
| 37896 |
+
"epoch": 0.47921390778533635,
|
| 37897 |
+
"grad_norm": 0.06639711558818817,
|
| 37898 |
+
"learning_rate": 0.0005704469977130123,
|
| 37899 |
+
"loss": 1.5042,
|
| 37900 |
+
"step": 10778
|
| 37901 |
+
},
|
| 37902 |
+
{
|
| 37903 |
+
"epoch": 0.4793028322440087,
|
| 37904 |
+
"grad_norm": 0.06467228382825851,
|
| 37905 |
+
"learning_rate": 0.0005703022643975728,
|
| 37906 |
+
"loss": 1.5027,
|
| 37907 |
+
"step": 10780
|
| 37908 |
+
},
|
| 37909 |
+
{
|
| 37910 |
+
"epoch": 0.4793917567026811,
|
| 37911 |
+
"grad_norm": 0.06834383308887482,
|
| 37912 |
+
"learning_rate": 0.0005701575250723859,
|
| 37913 |
+
"loss": 1.4984,
|
| 37914 |
+
"step": 10782
|
| 37915 |
+
},
|
| 37916 |
+
{
|
| 37917 |
+
"epoch": 0.47948068116135345,
|
| 37918 |
+
"grad_norm": 0.0679764524102211,
|
| 37919 |
+
"learning_rate": 0.0005700127797498248,
|
| 37920 |
+
"loss": 1.5074,
|
| 37921 |
+
"step": 10784
|
| 37922 |
+
},
|
| 37923 |
+
{
|
| 37924 |
+
"epoch": 0.47956960562002576,
|
| 37925 |
+
"grad_norm": 0.06350118666887283,
|
| 37926 |
+
"learning_rate": 0.0005698680284422626,
|
| 37927 |
+
"loss": 1.4981,
|
| 37928 |
+
"step": 10786
|
| 37929 |
+
},
|
| 37930 |
+
{
|
| 37931 |
+
"epoch": 0.47965853007869813,
|
| 37932 |
+
"grad_norm": 0.06375767290592194,
|
| 37933 |
+
"learning_rate": 0.0005697232711620733,
|
| 37934 |
+
"loss": 1.5007,
|
| 37935 |
+
"step": 10788
|
| 37936 |
+
},
|
| 37937 |
+
{
|
| 37938 |
+
"epoch": 0.4797474545373705,
|
| 37939 |
+
"grad_norm": 0.0636902004480362,
|
| 37940 |
+
"learning_rate": 0.0005695785079216318,
|
| 37941 |
+
"loss": 1.5074,
|
| 37942 |
+
"step": 10790
|
| 37943 |
+
},
|
| 37944 |
+
{
|
| 37945 |
+
"epoch": 0.47983637899604287,
|
| 37946 |
+
"grad_norm": 0.06637261062860489,
|
| 37947 |
+
"learning_rate": 0.0005694337387333127,
|
| 37948 |
+
"loss": 1.5031,
|
| 37949 |
+
"step": 10792
|
| 37950 |
+
},
|
| 37951 |
+
{
|
| 37952 |
+
"epoch": 0.47992530345471524,
|
| 37953 |
+
"grad_norm": 0.06588244438171387,
|
| 37954 |
+
"learning_rate": 0.0005692889636094917,
|
| 37955 |
+
"loss": 1.5034,
|
| 37956 |
+
"step": 10794
|
| 37957 |
+
},
|
| 37958 |
+
{
|
| 37959 |
+
"epoch": 0.4800142279133876,
|
| 37960 |
+
"grad_norm": 0.06770049780607224,
|
| 37961 |
+
"learning_rate": 0.0005691441825625446,
|
| 37962 |
+
"loss": 1.5026,
|
| 37963 |
+
"step": 10796
|
| 37964 |
+
},
|
| 37965 |
+
{
|
| 37966 |
+
"epoch": 0.4801031523720599,
|
| 37967 |
+
"grad_norm": 0.06415392458438873,
|
| 37968 |
+
"learning_rate": 0.0005689993956048481,
|
| 37969 |
+
"loss": 1.5048,
|
| 37970 |
+
"step": 10798
|
| 37971 |
+
},
|
| 37972 |
+
{
|
| 37973 |
+
"epoch": 0.4801920768307323,
|
| 37974 |
+
"grad_norm": 0.06577417999505997,
|
| 37975 |
+
"learning_rate": 0.0005688546027487792,
|
| 37976 |
+
"loss": 1.5023,
|
| 37977 |
+
"step": 10800
|
| 37978 |
+
},
|
| 37979 |
+
{
|
| 37980 |
+
"epoch": 0.48028100128940465,
|
| 37981 |
+
"grad_norm": 0.06189163774251938,
|
| 37982 |
+
"learning_rate": 0.0005687098040067153,
|
| 37983 |
+
"loss": 1.5017,
|
| 37984 |
+
"step": 10802
|
| 37985 |
+
},
|
| 37986 |
+
{
|
| 37987 |
+
"epoch": 0.480369925748077,
|
| 37988 |
+
"grad_norm": 0.06363365054130554,
|
| 37989 |
+
"learning_rate": 0.0005685649993910348,
|
| 37990 |
+
"loss": 1.5037,
|
| 37991 |
+
"step": 10804
|
| 37992 |
+
},
|
| 37993 |
+
{
|
| 37994 |
+
"epoch": 0.4804588502067494,
|
| 37995 |
+
"grad_norm": 0.06573474407196045,
|
| 37996 |
+
"learning_rate": 0.0005684201889141158,
|
| 37997 |
+
"loss": 1.5027,
|
| 37998 |
+
"step": 10806
|
| 37999 |
+
},
|
| 38000 |
+
{
|
| 38001 |
+
"epoch": 0.4805477746654217,
|
| 38002 |
+
"grad_norm": 0.06577087193727493,
|
| 38003 |
+
"learning_rate": 0.0005682753725883378,
|
| 38004 |
+
"loss": 1.5003,
|
| 38005 |
+
"step": 10808
|
| 38006 |
+
},
|
| 38007 |
+
{
|
| 38008 |
+
"epoch": 0.48063669912409407,
|
| 38009 |
+
"grad_norm": 0.062234729528427124,
|
| 38010 |
+
"learning_rate": 0.0005681305504260798,
|
| 38011 |
+
"loss": 1.5009,
|
| 38012 |
+
"step": 10810
|
| 38013 |
+
},
|
| 38014 |
+
{
|
| 38015 |
+
"epoch": 0.48072562358276644,
|
| 38016 |
+
"grad_norm": 0.06282081454992294,
|
| 38017 |
+
"learning_rate": 0.0005679857224397222,
|
| 38018 |
+
"loss": 1.5035,
|
| 38019 |
+
"step": 10812
|
| 38020 |
+
},
|
| 38021 |
+
{
|
| 38022 |
+
"epoch": 0.4808145480414388,
|
| 38023 |
+
"grad_norm": 0.06254705041646957,
|
| 38024 |
+
"learning_rate": 0.0005678408886416454,
|
| 38025 |
+
"loss": 1.5049,
|
| 38026 |
+
"step": 10814
|
| 38027 |
+
},
|
| 38028 |
+
{
|
| 38029 |
+
"epoch": 0.4809034725001112,
|
| 38030 |
+
"grad_norm": 0.06438016891479492,
|
| 38031 |
+
"learning_rate": 0.0005676960490442305,
|
| 38032 |
+
"loss": 1.4978,
|
| 38033 |
+
"step": 10816
|
| 38034 |
+
},
|
| 38035 |
+
{
|
| 38036 |
+
"epoch": 0.4809923969587835,
|
| 38037 |
+
"grad_norm": 0.06419461965560913,
|
| 38038 |
+
"learning_rate": 0.0005675512036598592,
|
| 38039 |
+
"loss": 1.5005,
|
| 38040 |
+
"step": 10818
|
| 38041 |
+
},
|
| 38042 |
+
{
|
| 38043 |
+
"epoch": 0.48108132141745585,
|
| 38044 |
+
"grad_norm": 0.06625373661518097,
|
| 38045 |
+
"learning_rate": 0.000567406352500913,
|
| 38046 |
+
"loss": 1.4987,
|
| 38047 |
+
"step": 10820
|
| 38048 |
+
},
|
| 38049 |
+
{
|
| 38050 |
+
"epoch": 0.4811702458761282,
|
| 38051 |
+
"grad_norm": 0.06367786973714828,
|
| 38052 |
+
"learning_rate": 0.0005672614955797749,
|
| 38053 |
+
"loss": 1.5023,
|
| 38054 |
+
"step": 10822
|
| 38055 |
+
},
|
| 38056 |
+
{
|
| 38057 |
+
"epoch": 0.4812591703348006,
|
| 38058 |
+
"grad_norm": 0.06512659043073654,
|
| 38059 |
+
"learning_rate": 0.0005671166329088278,
|
| 38060 |
+
"loss": 1.5038,
|
| 38061 |
+
"step": 10824
|
| 38062 |
+
},
|
| 38063 |
+
{
|
| 38064 |
+
"epoch": 0.48134809479347296,
|
| 38065 |
+
"grad_norm": 0.06565241515636444,
|
| 38066 |
+
"learning_rate": 0.0005669717645004551,
|
| 38067 |
+
"loss": 1.5081,
|
| 38068 |
+
"step": 10826
|
| 38069 |
+
},
|
| 38070 |
+
{
|
| 38071 |
+
"epoch": 0.4814370192521453,
|
| 38072 |
+
"grad_norm": 0.06338914483785629,
|
| 38073 |
+
"learning_rate": 0.0005668268903670407,
|
| 38074 |
+
"loss": 1.5058,
|
| 38075 |
+
"step": 10828
|
| 38076 |
+
},
|
| 38077 |
+
{
|
| 38078 |
+
"epoch": 0.48152594371081764,
|
| 38079 |
+
"grad_norm": 0.06349223107099533,
|
| 38080 |
+
"learning_rate": 0.0005666820105209694,
|
| 38081 |
+
"loss": 1.4945,
|
| 38082 |
+
"step": 10830
|
| 38083 |
+
},
|
| 38084 |
+
{
|
| 38085 |
+
"epoch": 0.48161486816949,
|
| 38086 |
+
"grad_norm": 0.06533645838499069,
|
| 38087 |
+
"learning_rate": 0.0005665371249746259,
|
| 38088 |
+
"loss": 1.503,
|
| 38089 |
+
"step": 10832
|
| 38090 |
+
},
|
| 38091 |
+
{
|
| 38092 |
+
"epoch": 0.4817037926281624,
|
| 38093 |
+
"grad_norm": 0.07015854865312576,
|
| 38094 |
+
"learning_rate": 0.0005663922337403957,
|
| 38095 |
+
"loss": 1.5032,
|
| 38096 |
+
"step": 10834
|
| 38097 |
+
},
|
| 38098 |
+
{
|
| 38099 |
+
"epoch": 0.48179271708683474,
|
| 38100 |
+
"grad_norm": 0.06523853540420532,
|
| 38101 |
+
"learning_rate": 0.0005662473368306649,
|
| 38102 |
+
"loss": 1.5043,
|
| 38103 |
+
"step": 10836
|
| 38104 |
+
},
|
| 38105 |
+
{
|
| 38106 |
+
"epoch": 0.4818816415455071,
|
| 38107 |
+
"grad_norm": 0.06229966878890991,
|
| 38108 |
+
"learning_rate": 0.0005661024342578197,
|
| 38109 |
+
"loss": 1.5009,
|
| 38110 |
+
"step": 10838
|
| 38111 |
+
},
|
| 38112 |
+
{
|
| 38113 |
+
"epoch": 0.4819705660041794,
|
| 38114 |
+
"grad_norm": 0.06518285721540451,
|
| 38115 |
+
"learning_rate": 0.0005659575260342473,
|
| 38116 |
+
"loss": 1.5044,
|
| 38117 |
+
"step": 10840
|
| 38118 |
+
},
|
| 38119 |
+
{
|
| 38120 |
+
"epoch": 0.4820594904628518,
|
| 38121 |
+
"grad_norm": 0.06520578265190125,
|
| 38122 |
+
"learning_rate": 0.0005658126121723346,
|
| 38123 |
+
"loss": 1.5082,
|
| 38124 |
+
"step": 10842
|
| 38125 |
+
},
|
| 38126 |
+
{
|
| 38127 |
+
"epoch": 0.48214841492152416,
|
| 38128 |
+
"grad_norm": 0.06587293744087219,
|
| 38129 |
+
"learning_rate": 0.00056566769268447,
|
| 38130 |
+
"loss": 1.5047,
|
| 38131 |
+
"step": 10844
|
| 38132 |
+
},
|
| 38133 |
+
{
|
| 38134 |
+
"epoch": 0.48223733938019653,
|
| 38135 |
+
"grad_norm": 0.06606019288301468,
|
| 38136 |
+
"learning_rate": 0.0005655227675830416,
|
| 38137 |
+
"loss": 1.4998,
|
| 38138 |
+
"step": 10846
|
| 38139 |
+
},
|
| 38140 |
+
{
|
| 38141 |
+
"epoch": 0.4823262638388689,
|
| 38142 |
+
"grad_norm": 0.06386158615350723,
|
| 38143 |
+
"learning_rate": 0.0005653778368804381,
|
| 38144 |
+
"loss": 1.5081,
|
| 38145 |
+
"step": 10848
|
| 38146 |
+
},
|
| 38147 |
+
{
|
| 38148 |
+
"epoch": 0.48241518829754126,
|
| 38149 |
+
"grad_norm": 0.06474409252405167,
|
| 38150 |
+
"learning_rate": 0.0005652329005890492,
|
| 38151 |
+
"loss": 1.5008,
|
| 38152 |
+
"step": 10850
|
| 38153 |
+
},
|
| 38154 |
+
{
|
| 38155 |
+
"epoch": 0.4825041127562136,
|
| 38156 |
+
"grad_norm": 0.06231878697872162,
|
| 38157 |
+
"learning_rate": 0.0005650879587212645,
|
| 38158 |
+
"loss": 1.4979,
|
| 38159 |
+
"step": 10852
|
| 38160 |
+
},
|
| 38161 |
+
{
|
| 38162 |
+
"epoch": 0.48259303721488594,
|
| 38163 |
+
"grad_norm": 0.06352438777685165,
|
| 38164 |
+
"learning_rate": 0.0005649430112894743,
|
| 38165 |
+
"loss": 1.5012,
|
| 38166 |
+
"step": 10854
|
| 38167 |
+
},
|
| 38168 |
+
{
|
| 38169 |
+
"epoch": 0.4826819616735583,
|
| 38170 |
+
"grad_norm": 0.06429651379585266,
|
| 38171 |
+
"learning_rate": 0.000564798058306069,
|
| 38172 |
+
"loss": 1.4951,
|
| 38173 |
+
"step": 10856
|
| 38174 |
+
},
|
| 38175 |
+
{
|
| 38176 |
+
"epoch": 0.4827708861322307,
|
| 38177 |
+
"grad_norm": 0.06389694660902023,
|
| 38178 |
+
"learning_rate": 0.0005646530997834403,
|
| 38179 |
+
"loss": 1.4992,
|
| 38180 |
+
"step": 10858
|
| 38181 |
+
},
|
| 38182 |
+
{
|
| 38183 |
+
"epoch": 0.48285981059090305,
|
| 38184 |
+
"grad_norm": 0.06426005810499191,
|
| 38185 |
+
"learning_rate": 0.0005645081357339797,
|
| 38186 |
+
"loss": 1.4968,
|
| 38187 |
+
"step": 10860
|
| 38188 |
+
},
|
| 38189 |
+
{
|
| 38190 |
+
"epoch": 0.48294873504957536,
|
| 38191 |
+
"grad_norm": 0.062285613268613815,
|
| 38192 |
+
"learning_rate": 0.0005643631661700796,
|
| 38193 |
+
"loss": 1.4996,
|
| 38194 |
+
"step": 10862
|
| 38195 |
+
},
|
| 38196 |
+
{
|
| 38197 |
+
"epoch": 0.48303765950824773,
|
| 38198 |
+
"grad_norm": 0.06469501554965973,
|
| 38199 |
+
"learning_rate": 0.0005642181911041321,
|
| 38200 |
+
"loss": 1.4993,
|
| 38201 |
+
"step": 10864
|
| 38202 |
+
},
|
| 38203 |
+
{
|
| 38204 |
+
"epoch": 0.4831265839669201,
|
| 38205 |
+
"grad_norm": 0.06305573880672455,
|
| 38206 |
+
"learning_rate": 0.0005640732105485308,
|
| 38207 |
+
"loss": 1.5,
|
| 38208 |
+
"step": 10866
|
| 38209 |
+
},
|
| 38210 |
+
{
|
| 38211 |
+
"epoch": 0.48321550842559247,
|
| 38212 |
+
"grad_norm": 0.06352468580007553,
|
| 38213 |
+
"learning_rate": 0.000563928224515669,
|
| 38214 |
+
"loss": 1.5065,
|
| 38215 |
+
"step": 10868
|
| 38216 |
+
},
|
| 38217 |
+
{
|
| 38218 |
+
"epoch": 0.48330443288426483,
|
| 38219 |
+
"grad_norm": 0.06422882527112961,
|
| 38220 |
+
"learning_rate": 0.0005637832330179409,
|
| 38221 |
+
"loss": 1.5032,
|
| 38222 |
+
"step": 10870
|
| 38223 |
+
},
|
| 38224 |
+
{
|
| 38225 |
+
"epoch": 0.4833933573429372,
|
| 38226 |
+
"grad_norm": 0.06555035710334778,
|
| 38227 |
+
"learning_rate": 0.000563638236067741,
|
| 38228 |
+
"loss": 1.5019,
|
| 38229 |
+
"step": 10872
|
| 38230 |
+
},
|
| 38231 |
+
{
|
| 38232 |
+
"epoch": 0.4834822818016095,
|
| 38233 |
+
"grad_norm": 0.0639529749751091,
|
| 38234 |
+
"learning_rate": 0.0005634932336774641,
|
| 38235 |
+
"loss": 1.4982,
|
| 38236 |
+
"step": 10874
|
| 38237 |
+
},
|
| 38238 |
+
{
|
| 38239 |
+
"epoch": 0.4835712062602819,
|
| 38240 |
+
"grad_norm": 0.06562841683626175,
|
| 38241 |
+
"learning_rate": 0.0005633482258595059,
|
| 38242 |
+
"loss": 1.5008,
|
| 38243 |
+
"step": 10876
|
| 38244 |
+
},
|
| 38245 |
+
{
|
| 38246 |
+
"epoch": 0.48366013071895425,
|
| 38247 |
+
"grad_norm": 0.06432850658893585,
|
| 38248 |
+
"learning_rate": 0.0005632032126262622,
|
| 38249 |
+
"loss": 1.507,
|
| 38250 |
+
"step": 10878
|
| 38251 |
+
},
|
| 38252 |
+
{
|
| 38253 |
+
"epoch": 0.4837490551776266,
|
| 38254 |
+
"grad_norm": 0.06400411576032639,
|
| 38255 |
+
"learning_rate": 0.0005630581939901294,
|
| 38256 |
+
"loss": 1.5074,
|
| 38257 |
+
"step": 10880
|
| 38258 |
+
},
|
| 38259 |
+
{
|
| 38260 |
+
"epoch": 0.483837979636299,
|
| 38261 |
+
"grad_norm": 0.06370353698730469,
|
| 38262 |
+
"learning_rate": 0.0005629131699635041,
|
| 38263 |
+
"loss": 1.4988,
|
| 38264 |
+
"step": 10882
|
| 38265 |
+
},
|
| 38266 |
+
{
|
| 38267 |
+
"epoch": 0.4839269040949713,
|
| 38268 |
+
"grad_norm": 0.06292074173688889,
|
| 38269 |
+
"learning_rate": 0.0005627681405587839,
|
| 38270 |
+
"loss": 1.5069,
|
| 38271 |
+
"step": 10884
|
| 38272 |
+
},
|
| 38273 |
+
{
|
| 38274 |
+
"epoch": 0.48401582855364367,
|
| 38275 |
+
"grad_norm": 0.0631115660071373,
|
| 38276 |
+
"learning_rate": 0.0005626231057883664,
|
| 38277 |
+
"loss": 1.503,
|
| 38278 |
+
"step": 10886
|
| 38279 |
+
},
|
| 38280 |
+
{
|
| 38281 |
+
"epoch": 0.48410475301231604,
|
| 38282 |
+
"grad_norm": 0.06345234811306,
|
| 38283 |
+
"learning_rate": 0.0005624780656646499,
|
| 38284 |
+
"loss": 1.5007,
|
| 38285 |
+
"step": 10888
|
| 38286 |
+
},
|
| 38287 |
+
{
|
| 38288 |
+
"epoch": 0.4841936774709884,
|
| 38289 |
+
"grad_norm": 0.06285906583070755,
|
| 38290 |
+
"learning_rate": 0.000562333020200033,
|
| 38291 |
+
"loss": 1.5008,
|
| 38292 |
+
"step": 10890
|
| 38293 |
+
},
|
| 38294 |
+
{
|
| 38295 |
+
"epoch": 0.48428260192966077,
|
| 38296 |
+
"grad_norm": 0.06470154970884323,
|
| 38297 |
+
"learning_rate": 0.0005621879694069148,
|
| 38298 |
+
"loss": 1.5029,
|
| 38299 |
+
"step": 10892
|
| 38300 |
+
},
|
| 38301 |
+
{
|
| 38302 |
+
"epoch": 0.4843715263883331,
|
| 38303 |
+
"grad_norm": 0.06552711874246597,
|
| 38304 |
+
"learning_rate": 0.000562042913297695,
|
| 38305 |
+
"loss": 1.5065,
|
| 38306 |
+
"step": 10894
|
| 38307 |
+
},
|
| 38308 |
+
{
|
| 38309 |
+
"epoch": 0.48446045084700545,
|
| 38310 |
+
"grad_norm": 0.0655001625418663,
|
| 38311 |
+
"learning_rate": 0.0005618978518847733,
|
| 38312 |
+
"loss": 1.5033,
|
| 38313 |
+
"step": 10896
|
| 38314 |
+
},
|
| 38315 |
+
{
|
| 38316 |
+
"epoch": 0.4845493753056778,
|
| 38317 |
+
"grad_norm": 0.06529513746500015,
|
| 38318 |
+
"learning_rate": 0.0005617527851805507,
|
| 38319 |
+
"loss": 1.4996,
|
| 38320 |
+
"step": 10898
|
| 38321 |
+
},
|
| 38322 |
+
{
|
| 38323 |
+
"epoch": 0.4846382997643502,
|
| 38324 |
+
"grad_norm": 0.06527028232812881,
|
| 38325 |
+
"learning_rate": 0.0005616077131974279,
|
| 38326 |
+
"loss": 1.5027,
|
| 38327 |
+
"step": 10900
|
| 38328 |
+
},
|
| 38329 |
+
{
|
| 38330 |
+
"epoch": 0.48472722422302256,
|
| 38331 |
+
"grad_norm": 0.06357049942016602,
|
| 38332 |
+
"learning_rate": 0.0005614626359478062,
|
| 38333 |
+
"loss": 1.5025,
|
| 38334 |
+
"step": 10902
|
| 38335 |
+
},
|
| 38336 |
+
{
|
| 38337 |
+
"epoch": 0.4848161486816949,
|
| 38338 |
+
"grad_norm": 0.06293467432260513,
|
| 38339 |
+
"learning_rate": 0.0005613175534440875,
|
| 38340 |
+
"loss": 1.5036,
|
| 38341 |
+
"step": 10904
|
| 38342 |
+
},
|
| 38343 |
+
{
|
| 38344 |
+
"epoch": 0.48490507314036724,
|
| 38345 |
+
"grad_norm": 0.06417332589626312,
|
| 38346 |
+
"learning_rate": 0.0005611724656986741,
|
| 38347 |
+
"loss": 1.4987,
|
| 38348 |
+
"step": 10906
|
| 38349 |
+
},
|
| 38350 |
+
{
|
| 38351 |
+
"epoch": 0.4849939975990396,
|
| 38352 |
+
"grad_norm": 0.06176379695534706,
|
| 38353 |
+
"learning_rate": 0.0005610273727239688,
|
| 38354 |
+
"loss": 1.5024,
|
| 38355 |
+
"step": 10908
|
| 38356 |
+
},
|
| 38357 |
+
{
|
| 38358 |
+
"epoch": 0.485082922057712,
|
| 38359 |
+
"grad_norm": 0.06424736231565475,
|
| 38360 |
+
"learning_rate": 0.0005608822745323748,
|
| 38361 |
+
"loss": 1.5012,
|
| 38362 |
+
"step": 10910
|
| 38363 |
+
},
|
| 38364 |
+
{
|
| 38365 |
+
"epoch": 0.48517184651638434,
|
| 38366 |
+
"grad_norm": 0.06530416756868362,
|
| 38367 |
+
"learning_rate": 0.0005607371711362956,
|
| 38368 |
+
"loss": 1.4997,
|
| 38369 |
+
"step": 10912
|
| 38370 |
+
},
|
| 38371 |
+
{
|
| 38372 |
+
"epoch": 0.4852607709750567,
|
| 38373 |
+
"grad_norm": 0.06718974560499191,
|
| 38374 |
+
"learning_rate": 0.0005605920625481353,
|
| 38375 |
+
"loss": 1.5028,
|
| 38376 |
+
"step": 10914
|
| 38377 |
+
},
|
| 38378 |
+
{
|
| 38379 |
+
"epoch": 0.485349695433729,
|
| 38380 |
+
"grad_norm": 0.06613506376743317,
|
| 38381 |
+
"learning_rate": 0.0005604469487802987,
|
| 38382 |
+
"loss": 1.4958,
|
| 38383 |
+
"step": 10916
|
| 38384 |
+
},
|
| 38385 |
+
{
|
| 38386 |
+
"epoch": 0.4854386198924014,
|
| 38387 |
+
"grad_norm": 0.06338746100664139,
|
| 38388 |
+
"learning_rate": 0.0005603018298451903,
|
| 38389 |
+
"loss": 1.5018,
|
| 38390 |
+
"step": 10918
|
| 38391 |
+
},
|
| 38392 |
+
{
|
| 38393 |
+
"epoch": 0.48552754435107376,
|
| 38394 |
+
"grad_norm": 0.06582844257354736,
|
| 38395 |
+
"learning_rate": 0.0005601567057552158,
|
| 38396 |
+
"loss": 1.4997,
|
| 38397 |
+
"step": 10920
|
| 38398 |
+
},
|
| 38399 |
+
{
|
| 38400 |
+
"epoch": 0.4856164688097461,
|
| 38401 |
+
"grad_norm": 0.06286223977804184,
|
| 38402 |
+
"learning_rate": 0.000560011576522781,
|
| 38403 |
+
"loss": 1.4998,
|
| 38404 |
+
"step": 10922
|
| 38405 |
+
},
|
| 38406 |
+
{
|
| 38407 |
+
"epoch": 0.4857053932684185,
|
| 38408 |
+
"grad_norm": 0.06453262269496918,
|
| 38409 |
+
"learning_rate": 0.0005598664421602921,
|
| 38410 |
+
"loss": 1.4966,
|
| 38411 |
+
"step": 10924
|
| 38412 |
+
},
|
| 38413 |
+
{
|
| 38414 |
+
"epoch": 0.48579431772709086,
|
| 38415 |
+
"grad_norm": 0.06320101022720337,
|
| 38416 |
+
"learning_rate": 0.0005597213026801561,
|
| 38417 |
+
"loss": 1.5054,
|
| 38418 |
+
"step": 10926
|
| 38419 |
+
},
|
| 38420 |
+
{
|
| 38421 |
+
"epoch": 0.4858832421857632,
|
| 38422 |
+
"grad_norm": 0.06558766961097717,
|
| 38423 |
+
"learning_rate": 0.0005595761580947798,
|
| 38424 |
+
"loss": 1.5011,
|
| 38425 |
+
"step": 10928
|
| 38426 |
+
},
|
| 38427 |
+
{
|
| 38428 |
+
"epoch": 0.48597216664443554,
|
| 38429 |
+
"grad_norm": 0.06365552544593811,
|
| 38430 |
+
"learning_rate": 0.000559431008416571,
|
| 38431 |
+
"loss": 1.5008,
|
| 38432 |
+
"step": 10930
|
| 38433 |
+
},
|
| 38434 |
+
{
|
| 38435 |
+
"epoch": 0.4860610911031079,
|
| 38436 |
+
"grad_norm": 0.06486231088638306,
|
| 38437 |
+
"learning_rate": 0.0005592858536579377,
|
| 38438 |
+
"loss": 1.5008,
|
| 38439 |
+
"step": 10932
|
| 38440 |
+
},
|
| 38441 |
+
{
|
| 38442 |
+
"epoch": 0.4861500155617803,
|
| 38443 |
+
"grad_norm": 0.063105009496212,
|
| 38444 |
+
"learning_rate": 0.0005591406938312885,
|
| 38445 |
+
"loss": 1.5011,
|
| 38446 |
+
"step": 10934
|
| 38447 |
+
},
|
| 38448 |
+
{
|
| 38449 |
+
"epoch": 0.48623894002045265,
|
| 38450 |
+
"grad_norm": 0.06280262023210526,
|
| 38451 |
+
"learning_rate": 0.000558995528949032,
|
| 38452 |
+
"loss": 1.4973,
|
| 38453 |
+
"step": 10936
|
| 38454 |
+
},
|
| 38455 |
+
{
|
| 38456 |
+
"epoch": 0.48632786447912496,
|
| 38457 |
+
"grad_norm": 0.0647190734744072,
|
| 38458 |
+
"learning_rate": 0.0005588503590235777,
|
| 38459 |
+
"loss": 1.4982,
|
| 38460 |
+
"step": 10938
|
| 38461 |
+
},
|
| 38462 |
+
{
|
| 38463 |
+
"epoch": 0.48641678893779733,
|
| 38464 |
+
"grad_norm": 0.06445516645908356,
|
| 38465 |
+
"learning_rate": 0.0005587051840673355,
|
| 38466 |
+
"loss": 1.5,
|
| 38467 |
+
"step": 10940
|
| 38468 |
+
},
|
| 38469 |
+
{
|
| 38470 |
+
"epoch": 0.4865057133964697,
|
| 38471 |
+
"grad_norm": 0.06466984003782272,
|
| 38472 |
+
"learning_rate": 0.0005585600040927154,
|
| 38473 |
+
"loss": 1.4962,
|
| 38474 |
+
"step": 10942
|
| 38475 |
+
},
|
| 38476 |
+
{
|
| 38477 |
+
"epoch": 0.48659463785514206,
|
| 38478 |
+
"grad_norm": 0.06264392286539078,
|
| 38479 |
+
"learning_rate": 0.0005584148191121279,
|
| 38480 |
+
"loss": 1.4962,
|
| 38481 |
+
"step": 10944
|
| 38482 |
+
},
|
| 38483 |
+
{
|
| 38484 |
+
"epoch": 0.48668356231381443,
|
| 38485 |
+
"grad_norm": 0.06432507932186127,
|
| 38486 |
+
"learning_rate": 0.0005582696291379843,
|
| 38487 |
+
"loss": 1.4995,
|
| 38488 |
+
"step": 10946
|
| 38489 |
+
},
|
| 38490 |
+
{
|
| 38491 |
+
"epoch": 0.48677248677248675,
|
| 38492 |
+
"grad_norm": 0.0634993389248848,
|
| 38493 |
+
"learning_rate": 0.0005581244341826963,
|
| 38494 |
+
"loss": 1.5032,
|
| 38495 |
+
"step": 10948
|
| 38496 |
+
},
|
| 38497 |
+
{
|
| 38498 |
+
"epoch": 0.4868614112311591,
|
| 38499 |
+
"grad_norm": 0.06333111226558685,
|
| 38500 |
+
"learning_rate": 0.0005579792342586753,
|
| 38501 |
+
"loss": 1.5059,
|
| 38502 |
+
"step": 10950
|
| 38503 |
+
},
|
| 38504 |
+
{
|
| 38505 |
+
"epoch": 0.4869503356898315,
|
| 38506 |
+
"grad_norm": 0.06281735002994537,
|
| 38507 |
+
"learning_rate": 0.0005578340293783339,
|
| 38508 |
+
"loss": 1.5028,
|
| 38509 |
+
"step": 10952
|
| 38510 |
+
},
|
| 38511 |
+
{
|
| 38512 |
+
"epoch": 0.48703926014850385,
|
| 38513 |
+
"grad_norm": 0.06381440162658691,
|
| 38514 |
+
"learning_rate": 0.0005576888195540848,
|
| 38515 |
+
"loss": 1.5039,
|
| 38516 |
+
"step": 10954
|
| 38517 |
+
},
|
| 38518 |
+
{
|
| 38519 |
+
"epoch": 0.4871281846071762,
|
| 38520 |
+
"grad_norm": 0.06332245469093323,
|
| 38521 |
+
"learning_rate": 0.000557543604798341,
|
| 38522 |
+
"loss": 1.5009,
|
| 38523 |
+
"step": 10956
|
| 38524 |
+
},
|
| 38525 |
+
{
|
| 38526 |
+
"epoch": 0.4872171090658486,
|
| 38527 |
+
"grad_norm": 0.06438940018415451,
|
| 38528 |
+
"learning_rate": 0.0005573983851235165,
|
| 38529 |
+
"loss": 1.5028,
|
| 38530 |
+
"step": 10958
|
| 38531 |
+
},
|
| 38532 |
+
{
|
| 38533 |
+
"epoch": 0.4873060335245209,
|
| 38534 |
+
"grad_norm": 0.06432240456342697,
|
| 38535 |
+
"learning_rate": 0.000557253160542025,
|
| 38536 |
+
"loss": 1.4928,
|
| 38537 |
+
"step": 10960
|
| 38538 |
+
},
|
| 38539 |
+
{
|
| 38540 |
+
"epoch": 0.48739495798319327,
|
| 38541 |
+
"grad_norm": 0.06432882696390152,
|
| 38542 |
+
"learning_rate": 0.0005571079310662811,
|
| 38543 |
+
"loss": 1.5016,
|
| 38544 |
+
"step": 10962
|
| 38545 |
+
},
|
| 38546 |
+
{
|
| 38547 |
+
"epoch": 0.48748388244186563,
|
| 38548 |
+
"grad_norm": 0.06522729992866516,
|
| 38549 |
+
"learning_rate": 0.0005569626967086995,
|
| 38550 |
+
"loss": 1.5001,
|
| 38551 |
+
"step": 10964
|
| 38552 |
+
},
|
| 38553 |
+
{
|
| 38554 |
+
"epoch": 0.487572806900538,
|
| 38555 |
+
"grad_norm": 0.0628109946846962,
|
| 38556 |
+
"learning_rate": 0.0005568174574816957,
|
| 38557 |
+
"loss": 1.505,
|
| 38558 |
+
"step": 10966
|
| 38559 |
+
},
|
| 38560 |
+
{
|
| 38561 |
+
"epoch": 0.48766173135921037,
|
| 38562 |
+
"grad_norm": 0.06767455488443375,
|
| 38563 |
+
"learning_rate": 0.0005566722133976851,
|
| 38564 |
+
"loss": 1.5021,
|
| 38565 |
+
"step": 10968
|
| 38566 |
+
},
|
| 38567 |
+
{
|
| 38568 |
+
"epoch": 0.4877506558178827,
|
| 38569 |
+
"grad_norm": 0.06257057934999466,
|
| 38570 |
+
"learning_rate": 0.000556526964469084,
|
| 38571 |
+
"loss": 1.4986,
|
| 38572 |
+
"step": 10970
|
| 38573 |
+
},
|
| 38574 |
+
{
|
| 38575 |
+
"epoch": 0.48783958027655505,
|
| 38576 |
+
"grad_norm": 0.06524206697940826,
|
| 38577 |
+
"learning_rate": 0.0005563817107083088,
|
| 38578 |
+
"loss": 1.4929,
|
| 38579 |
+
"step": 10972
|
| 38580 |
+
},
|
| 38581 |
+
{
|
| 38582 |
+
"epoch": 0.4879285047352274,
|
| 38583 |
+
"grad_norm": 0.06345758587121964,
|
| 38584 |
+
"learning_rate": 0.0005562364521277766,
|
| 38585 |
+
"loss": 1.4987,
|
| 38586 |
+
"step": 10974
|
| 38587 |
+
},
|
| 38588 |
+
{
|
| 38589 |
+
"epoch": 0.4880174291938998,
|
| 38590 |
+
"grad_norm": 0.0644221380352974,
|
| 38591 |
+
"learning_rate": 0.0005560911887399047,
|
| 38592 |
+
"loss": 1.4985,
|
| 38593 |
+
"step": 10976
|
| 38594 |
+
},
|
| 38595 |
+
{
|
| 38596 |
+
"epoch": 0.48810635365257216,
|
| 38597 |
+
"grad_norm": 0.06262285262346268,
|
| 38598 |
+
"learning_rate": 0.0005559459205571106,
|
| 38599 |
+
"loss": 1.5021,
|
| 38600 |
+
"step": 10978
|
| 38601 |
+
},
|
| 38602 |
+
{
|
| 38603 |
+
"epoch": 0.4881952781112445,
|
| 38604 |
+
"grad_norm": 0.06352121382951736,
|
| 38605 |
+
"learning_rate": 0.0005558006475918128,
|
| 38606 |
+
"loss": 1.5005,
|
| 38607 |
+
"step": 10980
|
| 38608 |
+
},
|
| 38609 |
+
{
|
| 38610 |
+
"epoch": 0.48828420256991684,
|
| 38611 |
+
"grad_norm": 0.06309682130813599,
|
| 38612 |
+
"learning_rate": 0.0005556553698564297,
|
| 38613 |
+
"loss": 1.5002,
|
| 38614 |
+
"step": 10982
|
| 38615 |
+
},
|
| 38616 |
+
{
|
| 38617 |
+
"epoch": 0.4883731270285892,
|
| 38618 |
+
"grad_norm": 0.061160314828157425,
|
| 38619 |
+
"learning_rate": 0.0005555100873633804,
|
| 38620 |
+
"loss": 1.5016,
|
| 38621 |
+
"step": 10984
|
| 38622 |
+
},
|
| 38623 |
+
{
|
| 38624 |
+
"epoch": 0.4884620514872616,
|
| 38625 |
+
"grad_norm": 0.06492006033658981,
|
| 38626 |
+
"learning_rate": 0.0005553648001250842,
|
| 38627 |
+
"loss": 1.5017,
|
| 38628 |
+
"step": 10986
|
| 38629 |
+
},
|
| 38630 |
+
{
|
| 38631 |
+
"epoch": 0.48855097594593394,
|
| 38632 |
+
"grad_norm": 0.06279385089874268,
|
| 38633 |
+
"learning_rate": 0.0005552195081539608,
|
| 38634 |
+
"loss": 1.5062,
|
| 38635 |
+
"step": 10988
|
| 38636 |
+
},
|
| 38637 |
+
{
|
| 38638 |
+
"epoch": 0.4886399004046063,
|
| 38639 |
+
"grad_norm": 0.06334224343299866,
|
| 38640 |
+
"learning_rate": 0.0005550742114624305,
|
| 38641 |
+
"loss": 1.5027,
|
| 38642 |
+
"step": 10990
|
| 38643 |
+
},
|
| 38644 |
+
{
|
| 38645 |
+
"epoch": 0.4887288248632786,
|
| 38646 |
+
"grad_norm": 0.06543200463056564,
|
| 38647 |
+
"learning_rate": 0.000554928910062914,
|
| 38648 |
+
"loss": 1.5028,
|
| 38649 |
+
"step": 10992
|
| 38650 |
+
},
|
| 38651 |
+
{
|
| 38652 |
+
"epoch": 0.488817749321951,
|
| 38653 |
+
"grad_norm": 0.06258574873209,
|
| 38654 |
+
"learning_rate": 0.0005547836039678321,
|
| 38655 |
+
"loss": 1.5023,
|
| 38656 |
+
"step": 10994
|
| 38657 |
+
},
|
| 38658 |
+
{
|
| 38659 |
+
"epoch": 0.48890667378062336,
|
| 38660 |
+
"grad_norm": 0.06658318638801575,
|
| 38661 |
+
"learning_rate": 0.0005546382931896065,
|
| 38662 |
+
"loss": 1.4961,
|
| 38663 |
+
"step": 10996
|
| 38664 |
+
},
|
| 38665 |
+
{
|
| 38666 |
+
"epoch": 0.4889955982392957,
|
| 38667 |
+
"grad_norm": 0.062175750732421875,
|
| 38668 |
+
"learning_rate": 0.0005544929777406586,
|
| 38669 |
+
"loss": 1.5022,
|
| 38670 |
+
"step": 10998
|
| 38671 |
+
},
|
| 38672 |
+
{
|
| 38673 |
+
"epoch": 0.4890845226979681,
|
| 38674 |
+
"grad_norm": 0.06402657181024551,
|
| 38675 |
+
"learning_rate": 0.0005543476576334109,
|
| 38676 |
+
"loss": 1.4992,
|
| 38677 |
+
"step": 11000
|
| 38678 |
+
},
|
| 38679 |
+
{
|
| 38680 |
+
"epoch": 0.4890845226979681,
|
| 38681 |
+
"eval_loss": 1.4820914268493652,
|
| 38682 |
+
"eval_runtime": 13.0255,
|
| 38683 |
+
"eval_samples_per_second": 530.5,
|
| 38684 |
+
"eval_steps_per_second": 66.332,
|
| 38685 |
+
"step": 11000
|
| 38686 |
}
|
| 38687 |
],
|
| 38688 |
"logging_steps": 2,
|
|
|
|
| 38702 |
"attributes": {}
|
| 38703 |
}
|
| 38704 |
},
|
| 38705 |
+
"total_flos": 2.353704355233792e+19,
|
| 38706 |
"train_batch_size": 768,
|
| 38707 |
"trial_name": null,
|
| 38708 |
"trial_params": null
|
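The hunk above is the tail of the Trainer's `log_history` in `trainer_state.json`: train entries carry `loss`, eval entries carry `eval_loss`. As a minimal sketch (standard-library Python only; the path matches this checkpoint layout, adjust for your clone), the two series can be separated by which keys each entry has:

```python
import json

# Load the checkpoint's trainer state from this commit's layout.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Train-step logs carry "loss"; evaluation logs carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"global step: {state['global_step']}, best metric: {state['best_metric']}")
print(f"latest train loss: {train_logs[-1]['loss']} at step {train_logs[-1]['step']}")
print(f"latest eval loss:  {eval_logs[-1]['eval_loss']} at step {eval_logs[-1]['step']}")
```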
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c16b168a1b45c2dd9bad38e7f3121cc40352741df7801b098a9bb22dcd0ff219
 size 5240
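Entries like `training_args.bin` are stored as Git LFS pointer files, so the diff only swaps the `oid sha256:` line: the pointer records the content hash and byte size, not the bytes themselves. A small sketch (Python; assumes the actual binary has been fetched, e.g. via `git lfs pull`) for checking a downloaded file against its pointer:

```python
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """Recompute the sha256 oid Git LFS stores in the pointer file."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in chunks so large weight files don't need to fit in memory.
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

# Compare against the oid committed in this pointer.
assert lfs_oid("last-checkpoint/training_args.bin") == \
    "c16b168a1b45c2dd9bad38e7f3121cc40352741df7801b098a9bb22dcd0ff219"
```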