Training in progress, step 3430, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4991025680
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da5a0bdf59230579992d741a50df2f015dc95da21e5c9ad8df62b05e5a074114
|
| 3 |
size 4991025680
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1610725592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7ee4641179f879934111d4dff301dfcb8c32980135f0a1f4d1e120daf291603
|
| 3 |
size 1610725592
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9583813675
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:252cadf1a76eaa5aa79a2138869590b4e7292ef471eb403887942e733bc1fb44
|
| 3 |
size 9583813675
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60cafcb642e347b846ba10f93f2c1574ee79e1cf114e3211090b0ee3a25ab013
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f9642ce6ab8f2a426d884031eda0edbcb8aa9ba985e47eebee8e0dd7b5813a6
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 1000,
|
| 3 |
"best_metric": 4.025330543518066,
|
| 4 |
"best_model_checkpoint": "./snac_checkpoints-full/checkpoint-1000",
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -21056,6 +21056,3016 @@
|
|
| 21056 |
"eval_samples_per_second": 29.828,
|
| 21057 |
"eval_steps_per_second": 7.457,
|
| 21058 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21059 |
}
|
| 21060 |
],
|
| 21061 |
"logging_steps": 1,
|
|
@@ -21070,12 +24080,12 @@
|
|
| 21070 |
"should_evaluate": false,
|
| 21071 |
"should_log": false,
|
| 21072 |
"should_save": true,
|
| 21073 |
-
"should_training_stop":
|
| 21074 |
},
|
| 21075 |
"attributes": {}
|
| 21076 |
}
|
| 21077 |
},
|
| 21078 |
-
"total_flos":
|
| 21079 |
"train_batch_size": 32,
|
| 21080 |
"trial_name": null,
|
| 21081 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 1000,
|
| 3 |
"best_metric": 4.025330543518066,
|
| 4 |
"best_model_checkpoint": "./snac_checkpoints-full/checkpoint-1000",
|
| 5 |
+
"epoch": 7.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 3430,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 21056 |
"eval_samples_per_second": 29.828,
|
| 21057 |
"eval_steps_per_second": 7.457,
|
| 21058 |
"step": 3000
|
| 21059 |
+
},
|
| 21060 |
+
{
|
| 21061 |
+
"epoch": 6.124489795918367,
|
| 21062 |
+
"grad_norm": 2.46875,
|
| 21063 |
+
"learning_rate": 1.3929381276320053e-05,
|
| 21064 |
+
"loss": 0.8544,
|
| 21065 |
+
"step": 3001
|
| 21066 |
+
},
|
| 21067 |
+
{
|
| 21068 |
+
"epoch": 6.126530612244898,
|
| 21069 |
+
"grad_norm": 2.59375,
|
| 21070 |
+
"learning_rate": 1.3896987366375121e-05,
|
| 21071 |
+
"loss": 0.8821,
|
| 21072 |
+
"step": 3002
|
| 21073 |
+
},
|
| 21074 |
+
{
|
| 21075 |
+
"epoch": 6.128571428571428,
|
| 21076 |
+
"grad_norm": 2.3125,
|
| 21077 |
+
"learning_rate": 1.3864593456430192e-05,
|
| 21078 |
+
"loss": 0.8304,
|
| 21079 |
+
"step": 3003
|
| 21080 |
+
},
|
| 21081 |
+
{
|
| 21082 |
+
"epoch": 6.130612244897959,
|
| 21083 |
+
"grad_norm": 2.5,
|
| 21084 |
+
"learning_rate": 1.3832199546485261e-05,
|
| 21085 |
+
"loss": 0.7474,
|
| 21086 |
+
"step": 3004
|
| 21087 |
+
},
|
| 21088 |
+
{
|
| 21089 |
+
"epoch": 6.13265306122449,
|
| 21090 |
+
"grad_norm": 2.84375,
|
| 21091 |
+
"learning_rate": 1.379980563654033e-05,
|
| 21092 |
+
"loss": 0.8452,
|
| 21093 |
+
"step": 3005
|
| 21094 |
+
},
|
| 21095 |
+
{
|
| 21096 |
+
"epoch": 6.134693877551021,
|
| 21097 |
+
"grad_norm": 2.34375,
|
| 21098 |
+
"learning_rate": 1.37674117265954e-05,
|
| 21099 |
+
"loss": 0.7788,
|
| 21100 |
+
"step": 3006
|
| 21101 |
+
},
|
| 21102 |
+
{
|
| 21103 |
+
"epoch": 6.136734693877551,
|
| 21104 |
+
"grad_norm": 2.5625,
|
| 21105 |
+
"learning_rate": 1.373501781665047e-05,
|
| 21106 |
+
"loss": 0.8852,
|
| 21107 |
+
"step": 3007
|
| 21108 |
+
},
|
| 21109 |
+
{
|
| 21110 |
+
"epoch": 6.1387755102040815,
|
| 21111 |
+
"grad_norm": 2.5,
|
| 21112 |
+
"learning_rate": 1.3702623906705539e-05,
|
| 21113 |
+
"loss": 0.8137,
|
| 21114 |
+
"step": 3008
|
| 21115 |
+
},
|
| 21116 |
+
{
|
| 21117 |
+
"epoch": 6.140816326530612,
|
| 21118 |
+
"grad_norm": 2.515625,
|
| 21119 |
+
"learning_rate": 1.367022999676061e-05,
|
| 21120 |
+
"loss": 0.8416,
|
| 21121 |
+
"step": 3009
|
| 21122 |
+
},
|
| 21123 |
+
{
|
| 21124 |
+
"epoch": 6.142857142857143,
|
| 21125 |
+
"grad_norm": 2.46875,
|
| 21126 |
+
"learning_rate": 1.3637836086815678e-05,
|
| 21127 |
+
"loss": 0.7968,
|
| 21128 |
+
"step": 3010
|
| 21129 |
+
},
|
| 21130 |
+
{
|
| 21131 |
+
"epoch": 6.144897959183673,
|
| 21132 |
+
"grad_norm": 2.359375,
|
| 21133 |
+
"learning_rate": 1.360544217687075e-05,
|
| 21134 |
+
"loss": 0.843,
|
| 21135 |
+
"step": 3011
|
| 21136 |
+
},
|
| 21137 |
+
{
|
| 21138 |
+
"epoch": 6.146938775510204,
|
| 21139 |
+
"grad_norm": 2.40625,
|
| 21140 |
+
"learning_rate": 1.357304826692582e-05,
|
| 21141 |
+
"loss": 0.8465,
|
| 21142 |
+
"step": 3012
|
| 21143 |
+
},
|
| 21144 |
+
{
|
| 21145 |
+
"epoch": 6.148979591836735,
|
| 21146 |
+
"grad_norm": 2.40625,
|
| 21147 |
+
"learning_rate": 1.3540654356980889e-05,
|
| 21148 |
+
"loss": 0.7731,
|
| 21149 |
+
"step": 3013
|
| 21150 |
+
},
|
| 21151 |
+
{
|
| 21152 |
+
"epoch": 6.151020408163265,
|
| 21153 |
+
"grad_norm": 2.46875,
|
| 21154 |
+
"learning_rate": 1.350826044703596e-05,
|
| 21155 |
+
"loss": 0.813,
|
| 21156 |
+
"step": 3014
|
| 21157 |
+
},
|
| 21158 |
+
{
|
| 21159 |
+
"epoch": 6.153061224489796,
|
| 21160 |
+
"grad_norm": 2.59375,
|
| 21161 |
+
"learning_rate": 1.3475866537091028e-05,
|
| 21162 |
+
"loss": 0.7976,
|
| 21163 |
+
"step": 3015
|
| 21164 |
+
},
|
| 21165 |
+
{
|
| 21166 |
+
"epoch": 6.155102040816327,
|
| 21167 |
+
"grad_norm": 2.34375,
|
| 21168 |
+
"learning_rate": 1.3443472627146097e-05,
|
| 21169 |
+
"loss": 0.8235,
|
| 21170 |
+
"step": 3016
|
| 21171 |
+
},
|
| 21172 |
+
{
|
| 21173 |
+
"epoch": 6.1571428571428575,
|
| 21174 |
+
"grad_norm": 2.3125,
|
| 21175 |
+
"learning_rate": 1.3411078717201168e-05,
|
| 21176 |
+
"loss": 0.8262,
|
| 21177 |
+
"step": 3017
|
| 21178 |
+
},
|
| 21179 |
+
{
|
| 21180 |
+
"epoch": 6.159183673469387,
|
| 21181 |
+
"grad_norm": 2.53125,
|
| 21182 |
+
"learning_rate": 1.3378684807256237e-05,
|
| 21183 |
+
"loss": 0.8519,
|
| 21184 |
+
"step": 3018
|
| 21185 |
+
},
|
| 21186 |
+
{
|
| 21187 |
+
"epoch": 6.161224489795918,
|
| 21188 |
+
"grad_norm": 2.515625,
|
| 21189 |
+
"learning_rate": 1.3346290897311306e-05,
|
| 21190 |
+
"loss": 0.8816,
|
| 21191 |
+
"step": 3019
|
| 21192 |
+
},
|
| 21193 |
+
{
|
| 21194 |
+
"epoch": 6.163265306122449,
|
| 21195 |
+
"grad_norm": 2.390625,
|
| 21196 |
+
"learning_rate": 1.3313896987366375e-05,
|
| 21197 |
+
"loss": 0.8324,
|
| 21198 |
+
"step": 3020
|
| 21199 |
+
},
|
| 21200 |
+
{
|
| 21201 |
+
"epoch": 6.16530612244898,
|
| 21202 |
+
"grad_norm": 2.703125,
|
| 21203 |
+
"learning_rate": 1.3281503077421445e-05,
|
| 21204 |
+
"loss": 0.7992,
|
| 21205 |
+
"step": 3021
|
| 21206 |
+
},
|
| 21207 |
+
{
|
| 21208 |
+
"epoch": 6.16734693877551,
|
| 21209 |
+
"grad_norm": 2.4375,
|
| 21210 |
+
"learning_rate": 1.3249109167476514e-05,
|
| 21211 |
+
"loss": 0.7889,
|
| 21212 |
+
"step": 3022
|
| 21213 |
+
},
|
| 21214 |
+
{
|
| 21215 |
+
"epoch": 6.169387755102041,
|
| 21216 |
+
"grad_norm": 2.375,
|
| 21217 |
+
"learning_rate": 1.3216715257531583e-05,
|
| 21218 |
+
"loss": 0.7467,
|
| 21219 |
+
"step": 3023
|
| 21220 |
+
},
|
| 21221 |
+
{
|
| 21222 |
+
"epoch": 6.171428571428572,
|
| 21223 |
+
"grad_norm": 2.4375,
|
| 21224 |
+
"learning_rate": 1.3184321347586654e-05,
|
| 21225 |
+
"loss": 0.8538,
|
| 21226 |
+
"step": 3024
|
| 21227 |
+
},
|
| 21228 |
+
{
|
| 21229 |
+
"epoch": 6.173469387755102,
|
| 21230 |
+
"grad_norm": 2.34375,
|
| 21231 |
+
"learning_rate": 1.3151927437641723e-05,
|
| 21232 |
+
"loss": 0.8073,
|
| 21233 |
+
"step": 3025
|
| 21234 |
+
},
|
| 21235 |
+
{
|
| 21236 |
+
"epoch": 6.1755102040816325,
|
| 21237 |
+
"grad_norm": 2.578125,
|
| 21238 |
+
"learning_rate": 1.3119533527696792e-05,
|
| 21239 |
+
"loss": 0.8491,
|
| 21240 |
+
"step": 3026
|
| 21241 |
+
},
|
| 21242 |
+
{
|
| 21243 |
+
"epoch": 6.177551020408163,
|
| 21244 |
+
"grad_norm": 2.296875,
|
| 21245 |
+
"learning_rate": 1.3087139617751864e-05,
|
| 21246 |
+
"loss": 0.816,
|
| 21247 |
+
"step": 3027
|
| 21248 |
+
},
|
| 21249 |
+
{
|
| 21250 |
+
"epoch": 6.179591836734694,
|
| 21251 |
+
"grad_norm": 2.625,
|
| 21252 |
+
"learning_rate": 1.3054745707806935e-05,
|
| 21253 |
+
"loss": 0.7944,
|
| 21254 |
+
"step": 3028
|
| 21255 |
+
},
|
| 21256 |
+
{
|
| 21257 |
+
"epoch": 6.181632653061224,
|
| 21258 |
+
"grad_norm": 2.546875,
|
| 21259 |
+
"learning_rate": 1.3022351797862004e-05,
|
| 21260 |
+
"loss": 0.8029,
|
| 21261 |
+
"step": 3029
|
| 21262 |
+
},
|
| 21263 |
+
{
|
| 21264 |
+
"epoch": 6.183673469387755,
|
| 21265 |
+
"grad_norm": 2.40625,
|
| 21266 |
+
"learning_rate": 1.2989957887917073e-05,
|
| 21267 |
+
"loss": 0.7615,
|
| 21268 |
+
"step": 3030
|
| 21269 |
+
},
|
| 21270 |
+
{
|
| 21271 |
+
"epoch": 6.185714285714286,
|
| 21272 |
+
"grad_norm": 2.640625,
|
| 21273 |
+
"learning_rate": 1.2957563977972142e-05,
|
| 21274 |
+
"loss": 0.8891,
|
| 21275 |
+
"step": 3031
|
| 21276 |
+
},
|
| 21277 |
+
{
|
| 21278 |
+
"epoch": 6.187755102040816,
|
| 21279 |
+
"grad_norm": 2.84375,
|
| 21280 |
+
"learning_rate": 1.2925170068027212e-05,
|
| 21281 |
+
"loss": 0.9069,
|
| 21282 |
+
"step": 3032
|
| 21283 |
+
},
|
| 21284 |
+
{
|
| 21285 |
+
"epoch": 6.189795918367347,
|
| 21286 |
+
"grad_norm": 2.578125,
|
| 21287 |
+
"learning_rate": 1.2892776158082281e-05,
|
| 21288 |
+
"loss": 0.8789,
|
| 21289 |
+
"step": 3033
|
| 21290 |
+
},
|
| 21291 |
+
{
|
| 21292 |
+
"epoch": 6.191836734693878,
|
| 21293 |
+
"grad_norm": 2.484375,
|
| 21294 |
+
"learning_rate": 1.286038224813735e-05,
|
| 21295 |
+
"loss": 0.8501,
|
| 21296 |
+
"step": 3034
|
| 21297 |
+
},
|
| 21298 |
+
{
|
| 21299 |
+
"epoch": 6.1938775510204085,
|
| 21300 |
+
"grad_norm": 2.46875,
|
| 21301 |
+
"learning_rate": 1.2827988338192421e-05,
|
| 21302 |
+
"loss": 0.82,
|
| 21303 |
+
"step": 3035
|
| 21304 |
+
},
|
| 21305 |
+
{
|
| 21306 |
+
"epoch": 6.1959183673469385,
|
| 21307 |
+
"grad_norm": 2.296875,
|
| 21308 |
+
"learning_rate": 1.279559442824749e-05,
|
| 21309 |
+
"loss": 0.7979,
|
| 21310 |
+
"step": 3036
|
| 21311 |
+
},
|
| 21312 |
+
{
|
| 21313 |
+
"epoch": 6.197959183673469,
|
| 21314 |
+
"grad_norm": 2.46875,
|
| 21315 |
+
"learning_rate": 1.2763200518302559e-05,
|
| 21316 |
+
"loss": 0.7961,
|
| 21317 |
+
"step": 3037
|
| 21318 |
+
},
|
| 21319 |
+
{
|
| 21320 |
+
"epoch": 6.2,
|
| 21321 |
+
"grad_norm": 2.359375,
|
| 21322 |
+
"learning_rate": 1.273080660835763e-05,
|
| 21323 |
+
"loss": 0.7912,
|
| 21324 |
+
"step": 3038
|
| 21325 |
+
},
|
| 21326 |
+
{
|
| 21327 |
+
"epoch": 6.202040816326531,
|
| 21328 |
+
"grad_norm": 2.3125,
|
| 21329 |
+
"learning_rate": 1.2698412698412699e-05,
|
| 21330 |
+
"loss": 0.7595,
|
| 21331 |
+
"step": 3039
|
| 21332 |
+
},
|
| 21333 |
+
{
|
| 21334 |
+
"epoch": 6.204081632653061,
|
| 21335 |
+
"grad_norm": 2.390625,
|
| 21336 |
+
"learning_rate": 1.2666018788467768e-05,
|
| 21337 |
+
"loss": 0.8133,
|
| 21338 |
+
"step": 3040
|
| 21339 |
+
},
|
| 21340 |
+
{
|
| 21341 |
+
"epoch": 6.206122448979592,
|
| 21342 |
+
"grad_norm": 2.53125,
|
| 21343 |
+
"learning_rate": 1.2633624878522836e-05,
|
| 21344 |
+
"loss": 0.8362,
|
| 21345 |
+
"step": 3041
|
| 21346 |
+
},
|
| 21347 |
+
{
|
| 21348 |
+
"epoch": 6.208163265306123,
|
| 21349 |
+
"grad_norm": 2.28125,
|
| 21350 |
+
"learning_rate": 1.2601230968577907e-05,
|
| 21351 |
+
"loss": 0.767,
|
| 21352 |
+
"step": 3042
|
| 21353 |
+
},
|
| 21354 |
+
{
|
| 21355 |
+
"epoch": 6.210204081632653,
|
| 21356 |
+
"grad_norm": 2.65625,
|
| 21357 |
+
"learning_rate": 1.2568837058632976e-05,
|
| 21358 |
+
"loss": 0.8336,
|
| 21359 |
+
"step": 3043
|
| 21360 |
+
},
|
| 21361 |
+
{
|
| 21362 |
+
"epoch": 6.2122448979591836,
|
| 21363 |
+
"grad_norm": 2.25,
|
| 21364 |
+
"learning_rate": 1.2536443148688048e-05,
|
| 21365 |
+
"loss": 0.821,
|
| 21366 |
+
"step": 3044
|
| 21367 |
+
},
|
| 21368 |
+
{
|
| 21369 |
+
"epoch": 6.214285714285714,
|
| 21370 |
+
"grad_norm": 2.4375,
|
| 21371 |
+
"learning_rate": 1.2504049238743117e-05,
|
| 21372 |
+
"loss": 0.7919,
|
| 21373 |
+
"step": 3045
|
| 21374 |
+
},
|
| 21375 |
+
{
|
| 21376 |
+
"epoch": 6.216326530612245,
|
| 21377 |
+
"grad_norm": 2.421875,
|
| 21378 |
+
"learning_rate": 1.2471655328798186e-05,
|
| 21379 |
+
"loss": 0.8154,
|
| 21380 |
+
"step": 3046
|
| 21381 |
+
},
|
| 21382 |
+
{
|
| 21383 |
+
"epoch": 6.218367346938775,
|
| 21384 |
+
"grad_norm": 2.484375,
|
| 21385 |
+
"learning_rate": 1.2439261418853255e-05,
|
| 21386 |
+
"loss": 0.7772,
|
| 21387 |
+
"step": 3047
|
| 21388 |
+
},
|
| 21389 |
+
{
|
| 21390 |
+
"epoch": 6.220408163265306,
|
| 21391 |
+
"grad_norm": 2.46875,
|
| 21392 |
+
"learning_rate": 1.2406867508908326e-05,
|
| 21393 |
+
"loss": 0.852,
|
| 21394 |
+
"step": 3048
|
| 21395 |
+
},
|
| 21396 |
+
{
|
| 21397 |
+
"epoch": 6.222448979591837,
|
| 21398 |
+
"grad_norm": 2.359375,
|
| 21399 |
+
"learning_rate": 1.2374473598963397e-05,
|
| 21400 |
+
"loss": 0.7787,
|
| 21401 |
+
"step": 3049
|
| 21402 |
+
},
|
| 21403 |
+
{
|
| 21404 |
+
"epoch": 6.224489795918367,
|
| 21405 |
+
"grad_norm": 2.546875,
|
| 21406 |
+
"learning_rate": 1.2342079689018466e-05,
|
| 21407 |
+
"loss": 0.7517,
|
| 21408 |
+
"step": 3050
|
| 21409 |
+
},
|
| 21410 |
+
{
|
| 21411 |
+
"epoch": 6.226530612244898,
|
| 21412 |
+
"grad_norm": 2.5625,
|
| 21413 |
+
"learning_rate": 1.2309685779073535e-05,
|
| 21414 |
+
"loss": 0.8715,
|
| 21415 |
+
"step": 3051
|
| 21416 |
+
},
|
| 21417 |
+
{
|
| 21418 |
+
"epoch": 6.228571428571429,
|
| 21419 |
+
"grad_norm": 2.453125,
|
| 21420 |
+
"learning_rate": 1.2277291869128604e-05,
|
| 21421 |
+
"loss": 0.8348,
|
| 21422 |
+
"step": 3052
|
| 21423 |
+
},
|
| 21424 |
+
{
|
| 21425 |
+
"epoch": 6.2306122448979595,
|
| 21426 |
+
"grad_norm": 2.34375,
|
| 21427 |
+
"learning_rate": 1.2244897959183674e-05,
|
| 21428 |
+
"loss": 0.8242,
|
| 21429 |
+
"step": 3053
|
| 21430 |
+
},
|
| 21431 |
+
{
|
| 21432 |
+
"epoch": 6.2326530612244895,
|
| 21433 |
+
"grad_norm": 2.390625,
|
| 21434 |
+
"learning_rate": 1.2212504049238743e-05,
|
| 21435 |
+
"loss": 0.7916,
|
| 21436 |
+
"step": 3054
|
| 21437 |
+
},
|
| 21438 |
+
{
|
| 21439 |
+
"epoch": 6.23469387755102,
|
| 21440 |
+
"grad_norm": 2.3125,
|
| 21441 |
+
"learning_rate": 1.2180110139293812e-05,
|
| 21442 |
+
"loss": 0.7629,
|
| 21443 |
+
"step": 3055
|
| 21444 |
+
},
|
| 21445 |
+
{
|
| 21446 |
+
"epoch": 6.236734693877551,
|
| 21447 |
+
"grad_norm": 2.546875,
|
| 21448 |
+
"learning_rate": 1.2147716229348883e-05,
|
| 21449 |
+
"loss": 0.8209,
|
| 21450 |
+
"step": 3056
|
| 21451 |
+
},
|
| 21452 |
+
{
|
| 21453 |
+
"epoch": 6.238775510204082,
|
| 21454 |
+
"grad_norm": 2.46875,
|
| 21455 |
+
"learning_rate": 1.2115322319403953e-05,
|
| 21456 |
+
"loss": 0.8332,
|
| 21457 |
+
"step": 3057
|
| 21458 |
+
},
|
| 21459 |
+
{
|
| 21460 |
+
"epoch": 6.240816326530612,
|
| 21461 |
+
"grad_norm": 2.390625,
|
| 21462 |
+
"learning_rate": 1.2082928409459022e-05,
|
| 21463 |
+
"loss": 0.7971,
|
| 21464 |
+
"step": 3058
|
| 21465 |
+
},
|
| 21466 |
+
{
|
| 21467 |
+
"epoch": 6.242857142857143,
|
| 21468 |
+
"grad_norm": 2.359375,
|
| 21469 |
+
"learning_rate": 1.2050534499514091e-05,
|
| 21470 |
+
"loss": 0.7935,
|
| 21471 |
+
"step": 3059
|
| 21472 |
+
},
|
| 21473 |
+
{
|
| 21474 |
+
"epoch": 6.244897959183674,
|
| 21475 |
+
"grad_norm": 2.390625,
|
| 21476 |
+
"learning_rate": 1.2018140589569162e-05,
|
| 21477 |
+
"loss": 0.8895,
|
| 21478 |
+
"step": 3060
|
| 21479 |
+
},
|
| 21480 |
+
{
|
| 21481 |
+
"epoch": 6.246938775510204,
|
| 21482 |
+
"grad_norm": 2.40625,
|
| 21483 |
+
"learning_rate": 1.1985746679624231e-05,
|
| 21484 |
+
"loss": 0.882,
|
| 21485 |
+
"step": 3061
|
| 21486 |
+
},
|
| 21487 |
+
{
|
| 21488 |
+
"epoch": 6.248979591836735,
|
| 21489 |
+
"grad_norm": 2.671875,
|
| 21490 |
+
"learning_rate": 1.19533527696793e-05,
|
| 21491 |
+
"loss": 0.8206,
|
| 21492 |
+
"step": 3062
|
| 21493 |
+
},
|
| 21494 |
+
{
|
| 21495 |
+
"epoch": 6.251020408163265,
|
| 21496 |
+
"grad_norm": 2.40625,
|
| 21497 |
+
"learning_rate": 1.192095885973437e-05,
|
| 21498 |
+
"loss": 0.8074,
|
| 21499 |
+
"step": 3063
|
| 21500 |
+
},
|
| 21501 |
+
{
|
| 21502 |
+
"epoch": 6.253061224489796,
|
| 21503 |
+
"grad_norm": 2.40625,
|
| 21504 |
+
"learning_rate": 1.1888564949789441e-05,
|
| 21505 |
+
"loss": 0.7785,
|
| 21506 |
+
"step": 3064
|
| 21507 |
+
},
|
| 21508 |
+
{
|
| 21509 |
+
"epoch": 6.255102040816326,
|
| 21510 |
+
"grad_norm": 2.375,
|
| 21511 |
+
"learning_rate": 1.185617103984451e-05,
|
| 21512 |
+
"loss": 0.7863,
|
| 21513 |
+
"step": 3065
|
| 21514 |
+
},
|
| 21515 |
+
{
|
| 21516 |
+
"epoch": 6.257142857142857,
|
| 21517 |
+
"grad_norm": 2.3125,
|
| 21518 |
+
"learning_rate": 1.182377712989958e-05,
|
| 21519 |
+
"loss": 0.7716,
|
| 21520 |
+
"step": 3066
|
| 21521 |
+
},
|
| 21522 |
+
{
|
| 21523 |
+
"epoch": 6.259183673469388,
|
| 21524 |
+
"grad_norm": 2.4375,
|
| 21525 |
+
"learning_rate": 1.179138321995465e-05,
|
| 21526 |
+
"loss": 0.7988,
|
| 21527 |
+
"step": 3067
|
| 21528 |
+
},
|
| 21529 |
+
{
|
| 21530 |
+
"epoch": 6.261224489795918,
|
| 21531 |
+
"grad_norm": 2.484375,
|
| 21532 |
+
"learning_rate": 1.1758989310009719e-05,
|
| 21533 |
+
"loss": 0.8253,
|
| 21534 |
+
"step": 3068
|
| 21535 |
+
},
|
| 21536 |
+
{
|
| 21537 |
+
"epoch": 6.263265306122449,
|
| 21538 |
+
"grad_norm": 2.5625,
|
| 21539 |
+
"learning_rate": 1.1726595400064788e-05,
|
| 21540 |
+
"loss": 0.7901,
|
| 21541 |
+
"step": 3069
|
| 21542 |
+
},
|
| 21543 |
+
{
|
| 21544 |
+
"epoch": 6.26530612244898,
|
| 21545 |
+
"grad_norm": 2.53125,
|
| 21546 |
+
"learning_rate": 1.1694201490119858e-05,
|
| 21547 |
+
"loss": 0.8628,
|
| 21548 |
+
"step": 3070
|
| 21549 |
+
},
|
| 21550 |
+
{
|
| 21551 |
+
"epoch": 6.2673469387755105,
|
| 21552 |
+
"grad_norm": 2.515625,
|
| 21553 |
+
"learning_rate": 1.1661807580174927e-05,
|
| 21554 |
+
"loss": 0.8475,
|
| 21555 |
+
"step": 3071
|
| 21556 |
+
},
|
| 21557 |
+
{
|
| 21558 |
+
"epoch": 6.2693877551020405,
|
| 21559 |
+
"grad_norm": 2.609375,
|
| 21560 |
+
"learning_rate": 1.1629413670229998e-05,
|
| 21561 |
+
"loss": 0.7617,
|
| 21562 |
+
"step": 3072
|
| 21563 |
+
},
|
| 21564 |
+
{
|
| 21565 |
+
"epoch": 6.271428571428571,
|
| 21566 |
+
"grad_norm": 2.25,
|
| 21567 |
+
"learning_rate": 1.1597019760285067e-05,
|
| 21568 |
+
"loss": 0.7973,
|
| 21569 |
+
"step": 3073
|
| 21570 |
+
},
|
| 21571 |
+
{
|
| 21572 |
+
"epoch": 6.273469387755102,
|
| 21573 |
+
"grad_norm": 2.640625,
|
| 21574 |
+
"learning_rate": 1.1564625850340138e-05,
|
| 21575 |
+
"loss": 0.7726,
|
| 21576 |
+
"step": 3074
|
| 21577 |
+
},
|
| 21578 |
+
{
|
| 21579 |
+
"epoch": 6.275510204081632,
|
| 21580 |
+
"grad_norm": 2.53125,
|
| 21581 |
+
"learning_rate": 1.1532231940395207e-05,
|
| 21582 |
+
"loss": 0.7822,
|
| 21583 |
+
"step": 3075
|
| 21584 |
+
},
|
| 21585 |
+
{
|
| 21586 |
+
"epoch": 6.277551020408163,
|
| 21587 |
+
"grad_norm": 2.5625,
|
| 21588 |
+
"learning_rate": 1.1499838030450276e-05,
|
| 21589 |
+
"loss": 0.9039,
|
| 21590 |
+
"step": 3076
|
| 21591 |
+
},
|
| 21592 |
+
{
|
| 21593 |
+
"epoch": 6.279591836734694,
|
| 21594 |
+
"grad_norm": 2.546875,
|
| 21595 |
+
"learning_rate": 1.1467444120505345e-05,
|
| 21596 |
+
"loss": 0.8531,
|
| 21597 |
+
"step": 3077
|
| 21598 |
+
},
|
| 21599 |
+
{
|
| 21600 |
+
"epoch": 6.281632653061225,
|
| 21601 |
+
"grad_norm": 2.8125,
|
| 21602 |
+
"learning_rate": 1.1435050210560415e-05,
|
| 21603 |
+
"loss": 0.894,
|
| 21604 |
+
"step": 3078
|
| 21605 |
+
},
|
| 21606 |
+
{
|
| 21607 |
+
"epoch": 6.283673469387755,
|
| 21608 |
+
"grad_norm": 2.609375,
|
| 21609 |
+
"learning_rate": 1.1402656300615484e-05,
|
| 21610 |
+
"loss": 0.8102,
|
| 21611 |
+
"step": 3079
|
| 21612 |
+
},
|
| 21613 |
+
{
|
| 21614 |
+
"epoch": 6.285714285714286,
|
| 21615 |
+
"grad_norm": 2.453125,
|
| 21616 |
+
"learning_rate": 1.1370262390670553e-05,
|
| 21617 |
+
"loss": 0.8188,
|
| 21618 |
+
"step": 3080
|
| 21619 |
+
},
|
| 21620 |
+
{
|
| 21621 |
+
"epoch": 6.2877551020408164,
|
| 21622 |
+
"grad_norm": 2.5,
|
| 21623 |
+
"learning_rate": 1.1337868480725626e-05,
|
| 21624 |
+
"loss": 0.8201,
|
| 21625 |
+
"step": 3081
|
| 21626 |
+
},
|
| 21627 |
+
{
|
| 21628 |
+
"epoch": 6.289795918367347,
|
| 21629 |
+
"grad_norm": 2.53125,
|
| 21630 |
+
"learning_rate": 1.1305474570780694e-05,
|
| 21631 |
+
"loss": 0.8298,
|
| 21632 |
+
"step": 3082
|
| 21633 |
+
},
|
| 21634 |
+
{
|
| 21635 |
+
"epoch": 6.291836734693877,
|
| 21636 |
+
"grad_norm": 2.53125,
|
| 21637 |
+
"learning_rate": 1.1273080660835763e-05,
|
| 21638 |
+
"loss": 0.7766,
|
| 21639 |
+
"step": 3083
|
| 21640 |
+
},
|
| 21641 |
+
{
|
| 21642 |
+
"epoch": 6.293877551020408,
|
| 21643 |
+
"grad_norm": 2.5,
|
| 21644 |
+
"learning_rate": 1.1240686750890832e-05,
|
| 21645 |
+
"loss": 0.8098,
|
| 21646 |
+
"step": 3084
|
| 21647 |
+
},
|
| 21648 |
+
{
|
| 21649 |
+
"epoch": 6.295918367346939,
|
| 21650 |
+
"grad_norm": 2.53125,
|
| 21651 |
+
"learning_rate": 1.1208292840945903e-05,
|
| 21652 |
+
"loss": 0.7918,
|
| 21653 |
+
"step": 3085
|
| 21654 |
+
},
|
| 21655 |
+
{
|
| 21656 |
+
"epoch": 6.29795918367347,
|
| 21657 |
+
"grad_norm": 2.46875,
|
| 21658 |
+
"learning_rate": 1.1175898931000972e-05,
|
| 21659 |
+
"loss": 0.8648,
|
| 21660 |
+
"step": 3086
|
| 21661 |
+
},
|
| 21662 |
+
{
|
| 21663 |
+
"epoch": 6.3,
|
| 21664 |
+
"grad_norm": 2.453125,
|
| 21665 |
+
"learning_rate": 1.1143505021056041e-05,
|
| 21666 |
+
"loss": 0.8067,
|
| 21667 |
+
"step": 3087
|
| 21668 |
+
},
|
| 21669 |
+
{
|
| 21670 |
+
"epoch": 6.302040816326531,
|
| 21671 |
+
"grad_norm": 2.484375,
|
| 21672 |
+
"learning_rate": 1.1111111111111112e-05,
|
| 21673 |
+
"loss": 0.8346,
|
| 21674 |
+
"step": 3088
|
| 21675 |
+
},
|
| 21676 |
+
{
|
| 21677 |
+
"epoch": 6.3040816326530615,
|
| 21678 |
+
"grad_norm": 2.5,
|
| 21679 |
+
"learning_rate": 1.1078717201166182e-05,
|
| 21680 |
+
"loss": 0.7401,
|
| 21681 |
+
"step": 3089
|
| 21682 |
+
},
|
| 21683 |
+
{
|
| 21684 |
+
"epoch": 6.3061224489795915,
|
| 21685 |
+
"grad_norm": 2.640625,
|
| 21686 |
+
"learning_rate": 1.1046323291221251e-05,
|
| 21687 |
+
"loss": 0.8474,
|
| 21688 |
+
"step": 3090
|
| 21689 |
+
},
|
| 21690 |
+
{
|
| 21691 |
+
"epoch": 6.308163265306122,
|
| 21692 |
+
"grad_norm": 2.5,
|
| 21693 |
+
"learning_rate": 1.101392938127632e-05,
|
| 21694 |
+
"loss": 0.8069,
|
| 21695 |
+
"step": 3091
|
| 21696 |
+
},
|
| 21697 |
+
{
|
| 21698 |
+
"epoch": 6.310204081632653,
|
| 21699 |
+
"grad_norm": 2.5625,
|
| 21700 |
+
"learning_rate": 1.0981535471331391e-05,
|
| 21701 |
+
"loss": 0.8522,
|
| 21702 |
+
"step": 3092
|
| 21703 |
+
},
|
| 21704 |
+
{
|
| 21705 |
+
"epoch": 6.312244897959184,
|
| 21706 |
+
"grad_norm": 2.515625,
|
| 21707 |
+
"learning_rate": 1.094914156138646e-05,
|
| 21708 |
+
"loss": 0.7717,
|
| 21709 |
+
"step": 3093
|
| 21710 |
+
},
|
| 21711 |
+
{
|
| 21712 |
+
"epoch": 6.314285714285714,
|
| 21713 |
+
"grad_norm": 2.421875,
|
| 21714 |
+
"learning_rate": 1.0916747651441529e-05,
|
| 21715 |
+
"loss": 0.8352,
|
| 21716 |
+
"step": 3094
|
| 21717 |
+
},
|
| 21718 |
+
{
|
| 21719 |
+
"epoch": 6.316326530612245,
|
| 21720 |
+
"grad_norm": 2.34375,
|
| 21721 |
+
"learning_rate": 1.08843537414966e-05,
|
| 21722 |
+
"loss": 0.7743,
|
| 21723 |
+
"step": 3095
|
| 21724 |
+
},
|
| 21725 |
+
{
|
| 21726 |
+
"epoch": 6.318367346938776,
|
| 21727 |
+
"grad_norm": 2.75,
|
| 21728 |
+
"learning_rate": 1.0851959831551668e-05,
|
| 21729 |
+
"loss": 0.857,
|
| 21730 |
+
"step": 3096
|
| 21731 |
+
},
|
| 21732 |
+
{
|
| 21733 |
+
"epoch": 6.320408163265306,
|
| 21734 |
+
"grad_norm": 2.625,
|
| 21735 |
+
"learning_rate": 1.0819565921606739e-05,
|
| 21736 |
+
"loss": 0.8793,
|
| 21737 |
+
"step": 3097
|
| 21738 |
+
},
|
| 21739 |
+
{
|
| 21740 |
+
"epoch": 6.322448979591837,
|
| 21741 |
+
"grad_norm": 2.40625,
|
| 21742 |
+
"learning_rate": 1.0787172011661808e-05,
|
| 21743 |
+
"loss": 0.7947,
|
| 21744 |
+
"step": 3098
|
| 21745 |
+
},
|
| 21746 |
+
{
|
| 21747 |
+
"epoch": 6.3244897959183675,
|
| 21748 |
+
"grad_norm": 2.3125,
|
| 21749 |
+
"learning_rate": 1.0754778101716879e-05,
|
| 21750 |
+
"loss": 0.7887,
|
| 21751 |
+
"step": 3099
|
| 21752 |
+
},
|
| 21753 |
+
{
|
| 21754 |
+
"epoch": 6.326530612244898,
|
| 21755 |
+
"grad_norm": 2.421875,
|
| 21756 |
+
"learning_rate": 1.0722384191771948e-05,
|
| 21757 |
+
"loss": 0.8215,
|
| 21758 |
+
"step": 3100
|
| 21759 |
+
},
|
| 21760 |
+
{
|
| 21761 |
+
"epoch": 6.328571428571428,
|
| 21762 |
+
"grad_norm": 2.546875,
|
| 21763 |
+
"learning_rate": 1.0689990281827017e-05,
|
| 21764 |
+
"loss": 0.7912,
|
| 21765 |
+
"step": 3101
|
| 21766 |
+
},
|
| 21767 |
+
{
|
| 21768 |
+
"epoch": 6.330612244897959,
|
| 21769 |
+
"grad_norm": 2.609375,
|
| 21770 |
+
"learning_rate": 1.0657596371882087e-05,
|
| 21771 |
+
"loss": 0.8136,
|
| 21772 |
+
"step": 3102
|
| 21773 |
+
},
|
| 21774 |
+
{
|
| 21775 |
+
"epoch": 6.33265306122449,
|
| 21776 |
+
"grad_norm": 2.375,
|
| 21777 |
+
"learning_rate": 1.0625202461937156e-05,
|
| 21778 |
+
"loss": 0.8157,
|
| 21779 |
+
"step": 3103
|
| 21780 |
+
},
|
| 21781 |
+
{
|
| 21782 |
+
"epoch": 6.33469387755102,
|
| 21783 |
+
"grad_norm": 2.328125,
|
| 21784 |
+
"learning_rate": 1.0592808551992225e-05,
|
| 21785 |
+
"loss": 0.7969,
|
| 21786 |
+
"step": 3104
|
| 21787 |
+
},
|
| 21788 |
+
{
|
| 21789 |
+
"epoch": 6.336734693877551,
|
| 21790 |
+
"grad_norm": 2.265625,
|
| 21791 |
+
"learning_rate": 1.0560414642047296e-05,
|
| 21792 |
+
"loss": 0.7621,
|
| 21793 |
+
"step": 3105
|
| 21794 |
+
},
|
| 21795 |
+
{
|
| 21796 |
+
"epoch": 6.338775510204082,
|
| 21797 |
+
"grad_norm": 2.390625,
|
| 21798 |
+
"learning_rate": 1.0528020732102367e-05,
|
| 21799 |
+
"loss": 0.8642,
|
| 21800 |
+
"step": 3106
|
| 21801 |
+
},
|
| 21802 |
+
{
|
| 21803 |
+
"epoch": 6.340816326530613,
|
| 21804 |
+
"grad_norm": 2.53125,
|
| 21805 |
+
"learning_rate": 1.0495626822157436e-05,
|
| 21806 |
+
"loss": 0.8744,
|
| 21807 |
+
"step": 3107
|
| 21808 |
+
},
|
| 21809 |
+
{
|
| 21810 |
+
"epoch": 6.3428571428571425,
|
| 21811 |
+
"grad_norm": 2.703125,
|
| 21812 |
+
"learning_rate": 1.0463232912212504e-05,
|
| 21813 |
+
"loss": 0.8444,
|
| 21814 |
+
"step": 3108
|
| 21815 |
+
},
|
| 21816 |
+
{
|
| 21817 |
+
"epoch": 6.344897959183673,
|
| 21818 |
+
"grad_norm": 2.28125,
|
| 21819 |
+
"learning_rate": 1.0430839002267573e-05,
|
| 21820 |
+
"loss": 0.791,
|
| 21821 |
+
"step": 3109
|
| 21822 |
+
},
|
| 21823 |
+
{
|
| 21824 |
+
"epoch": 6.346938775510204,
|
| 21825 |
+
"grad_norm": 2.40625,
|
| 21826 |
+
"learning_rate": 1.0398445092322644e-05,
|
| 21827 |
+
"loss": 0.8152,
|
| 21828 |
+
"step": 3110
|
| 21829 |
+
},
|
| 21830 |
+
{
|
| 21831 |
+
"epoch": 6.348979591836735,
|
| 21832 |
+
"grad_norm": 2.40625,
|
| 21833 |
+
"learning_rate": 1.0366051182377713e-05,
|
| 21834 |
+
"loss": 0.8108,
|
| 21835 |
+
"step": 3111
|
| 21836 |
+
},
|
| 21837 |
+
{
|
| 21838 |
+
"epoch": 6.351020408163265,
|
| 21839 |
+
"grad_norm": 2.65625,
|
| 21840 |
+
"learning_rate": 1.0333657272432782e-05,
|
| 21841 |
+
"loss": 0.8797,
|
| 21842 |
+
"step": 3112
|
| 21843 |
+
},
|
| 21844 |
+
{
|
| 21845 |
+
"epoch": 6.353061224489796,
|
| 21846 |
+
"grad_norm": 2.609375,
|
| 21847 |
+
"learning_rate": 1.0301263362487853e-05,
|
| 21848 |
+
"loss": 0.803,
|
| 21849 |
+
"step": 3113
|
| 21850 |
+
},
|
| 21851 |
+
{
|
| 21852 |
+
"epoch": 6.355102040816327,
|
| 21853 |
+
"grad_norm": 2.5,
|
| 21854 |
+
"learning_rate": 1.0268869452542923e-05,
|
| 21855 |
+
"loss": 0.8266,
|
| 21856 |
+
"step": 3114
|
| 21857 |
+
},
|
| 21858 |
+
{
|
| 21859 |
+
"epoch": 6.357142857142857,
|
| 21860 |
+
"grad_norm": 2.921875,
|
| 21861 |
+
"learning_rate": 1.0236475542597992e-05,
|
| 21862 |
+
"loss": 0.8791,
|
| 21863 |
+
"step": 3115
|
| 21864 |
+
},
|
| 21865 |
+
{
|
| 21866 |
+
"epoch": 6.359183673469388,
|
| 21867 |
+
"grad_norm": 2.5625,
|
| 21868 |
+
"learning_rate": 1.0204081632653061e-05,
|
| 21869 |
+
"loss": 0.7947,
|
| 21870 |
+
"step": 3116
|
| 21871 |
+
},
|
| 21872 |
+
{
|
| 21873 |
+
"epoch": 6.3612244897959185,
|
| 21874 |
+
"grad_norm": 2.53125,
|
| 21875 |
+
"learning_rate": 1.0171687722708132e-05,
|
| 21876 |
+
"loss": 0.8718,
|
| 21877 |
+
"step": 3117
|
| 21878 |
+
},
|
| 21879 |
+
{
|
| 21880 |
+
"epoch": 6.363265306122449,
|
| 21881 |
+
"grad_norm": 2.53125,
|
| 21882 |
+
"learning_rate": 1.0139293812763201e-05,
|
| 21883 |
+
"loss": 0.7808,
|
| 21884 |
+
"step": 3118
|
| 21885 |
+
},
|
| 21886 |
+
{
|
| 21887 |
+
"epoch": 6.365306122448979,
|
| 21888 |
+
"grad_norm": 2.296875,
|
| 21889 |
+
"learning_rate": 1.010689990281827e-05,
|
| 21890 |
+
"loss": 0.8579,
|
| 21891 |
+
"step": 3119
|
| 21892 |
+
},
|
| 21893 |
+
{
|
| 21894 |
+
"epoch": 6.36734693877551,
|
| 21895 |
+
"grad_norm": 2.25,
|
| 21896 |
+
"learning_rate": 1.007450599287334e-05,
|
| 21897 |
+
"loss": 0.7282,
|
| 21898 |
+
"step": 3120
|
| 21899 |
+
},
|
| 21900 |
+
{
|
| 21901 |
+
"epoch": 6.369387755102041,
|
| 21902 |
+
"grad_norm": 2.765625,
|
| 21903 |
+
"learning_rate": 1.004211208292841e-05,
|
| 21904 |
+
"loss": 0.8556,
|
| 21905 |
+
"step": 3121
|
| 21906 |
+
},
|
| 21907 |
+
{
|
| 21908 |
+
"epoch": 6.371428571428572,
|
| 21909 |
+
"grad_norm": 2.65625,
|
| 21910 |
+
"learning_rate": 1.000971817298348e-05,
|
| 21911 |
+
"loss": 0.8057,
|
| 21912 |
+
"step": 3122
|
| 21913 |
+
},
|
| 21914 |
+
{
|
| 21915 |
+
"epoch": 6.373469387755102,
|
| 21916 |
+
"grad_norm": 2.578125,
|
| 21917 |
+
"learning_rate": 9.977324263038549e-06,
|
| 21918 |
+
"loss": 0.798,
|
| 21919 |
+
"step": 3123
|
| 21920 |
+
},
|
| 21921 |
+
{
|
| 21922 |
+
"epoch": 6.375510204081633,
|
| 21923 |
+
"grad_norm": 2.53125,
|
| 21924 |
+
"learning_rate": 9.94493035309362e-06,
|
| 21925 |
+
"loss": 0.8284,
|
| 21926 |
+
"step": 3124
|
| 21927 |
+
},
|
| 21928 |
+
{
|
| 21929 |
+
"epoch": 6.377551020408164,
|
| 21930 |
+
"grad_norm": 2.484375,
|
| 21931 |
+
"learning_rate": 9.912536443148689e-06,
|
| 21932 |
+
"loss": 0.8091,
|
| 21933 |
+
"step": 3125
|
| 21934 |
+
},
|
| 21935 |
+
{
|
| 21936 |
+
"epoch": 6.3795918367346935,
|
| 21937 |
+
"grad_norm": 2.515625,
|
| 21938 |
+
"learning_rate": 9.880142533203758e-06,
|
| 21939 |
+
"loss": 0.834,
|
| 21940 |
+
"step": 3126
|
| 21941 |
+
},
|
| 21942 |
+
{
|
| 21943 |
+
"epoch": 6.381632653061224,
|
| 21944 |
+
"grad_norm": 2.296875,
|
| 21945 |
+
"learning_rate": 9.847748623258828e-06,
|
| 21946 |
+
"loss": 0.8147,
|
| 21947 |
+
"step": 3127
|
| 21948 |
+
},
|
| 21949 |
+
{
|
| 21950 |
+
"epoch": 6.383673469387755,
|
| 21951 |
+
"grad_norm": 2.234375,
|
| 21952 |
+
"learning_rate": 9.815354713313897e-06,
|
| 21953 |
+
"loss": 0.7288,
|
| 21954 |
+
"step": 3128
|
| 21955 |
+
},
|
| 21956 |
+
{
|
| 21957 |
+
"epoch": 6.385714285714286,
|
| 21958 |
+
"grad_norm": 2.421875,
|
| 21959 |
+
"learning_rate": 9.782960803368966e-06,
|
| 21960 |
+
"loss": 0.8257,
|
| 21961 |
+
"step": 3129
|
| 21962 |
+
},
|
| 21963 |
+
{
|
| 21964 |
+
"epoch": 6.387755102040816,
|
| 21965 |
+
"grad_norm": 2.640625,
|
| 21966 |
+
"learning_rate": 9.750566893424037e-06,
|
| 21967 |
+
"loss": 0.8032,
|
| 21968 |
+
"step": 3130
|
| 21969 |
+
},
|
| 21970 |
+
{
|
| 21971 |
+
"epoch": 6.389795918367347,
|
| 21972 |
+
"grad_norm": 2.296875,
|
| 21973 |
+
"learning_rate": 9.718172983479108e-06,
|
| 21974 |
+
"loss": 0.7995,
|
| 21975 |
+
"step": 3131
|
| 21976 |
+
},
|
| 21977 |
+
{
|
| 21978 |
+
"epoch": 6.391836734693878,
|
| 21979 |
+
"grad_norm": 2.4375,
|
| 21980 |
+
"learning_rate": 9.685779073534177e-06,
|
| 21981 |
+
"loss": 0.7743,
|
| 21982 |
+
"step": 3132
|
| 21983 |
+
},
|
| 21984 |
+
{
|
| 21985 |
+
"epoch": 6.393877551020408,
|
| 21986 |
+
"grad_norm": 2.59375,
|
| 21987 |
+
"learning_rate": 9.653385163589246e-06,
|
| 21988 |
+
"loss": 0.8109,
|
| 21989 |
+
"step": 3133
|
| 21990 |
+
},
|
| 21991 |
+
{
|
| 21992 |
+
"epoch": 6.395918367346939,
|
| 21993 |
+
"grad_norm": 2.4375,
|
| 21994 |
+
"learning_rate": 9.620991253644314e-06,
|
| 21995 |
+
"loss": 0.7975,
|
| 21996 |
+
"step": 3134
|
| 21997 |
+
},
|
| 21998 |
+
{
|
| 21999 |
+
"epoch": 6.3979591836734695,
|
| 22000 |
+
"grad_norm": 2.46875,
|
| 22001 |
+
"learning_rate": 9.588597343699385e-06,
|
| 22002 |
+
"loss": 0.809,
|
| 22003 |
+
"step": 3135
|
| 22004 |
+
},
|
| 22005 |
+
{
|
| 22006 |
+
"epoch": 6.4,
|
| 22007 |
+
"grad_norm": 2.546875,
|
| 22008 |
+
"learning_rate": 9.556203433754454e-06,
|
| 22009 |
+
"loss": 0.833,
|
| 22010 |
+
"step": 3136
|
| 22011 |
+
},
|
| 22012 |
+
{
|
| 22013 |
+
"epoch": 6.40204081632653,
|
| 22014 |
+
"grad_norm": 2.421875,
|
| 22015 |
+
"learning_rate": 9.523809523809523e-06,
|
| 22016 |
+
"loss": 0.859,
|
| 22017 |
+
"step": 3137
|
| 22018 |
+
},
|
| 22019 |
+
{
|
| 22020 |
+
"epoch": 6.404081632653061,
|
| 22021 |
+
"grad_norm": 2.40625,
|
| 22022 |
+
"learning_rate": 9.491415613864595e-06,
|
| 22023 |
+
"loss": 0.7756,
|
| 22024 |
+
"step": 3138
|
| 22025 |
+
},
|
| 22026 |
+
{
|
| 22027 |
+
"epoch": 6.406122448979592,
|
| 22028 |
+
"grad_norm": 2.515625,
|
| 22029 |
+
"learning_rate": 9.459021703919664e-06,
|
| 22030 |
+
"loss": 0.8029,
|
| 22031 |
+
"step": 3139
|
| 22032 |
+
},
|
| 22033 |
+
{
|
| 22034 |
+
"epoch": 6.408163265306122,
|
| 22035 |
+
"grad_norm": 2.46875,
|
| 22036 |
+
"learning_rate": 9.426627793974733e-06,
|
| 22037 |
+
"loss": 0.7622,
|
| 22038 |
+
"step": 3140
|
| 22039 |
+
},
|
| 22040 |
+
{
|
| 22041 |
+
"epoch": 6.410204081632653,
|
| 22042 |
+
"grad_norm": 2.359375,
|
| 22043 |
+
"learning_rate": 9.394233884029802e-06,
|
| 22044 |
+
"loss": 0.8132,
|
| 22045 |
+
"step": 3141
|
| 22046 |
+
},
|
| 22047 |
+
{
|
| 22048 |
+
"epoch": 6.412244897959184,
|
| 22049 |
+
"grad_norm": 2.46875,
|
| 22050 |
+
"learning_rate": 9.361839974084873e-06,
|
| 22051 |
+
"loss": 0.8315,
|
| 22052 |
+
"step": 3142
|
| 22053 |
+
},
|
| 22054 |
+
{
|
| 22055 |
+
"epoch": 6.414285714285715,
|
| 22056 |
+
"grad_norm": 2.484375,
|
| 22057 |
+
"learning_rate": 9.329446064139942e-06,
|
| 22058 |
+
"loss": 0.8266,
|
| 22059 |
+
"step": 3143
|
| 22060 |
+
},
|
| 22061 |
+
{
|
| 22062 |
+
"epoch": 6.416326530612245,
|
| 22063 |
+
"grad_norm": 2.40625,
|
| 22064 |
+
"learning_rate": 9.297052154195011e-06,
|
| 22065 |
+
"loss": 0.7522,
|
| 22066 |
+
"step": 3144
|
| 22067 |
+
},
|
| 22068 |
+
{
|
| 22069 |
+
"epoch": 6.418367346938775,
|
| 22070 |
+
"grad_norm": 2.671875,
|
| 22071 |
+
"learning_rate": 9.264658244250082e-06,
|
| 22072 |
+
"loss": 0.7832,
|
| 22073 |
+
"step": 3145
|
| 22074 |
+
},
|
| 22075 |
+
{
|
| 22076 |
+
"epoch": 6.420408163265306,
|
| 22077 |
+
"grad_norm": 2.390625,
|
| 22078 |
+
"learning_rate": 9.23226433430515e-06,
|
| 22079 |
+
"loss": 0.7991,
|
| 22080 |
+
"step": 3146
|
| 22081 |
+
},
|
| 22082 |
+
{
|
| 22083 |
+
"epoch": 6.422448979591837,
|
| 22084 |
+
"grad_norm": 2.359375,
|
| 22085 |
+
"learning_rate": 9.199870424360221e-06,
|
| 22086 |
+
"loss": 0.8581,
|
| 22087 |
+
"step": 3147
|
| 22088 |
+
},
|
| 22089 |
+
{
|
| 22090 |
+
"epoch": 6.424489795918367,
|
| 22091 |
+
"grad_norm": 2.453125,
|
| 22092 |
+
"learning_rate": 9.16747651441529e-06,
|
| 22093 |
+
"loss": 0.817,
|
| 22094 |
+
"step": 3148
|
| 22095 |
+
},
|
| 22096 |
+
{
|
| 22097 |
+
"epoch": 6.426530612244898,
|
| 22098 |
+
"grad_norm": 2.328125,
|
| 22099 |
+
"learning_rate": 9.13508260447036e-06,
|
| 22100 |
+
"loss": 0.7831,
|
| 22101 |
+
"step": 3149
|
| 22102 |
+
},
|
| 22103 |
+
{
|
| 22104 |
+
"epoch": 6.428571428571429,
|
| 22105 |
+
"grad_norm": 2.1875,
|
| 22106 |
+
"learning_rate": 9.10268869452543e-06,
|
| 22107 |
+
"loss": 0.7685,
|
| 22108 |
+
"step": 3150
|
| 22109 |
+
},
|
| 22110 |
+
{
|
| 22111 |
+
"epoch": 6.430612244897959,
|
| 22112 |
+
"grad_norm": 2.421875,
|
| 22113 |
+
"learning_rate": 9.070294784580499e-06,
|
| 22114 |
+
"loss": 0.789,
|
| 22115 |
+
"step": 3151
|
| 22116 |
+
},
|
| 22117 |
+
{
|
| 22118 |
+
"epoch": 6.43265306122449,
|
| 22119 |
+
"grad_norm": 2.53125,
|
| 22120 |
+
"learning_rate": 9.03790087463557e-06,
|
| 22121 |
+
"loss": 0.8112,
|
| 22122 |
+
"step": 3152
|
| 22123 |
+
},
|
| 22124 |
+
{
|
| 22125 |
+
"epoch": 6.4346938775510205,
|
| 22126 |
+
"grad_norm": 2.59375,
|
| 22127 |
+
"learning_rate": 9.005506964690638e-06,
|
| 22128 |
+
"loss": 0.8298,
|
| 22129 |
+
"step": 3153
|
| 22130 |
+
},
|
| 22131 |
+
{
|
| 22132 |
+
"epoch": 6.436734693877551,
|
| 22133 |
+
"grad_norm": 2.390625,
|
| 22134 |
+
"learning_rate": 8.973113054745707e-06,
|
| 22135 |
+
"loss": 0.8014,
|
| 22136 |
+
"step": 3154
|
| 22137 |
+
},
|
| 22138 |
+
{
|
| 22139 |
+
"epoch": 6.438775510204081,
|
| 22140 |
+
"grad_norm": 2.609375,
|
| 22141 |
+
"learning_rate": 8.940719144800778e-06,
|
| 22142 |
+
"loss": 0.8091,
|
| 22143 |
+
"step": 3155
|
| 22144 |
+
},
|
| 22145 |
+
{
|
| 22146 |
+
"epoch": 6.440816326530612,
|
| 22147 |
+
"grad_norm": 2.3125,
|
| 22148 |
+
"learning_rate": 8.908325234855849e-06,
|
| 22149 |
+
"loss": 0.7423,
|
| 22150 |
+
"step": 3156
|
| 22151 |
+
},
|
| 22152 |
+
{
|
| 22153 |
+
"epoch": 6.442857142857143,
|
| 22154 |
+
"grad_norm": 2.28125,
|
| 22155 |
+
"learning_rate": 8.875931324910918e-06,
|
| 22156 |
+
"loss": 0.8788,
|
| 22157 |
+
"step": 3157
|
| 22158 |
+
},
|
| 22159 |
+
{
|
| 22160 |
+
"epoch": 6.444897959183674,
|
| 22161 |
+
"grad_norm": 2.4375,
|
| 22162 |
+
"learning_rate": 8.843537414965987e-06,
|
| 22163 |
+
"loss": 0.7966,
|
| 22164 |
+
"step": 3158
|
| 22165 |
+
},
|
| 22166 |
+
{
|
| 22167 |
+
"epoch": 6.446938775510204,
|
| 22168 |
+
"grad_norm": 2.3125,
|
| 22169 |
+
"learning_rate": 8.811143505021057e-06,
|
| 22170 |
+
"loss": 0.8059,
|
| 22171 |
+
"step": 3159
|
| 22172 |
+
},
|
| 22173 |
+
{
|
| 22174 |
+
"epoch": 6.448979591836735,
|
| 22175 |
+
"grad_norm": 2.5,
|
| 22176 |
+
"learning_rate": 8.778749595076126e-06,
|
| 22177 |
+
"loss": 0.8892,
|
| 22178 |
+
"step": 3160
|
| 22179 |
+
},
|
| 22180 |
+
{
|
| 22181 |
+
"epoch": 6.451020408163266,
|
| 22182 |
+
"grad_norm": 2.40625,
|
| 22183 |
+
"learning_rate": 8.746355685131195e-06,
|
| 22184 |
+
"loss": 0.7999,
|
| 22185 |
+
"step": 3161
|
| 22186 |
+
},
|
| 22187 |
+
{
|
| 22188 |
+
"epoch": 6.453061224489796,
|
| 22189 |
+
"grad_norm": 2.390625,
|
| 22190 |
+
"learning_rate": 8.713961775186264e-06,
|
| 22191 |
+
"loss": 0.853,
|
| 22192 |
+
"step": 3162
|
| 22193 |
+
},
|
| 22194 |
+
{
|
| 22195 |
+
"epoch": 6.455102040816326,
|
| 22196 |
+
"grad_norm": 2.59375,
|
| 22197 |
+
"learning_rate": 8.681567865241336e-06,
|
| 22198 |
+
"loss": 0.8077,
|
| 22199 |
+
"step": 3163
|
| 22200 |
+
},
|
| 22201 |
+
{
|
| 22202 |
+
"epoch": 6.457142857142857,
|
| 22203 |
+
"grad_norm": 2.25,
|
| 22204 |
+
"learning_rate": 8.649173955296405e-06,
|
| 22205 |
+
"loss": 0.8016,
|
| 22206 |
+
"step": 3164
|
| 22207 |
+
},
|
| 22208 |
+
{
|
| 22209 |
+
"epoch": 6.459183673469388,
|
| 22210 |
+
"grad_norm": 2.3125,
|
| 22211 |
+
"learning_rate": 8.616780045351474e-06,
|
| 22212 |
+
"loss": 0.7455,
|
| 22213 |
+
"step": 3165
|
| 22214 |
+
},
|
| 22215 |
+
{
|
| 22216 |
+
"epoch": 6.461224489795918,
|
| 22217 |
+
"grad_norm": 2.421875,
|
| 22218 |
+
"learning_rate": 8.584386135406543e-06,
|
| 22219 |
+
"loss": 0.8458,
|
| 22220 |
+
"step": 3166
|
| 22221 |
+
},
|
| 22222 |
+
{
|
| 22223 |
+
"epoch": 6.463265306122449,
|
| 22224 |
+
"grad_norm": 2.5,
|
| 22225 |
+
"learning_rate": 8.551992225461614e-06,
|
| 22226 |
+
"loss": 0.8079,
|
| 22227 |
+
"step": 3167
|
| 22228 |
+
},
|
| 22229 |
+
{
|
| 22230 |
+
"epoch": 6.46530612244898,
|
| 22231 |
+
"grad_norm": 2.5625,
|
| 22232 |
+
"learning_rate": 8.519598315516683e-06,
|
| 22233 |
+
"loss": 0.7935,
|
| 22234 |
+
"step": 3168
|
| 22235 |
+
},
|
| 22236 |
+
{
|
| 22237 |
+
"epoch": 6.46734693877551,
|
| 22238 |
+
"grad_norm": 2.3125,
|
| 22239 |
+
"learning_rate": 8.487204405571752e-06,
|
| 22240 |
+
"loss": 0.8373,
|
| 22241 |
+
"step": 3169
|
| 22242 |
+
},
|
| 22243 |
+
{
|
| 22244 |
+
"epoch": 6.469387755102041,
|
| 22245 |
+
"grad_norm": 2.59375,
|
| 22246 |
+
"learning_rate": 8.454810495626823e-06,
|
| 22247 |
+
"loss": 0.8089,
|
| 22248 |
+
"step": 3170
|
| 22249 |
+
},
|
| 22250 |
+
{
|
| 22251 |
+
"epoch": 6.4714285714285715,
|
| 22252 |
+
"grad_norm": 2.484375,
|
| 22253 |
+
"learning_rate": 8.422416585681893e-06,
|
| 22254 |
+
"loss": 0.8119,
|
| 22255 |
+
"step": 3171
|
| 22256 |
+
},
|
| 22257 |
+
{
|
| 22258 |
+
"epoch": 6.473469387755102,
|
| 22259 |
+
"grad_norm": 2.515625,
|
| 22260 |
+
"learning_rate": 8.390022675736962e-06,
|
| 22261 |
+
"loss": 0.9165,
|
| 22262 |
+
"step": 3172
|
| 22263 |
+
},
|
| 22264 |
+
{
|
| 22265 |
+
"epoch": 6.475510204081632,
|
| 22266 |
+
"grad_norm": 2.515625,
|
| 22267 |
+
"learning_rate": 8.357628765792031e-06,
|
| 22268 |
+
"loss": 0.7549,
|
| 22269 |
+
"step": 3173
|
| 22270 |
+
},
|
| 22271 |
+
{
|
| 22272 |
+
"epoch": 6.477551020408163,
|
| 22273 |
+
"grad_norm": 2.578125,
|
| 22274 |
+
"learning_rate": 8.325234855847102e-06,
|
| 22275 |
+
"loss": 0.8911,
|
| 22276 |
+
"step": 3174
|
| 22277 |
+
},
|
| 22278 |
+
{
|
| 22279 |
+
"epoch": 6.479591836734694,
|
| 22280 |
+
"grad_norm": 2.53125,
|
| 22281 |
+
"learning_rate": 8.29284094590217e-06,
|
| 22282 |
+
"loss": 0.8601,
|
| 22283 |
+
"step": 3175
|
| 22284 |
+
},
|
| 22285 |
+
{
|
| 22286 |
+
"epoch": 6.481632653061224,
|
| 22287 |
+
"grad_norm": 2.5,
|
| 22288 |
+
"learning_rate": 8.26044703595724e-06,
|
| 22289 |
+
"loss": 0.8139,
|
| 22290 |
+
"step": 3176
|
| 22291 |
+
},
|
| 22292 |
+
{
|
| 22293 |
+
"epoch": 6.483673469387755,
|
| 22294 |
+
"grad_norm": 2.53125,
|
| 22295 |
+
"learning_rate": 8.22805312601231e-06,
|
| 22296 |
+
"loss": 0.8243,
|
| 22297 |
+
"step": 3177
|
| 22298 |
+
},
|
| 22299 |
+
{
|
| 22300 |
+
"epoch": 6.485714285714286,
|
| 22301 |
+
"grad_norm": 2.46875,
|
| 22302 |
+
"learning_rate": 8.19565921606738e-06,
|
| 22303 |
+
"loss": 0.7648,
|
| 22304 |
+
"step": 3178
|
| 22305 |
+
},
|
| 22306 |
+
{
|
| 22307 |
+
"epoch": 6.487755102040817,
|
| 22308 |
+
"grad_norm": 2.609375,
|
| 22309 |
+
"learning_rate": 8.163265306122448e-06,
|
| 22310 |
+
"loss": 0.8437,
|
| 22311 |
+
"step": 3179
|
| 22312 |
+
},
|
| 22313 |
+
{
|
| 22314 |
+
"epoch": 6.489795918367347,
|
| 22315 |
+
"grad_norm": 2.609375,
|
| 22316 |
+
"learning_rate": 8.130871396177519e-06,
|
| 22317 |
+
"loss": 0.8532,
|
| 22318 |
+
"step": 3180
|
| 22319 |
+
},
|
| 22320 |
+
{
|
| 22321 |
+
"epoch": 6.4918367346938775,
|
| 22322 |
+
"grad_norm": 2.25,
|
| 22323 |
+
"learning_rate": 8.09847748623259e-06,
|
| 22324 |
+
"loss": 0.7887,
|
| 22325 |
+
"step": 3181
|
| 22326 |
+
},
|
| 22327 |
+
{
|
| 22328 |
+
"epoch": 6.493877551020408,
|
| 22329 |
+
"grad_norm": 2.296875,
|
| 22330 |
+
"learning_rate": 8.066083576287659e-06,
|
| 22331 |
+
"loss": 0.7737,
|
| 22332 |
+
"step": 3182
|
| 22333 |
+
},
|
| 22334 |
+
{
|
| 22335 |
+
"epoch": 6.495918367346939,
|
| 22336 |
+
"grad_norm": 2.515625,
|
| 22337 |
+
"learning_rate": 8.033689666342728e-06,
|
| 22338 |
+
"loss": 0.7514,
|
| 22339 |
+
"step": 3183
|
| 22340 |
+
},
|
| 22341 |
+
{
|
| 22342 |
+
"epoch": 6.497959183673469,
|
| 22343 |
+
"grad_norm": 2.71875,
|
| 22344 |
+
"learning_rate": 8.001295756397798e-06,
|
| 22345 |
+
"loss": 0.8424,
|
| 22346 |
+
"step": 3184
|
| 22347 |
+
},
|
| 22348 |
+
{
|
| 22349 |
+
"epoch": 6.5,
|
| 22350 |
+
"grad_norm": 2.296875,
|
| 22351 |
+
"learning_rate": 7.968901846452867e-06,
|
| 22352 |
+
"loss": 0.7592,
|
| 22353 |
+
"step": 3185
|
| 22354 |
+
},
|
| 22355 |
+
{
|
| 22356 |
+
"epoch": 6.502040816326531,
|
| 22357 |
+
"grad_norm": 2.375,
|
| 22358 |
+
"learning_rate": 7.936507936507936e-06,
|
| 22359 |
+
"loss": 0.7765,
|
| 22360 |
+
"step": 3186
|
| 22361 |
+
},
|
| 22362 |
+
{
|
| 22363 |
+
"epoch": 6.504081632653062,
|
| 22364 |
+
"grad_norm": 2.484375,
|
| 22365 |
+
"learning_rate": 7.904114026563005e-06,
|
| 22366 |
+
"loss": 0.7909,
|
| 22367 |
+
"step": 3187
|
| 22368 |
+
},
|
| 22369 |
+
{
|
| 22370 |
+
"epoch": 6.506122448979592,
|
| 22371 |
+
"grad_norm": 2.546875,
|
| 22372 |
+
"learning_rate": 7.871720116618077e-06,
|
| 22373 |
+
"loss": 0.8351,
|
| 22374 |
+
"step": 3188
|
| 22375 |
+
},
|
| 22376 |
+
{
|
| 22377 |
+
"epoch": 6.5081632653061225,
|
| 22378 |
+
"grad_norm": 2.546875,
|
| 22379 |
+
"learning_rate": 7.839326206673146e-06,
|
| 22380 |
+
"loss": 0.8427,
|
| 22381 |
+
"step": 3189
|
| 22382 |
+
},
|
| 22383 |
+
{
|
| 22384 |
+
"epoch": 6.510204081632653,
|
| 22385 |
+
"grad_norm": 2.40625,
|
| 22386 |
+
"learning_rate": 7.806932296728215e-06,
|
| 22387 |
+
"loss": 0.8148,
|
| 22388 |
+
"step": 3190
|
| 22389 |
+
},
|
| 22390 |
+
{
|
| 22391 |
+
"epoch": 6.512244897959183,
|
| 22392 |
+
"grad_norm": 2.296875,
|
| 22393 |
+
"learning_rate": 7.774538386783286e-06,
|
| 22394 |
+
"loss": 0.847,
|
| 22395 |
+
"step": 3191
|
| 22396 |
+
},
|
| 22397 |
+
{
|
| 22398 |
+
"epoch": 6.514285714285714,
|
| 22399 |
+
"grad_norm": 2.484375,
|
| 22400 |
+
"learning_rate": 7.742144476838355e-06,
|
| 22401 |
+
"loss": 0.7621,
|
| 22402 |
+
"step": 3192
|
| 22403 |
+
},
|
| 22404 |
+
{
|
| 22405 |
+
"epoch": 6.516326530612245,
|
| 22406 |
+
"grad_norm": 2.421875,
|
| 22407 |
+
"learning_rate": 7.709750566893424e-06,
|
| 22408 |
+
"loss": 0.8186,
|
| 22409 |
+
"step": 3193
|
| 22410 |
+
},
|
| 22411 |
+
{
|
| 22412 |
+
"epoch": 6.518367346938776,
|
| 22413 |
+
"grad_norm": 2.3125,
|
| 22414 |
+
"learning_rate": 7.677356656948493e-06,
|
| 22415 |
+
"loss": 0.8107,
|
| 22416 |
+
"step": 3194
|
| 22417 |
+
},
|
| 22418 |
+
{
|
| 22419 |
+
"epoch": 6.520408163265306,
|
| 22420 |
+
"grad_norm": 2.546875,
|
| 22421 |
+
"learning_rate": 7.644962747003564e-06,
|
| 22422 |
+
"loss": 0.8685,
|
| 22423 |
+
"step": 3195
|
| 22424 |
+
},
|
| 22425 |
+
{
|
| 22426 |
+
"epoch": 6.522448979591837,
|
| 22427 |
+
"grad_norm": 2.4375,
|
| 22428 |
+
"learning_rate": 7.612568837058634e-06,
|
| 22429 |
+
"loss": 0.7544,
|
| 22430 |
+
"step": 3196
|
| 22431 |
+
},
|
| 22432 |
+
{
|
| 22433 |
+
"epoch": 6.524489795918368,
|
| 22434 |
+
"grad_norm": 2.484375,
|
| 22435 |
+
"learning_rate": 7.580174927113703e-06,
|
| 22436 |
+
"loss": 0.8385,
|
| 22437 |
+
"step": 3197
|
| 22438 |
+
},
|
| 22439 |
+
{
|
| 22440 |
+
"epoch": 6.526530612244898,
|
| 22441 |
+
"grad_norm": 2.390625,
|
| 22442 |
+
"learning_rate": 7.547781017168773e-06,
|
| 22443 |
+
"loss": 0.7223,
|
| 22444 |
+
"step": 3198
|
| 22445 |
+
},
|
| 22446 |
+
{
|
| 22447 |
+
"epoch": 6.5285714285714285,
|
| 22448 |
+
"grad_norm": 2.4375,
|
| 22449 |
+
"learning_rate": 7.515387107223842e-06,
|
| 22450 |
+
"loss": 0.7613,
|
| 22451 |
+
"step": 3199
|
| 22452 |
+
},
|
| 22453 |
+
{
|
| 22454 |
+
"epoch": 6.530612244897959,
|
| 22455 |
+
"grad_norm": 2.5,
|
| 22456 |
+
"learning_rate": 7.482993197278912e-06,
|
| 22457 |
+
"loss": 0.7926,
|
| 22458 |
+
"step": 3200
|
| 22459 |
+
},
|
| 22460 |
+
{
|
| 22461 |
+
"epoch": 6.53265306122449,
|
| 22462 |
+
"grad_norm": 2.453125,
|
| 22463 |
+
"learning_rate": 7.450599287333982e-06,
|
| 22464 |
+
"loss": 0.7643,
|
| 22465 |
+
"step": 3201
|
| 22466 |
+
},
|
| 22467 |
+
{
|
| 22468 |
+
"epoch": 6.53469387755102,
|
| 22469 |
+
"grad_norm": 2.640625,
|
| 22470 |
+
"learning_rate": 7.418205377389051e-06,
|
| 22471 |
+
"loss": 0.8568,
|
| 22472 |
+
"step": 3202
|
| 22473 |
+
},
|
| 22474 |
+
{
|
| 22475 |
+
"epoch": 6.536734693877551,
|
| 22476 |
+
"grad_norm": 2.34375,
|
| 22477 |
+
"learning_rate": 7.38581146744412e-06,
|
| 22478 |
+
"loss": 0.7992,
|
| 22479 |
+
"step": 3203
|
| 22480 |
+
},
|
| 22481 |
+
{
|
| 22482 |
+
"epoch": 6.538775510204082,
|
| 22483 |
+
"grad_norm": 2.359375,
|
| 22484 |
+
"learning_rate": 7.35341755749919e-06,
|
| 22485 |
+
"loss": 0.7878,
|
| 22486 |
+
"step": 3204
|
| 22487 |
+
},
|
| 22488 |
+
{
|
| 22489 |
+
"epoch": 6.540816326530612,
|
| 22490 |
+
"grad_norm": 2.375,
|
| 22491 |
+
"learning_rate": 7.321023647554261e-06,
|
| 22492 |
+
"loss": 0.7886,
|
| 22493 |
+
"step": 3205
|
| 22494 |
+
},
|
| 22495 |
+
{
|
| 22496 |
+
"epoch": 6.542857142857143,
|
| 22497 |
+
"grad_norm": 2.53125,
|
| 22498 |
+
"learning_rate": 7.28862973760933e-06,
|
| 22499 |
+
"loss": 0.8078,
|
| 22500 |
+
"step": 3206
|
| 22501 |
+
},
|
| 22502 |
+
{
|
| 22503 |
+
"epoch": 6.544897959183674,
|
| 22504 |
+
"grad_norm": 2.65625,
|
| 22505 |
+
"learning_rate": 7.2562358276644e-06,
|
| 22506 |
+
"loss": 0.8891,
|
| 22507 |
+
"step": 3207
|
| 22508 |
+
},
|
| 22509 |
+
{
|
| 22510 |
+
"epoch": 6.546938775510204,
|
| 22511 |
+
"grad_norm": 2.640625,
|
| 22512 |
+
"learning_rate": 7.2238419177194695e-06,
|
| 22513 |
+
"loss": 0.8695,
|
| 22514 |
+
"step": 3208
|
| 22515 |
+
},
|
| 22516 |
+
{
|
| 22517 |
+
"epoch": 6.548979591836734,
|
| 22518 |
+
"grad_norm": 2.515625,
|
| 22519 |
+
"learning_rate": 7.191448007774538e-06,
|
| 22520 |
+
"loss": 0.8001,
|
| 22521 |
+
"step": 3209
|
| 22522 |
+
},
|
| 22523 |
+
{
|
| 22524 |
+
"epoch": 6.551020408163265,
|
| 22525 |
+
"grad_norm": 2.484375,
|
| 22526 |
+
"learning_rate": 7.159054097829608e-06,
|
| 22527 |
+
"loss": 0.7508,
|
| 22528 |
+
"step": 3210
|
| 22529 |
+
},
|
| 22530 |
+
{
|
| 22531 |
+
"epoch": 6.553061224489796,
|
| 22532 |
+
"grad_norm": 2.421875,
|
| 22533 |
+
"learning_rate": 7.126660187884677e-06,
|
| 22534 |
+
"loss": 0.8383,
|
| 22535 |
+
"step": 3211
|
| 22536 |
+
},
|
| 22537 |
+
{
|
| 22538 |
+
"epoch": 6.555102040816326,
|
| 22539 |
+
"grad_norm": 2.296875,
|
| 22540 |
+
"learning_rate": 7.094266277939747e-06,
|
| 22541 |
+
"loss": 0.838,
|
| 22542 |
+
"step": 3212
|
| 22543 |
+
},
|
| 22544 |
+
{
|
| 22545 |
+
"epoch": 6.557142857142857,
|
| 22546 |
+
"grad_norm": 2.4375,
|
| 22547 |
+
"learning_rate": 7.061872367994818e-06,
|
| 22548 |
+
"loss": 0.802,
|
| 22549 |
+
"step": 3213
|
| 22550 |
+
},
|
| 22551 |
+
{
|
| 22552 |
+
"epoch": 6.559183673469388,
|
| 22553 |
+
"grad_norm": 2.375,
|
| 22554 |
+
"learning_rate": 7.0294784580498875e-06,
|
| 22555 |
+
"loss": 0.7898,
|
| 22556 |
+
"step": 3214
|
| 22557 |
+
},
|
| 22558 |
+
{
|
| 22559 |
+
"epoch": 6.561224489795919,
|
| 22560 |
+
"grad_norm": 2.484375,
|
| 22561 |
+
"learning_rate": 6.9970845481049564e-06,
|
| 22562 |
+
"loss": 0.7663,
|
| 22563 |
+
"step": 3215
|
| 22564 |
+
},
|
| 22565 |
+
{
|
| 22566 |
+
"epoch": 6.563265306122449,
|
| 22567 |
+
"grad_norm": 2.359375,
|
| 22568 |
+
"learning_rate": 6.964690638160026e-06,
|
| 22569 |
+
"loss": 0.7638,
|
| 22570 |
+
"step": 3216
|
| 22571 |
+
},
|
| 22572 |
+
{
|
| 22573 |
+
"epoch": 6.5653061224489795,
|
| 22574 |
+
"grad_norm": 2.34375,
|
| 22575 |
+
"learning_rate": 6.932296728215096e-06,
|
| 22576 |
+
"loss": 0.7664,
|
| 22577 |
+
"step": 3217
|
| 22578 |
+
},
|
| 22579 |
+
{
|
| 22580 |
+
"epoch": 6.56734693877551,
|
| 22581 |
+
"grad_norm": 2.25,
|
| 22582 |
+
"learning_rate": 6.899902818270165e-06,
|
| 22583 |
+
"loss": 0.7741,
|
| 22584 |
+
"step": 3218
|
| 22585 |
+
},
|
| 22586 |
+
{
|
| 22587 |
+
"epoch": 6.569387755102041,
|
| 22588 |
+
"grad_norm": 2.453125,
|
| 22589 |
+
"learning_rate": 6.867508908325235e-06,
|
| 22590 |
+
"loss": 0.8285,
|
| 22591 |
+
"step": 3219
|
| 22592 |
+
},
|
| 22593 |
+
{
|
| 22594 |
+
"epoch": 6.571428571428571,
|
| 22595 |
+
"grad_norm": 2.515625,
|
| 22596 |
+
"learning_rate": 6.835114998380305e-06,
|
| 22597 |
+
"loss": 0.7725,
|
| 22598 |
+
"step": 3220
|
| 22599 |
+
},
|
| 22600 |
+
{
|
| 22601 |
+
"epoch": 6.573469387755102,
|
| 22602 |
+
"grad_norm": 2.546875,
|
| 22603 |
+
"learning_rate": 6.802721088435375e-06,
|
| 22604 |
+
"loss": 0.7967,
|
| 22605 |
+
"step": 3221
|
| 22606 |
+
},
|
| 22607 |
+
{
|
| 22608 |
+
"epoch": 6.575510204081633,
|
| 22609 |
+
"grad_norm": 2.40625,
|
| 22610 |
+
"learning_rate": 6.770327178490444e-06,
|
| 22611 |
+
"loss": 0.8043,
|
| 22612 |
+
"step": 3222
|
| 22613 |
+
},
|
| 22614 |
+
{
|
| 22615 |
+
"epoch": 6.577551020408164,
|
| 22616 |
+
"grad_norm": 2.53125,
|
| 22617 |
+
"learning_rate": 6.737933268545514e-06,
|
| 22618 |
+
"loss": 0.8327,
|
| 22619 |
+
"step": 3223
|
| 22620 |
+
},
|
| 22621 |
+
{
|
| 22622 |
+
"epoch": 6.579591836734694,
|
| 22623 |
+
"grad_norm": 2.125,
|
| 22624 |
+
"learning_rate": 6.705539358600584e-06,
|
| 22625 |
+
"loss": 0.7573,
|
| 22626 |
+
"step": 3224
|
| 22627 |
+
},
|
| 22628 |
+
{
|
| 22629 |
+
"epoch": 6.581632653061225,
|
| 22630 |
+
"grad_norm": 2.203125,
|
| 22631 |
+
"learning_rate": 6.673145448655653e-06,
|
| 22632 |
+
"loss": 0.7698,
|
| 22633 |
+
"step": 3225
|
| 22634 |
+
},
|
| 22635 |
+
{
|
| 22636 |
+
"epoch": 6.583673469387755,
|
| 22637 |
+
"grad_norm": 2.46875,
|
| 22638 |
+
"learning_rate": 6.640751538710723e-06,
|
| 22639 |
+
"loss": 0.87,
|
| 22640 |
+
"step": 3226
|
| 22641 |
+
},
|
| 22642 |
+
{
|
| 22643 |
+
"epoch": 6.585714285714285,
|
| 22644 |
+
"grad_norm": 2.40625,
|
| 22645 |
+
"learning_rate": 6.608357628765792e-06,
|
| 22646 |
+
"loss": 0.8061,
|
| 22647 |
+
"step": 3227
|
| 22648 |
+
},
|
| 22649 |
+
{
|
| 22650 |
+
"epoch": 6.587755102040816,
|
| 22651 |
+
"grad_norm": 2.796875,
|
| 22652 |
+
"learning_rate": 6.5759637188208614e-06,
|
| 22653 |
+
"loss": 0.9108,
|
| 22654 |
+
"step": 3228
|
| 22655 |
+
},
|
| 22656 |
+
{
|
| 22657 |
+
"epoch": 6.589795918367347,
|
| 22658 |
+
"grad_norm": 2.609375,
|
| 22659 |
+
"learning_rate": 6.543569808875932e-06,
|
| 22660 |
+
"loss": 0.8008,
|
| 22661 |
+
"step": 3229
|
| 22662 |
+
},
|
| 22663 |
+
{
|
| 22664 |
+
"epoch": 6.591836734693878,
|
| 22665 |
+
"grad_norm": 2.390625,
|
| 22666 |
+
"learning_rate": 6.511175898931002e-06,
|
| 22667 |
+
"loss": 0.8727,
|
| 22668 |
+
"step": 3230
|
| 22669 |
+
},
|
| 22670 |
+
{
|
| 22671 |
+
"epoch": 6.593877551020408,
|
| 22672 |
+
"grad_norm": 2.328125,
|
| 22673 |
+
"learning_rate": 6.478781988986071e-06,
|
| 22674 |
+
"loss": 0.8308,
|
| 22675 |
+
"step": 3231
|
| 22676 |
+
},
|
| 22677 |
+
{
|
| 22678 |
+
"epoch": 6.595918367346939,
|
| 22679 |
+
"grad_norm": 2.5,
|
| 22680 |
+
"learning_rate": 6.446388079041141e-06,
|
| 22681 |
+
"loss": 0.8273,
|
| 22682 |
+
"step": 3232
|
| 22683 |
+
},
|
| 22684 |
+
{
|
| 22685 |
+
"epoch": 6.59795918367347,
|
| 22686 |
+
"grad_norm": 2.625,
|
| 22687 |
+
"learning_rate": 6.4139941690962105e-06,
|
| 22688 |
+
"loss": 0.8319,
|
| 22689 |
+
"step": 3233
|
| 22690 |
+
},
|
| 22691 |
+
{
|
| 22692 |
+
"epoch": 6.6,
|
| 22693 |
+
"grad_norm": 2.46875,
|
| 22694 |
+
"learning_rate": 6.3816002591512795e-06,
|
| 22695 |
+
"loss": 0.8239,
|
| 22696 |
+
"step": 3234
|
| 22697 |
+
},
|
| 22698 |
+
{
|
| 22699 |
+
"epoch": 6.6020408163265305,
|
| 22700 |
+
"grad_norm": 2.546875,
|
| 22701 |
+
"learning_rate": 6.349206349206349e-06,
|
| 22702 |
+
"loss": 0.8057,
|
| 22703 |
+
"step": 3235
|
| 22704 |
+
},
|
| 22705 |
+
{
|
| 22706 |
+
"epoch": 6.604081632653061,
|
| 22707 |
+
"grad_norm": 2.375,
|
| 22708 |
+
"learning_rate": 6.316812439261418e-06,
|
| 22709 |
+
"loss": 0.794,
|
| 22710 |
+
"step": 3236
|
| 22711 |
+
},
|
| 22712 |
+
{
|
| 22713 |
+
"epoch": 6.606122448979592,
|
| 22714 |
+
"grad_norm": 2.734375,
|
| 22715 |
+
"learning_rate": 6.284418529316488e-06,
|
| 22716 |
+
"loss": 0.9053,
|
| 22717 |
+
"step": 3237
|
| 22718 |
+
},
|
| 22719 |
+
{
|
| 22720 |
+
"epoch": 6.608163265306122,
|
| 22721 |
+
"grad_norm": 2.421875,
|
| 22722 |
+
"learning_rate": 6.252024619371559e-06,
|
| 22723 |
+
"loss": 0.7992,
|
| 22724 |
+
"step": 3238
|
| 22725 |
+
},
|
| 22726 |
+
{
|
| 22727 |
+
"epoch": 6.610204081632653,
|
| 22728 |
+
"grad_norm": 2.46875,
|
| 22729 |
+
"learning_rate": 6.219630709426628e-06,
|
| 22730 |
+
"loss": 0.862,
|
| 22731 |
+
"step": 3239
|
| 22732 |
+
},
|
| 22733 |
+
{
|
| 22734 |
+
"epoch": 6.612244897959184,
|
| 22735 |
+
"grad_norm": 2.34375,
|
| 22736 |
+
"learning_rate": 6.187236799481698e-06,
|
| 22737 |
+
"loss": 0.816,
|
| 22738 |
+
"step": 3240
|
| 22739 |
+
},
|
| 22740 |
+
{
|
| 22741 |
+
"epoch": 6.614285714285714,
|
| 22742 |
+
"grad_norm": 2.234375,
|
| 22743 |
+
"learning_rate": 6.154842889536767e-06,
|
| 22744 |
+
"loss": 0.8024,
|
| 22745 |
+
"step": 3241
|
| 22746 |
+
},
|
| 22747 |
+
{
|
| 22748 |
+
"epoch": 6.616326530612245,
|
| 22749 |
+
"grad_norm": 2.65625,
|
| 22750 |
+
"learning_rate": 6.122448979591837e-06,
|
| 22751 |
+
"loss": 0.8232,
|
| 22752 |
+
"step": 3242
|
| 22753 |
+
},
|
| 22754 |
+
{
|
| 22755 |
+
"epoch": 6.618367346938776,
|
| 22756 |
+
"grad_norm": 2.484375,
|
| 22757 |
+
"learning_rate": 6.090055069646906e-06,
|
| 22758 |
+
"loss": 0.8025,
|
| 22759 |
+
"step": 3243
|
| 22760 |
+
},
|
| 22761 |
+
{
|
| 22762 |
+
"epoch": 6.6204081632653065,
|
| 22763 |
+
"grad_norm": 2.65625,
|
| 22764 |
+
"learning_rate": 6.057661159701977e-06,
|
| 22765 |
+
"loss": 0.8248,
|
| 22766 |
+
"step": 3244
|
| 22767 |
+
},
|
| 22768 |
+
{
|
| 22769 |
+
"epoch": 6.622448979591836,
|
| 22770 |
+
"grad_norm": 2.328125,
|
| 22771 |
+
"learning_rate": 6.025267249757046e-06,
|
| 22772 |
+
"loss": 0.8003,
|
| 22773 |
+
"step": 3245
|
| 22774 |
+
},
|
| 22775 |
+
{
|
| 22776 |
+
"epoch": 6.624489795918367,
|
| 22777 |
+
"grad_norm": 2.453125,
|
| 22778 |
+
"learning_rate": 5.9928733398121155e-06,
|
| 22779 |
+
"loss": 0.8715,
|
| 22780 |
+
"step": 3246
|
| 22781 |
+
},
|
| 22782 |
+
{
|
| 22783 |
+
"epoch": 6.626530612244898,
|
| 22784 |
+
"grad_norm": 2.4375,
|
| 22785 |
+
"learning_rate": 5.960479429867185e-06,
|
| 22786 |
+
"loss": 0.7995,
|
| 22787 |
+
"step": 3247
|
| 22788 |
+
},
|
| 22789 |
+
{
|
| 22790 |
+
"epoch": 6.628571428571428,
|
| 22791 |
+
"grad_norm": 2.53125,
|
| 22792 |
+
"learning_rate": 5.928085519922255e-06,
|
| 22793 |
+
"loss": 0.8677,
|
| 22794 |
+
"step": 3248
|
| 22795 |
+
},
|
| 22796 |
+
{
|
| 22797 |
+
"epoch": 6.630612244897959,
|
| 22798 |
+
"grad_norm": 2.28125,
|
| 22799 |
+
"learning_rate": 5.895691609977325e-06,
|
| 22800 |
+
"loss": 0.777,
|
| 22801 |
+
"step": 3249
|
| 22802 |
+
},
|
| 22803 |
+
{
|
| 22804 |
+
"epoch": 6.63265306122449,
|
| 22805 |
+
"grad_norm": 2.09375,
|
| 22806 |
+
"learning_rate": 5.863297700032394e-06,
|
| 22807 |
+
"loss": 0.8114,
|
| 22808 |
+
"step": 3250
|
| 22809 |
+
},
|
| 22810 |
+
{
|
| 22811 |
+
"epoch": 6.634693877551021,
|
| 22812 |
+
"grad_norm": 2.421875,
|
| 22813 |
+
"learning_rate": 5.830903790087464e-06,
|
| 22814 |
+
"loss": 0.8321,
|
| 22815 |
+
"step": 3251
|
| 22816 |
+
},
|
| 22817 |
+
{
|
| 22818 |
+
"epoch": 6.636734693877551,
|
| 22819 |
+
"grad_norm": 2.3125,
|
| 22820 |
+
"learning_rate": 5.7985098801425335e-06,
|
| 22821 |
+
"loss": 0.8757,
|
| 22822 |
+
"step": 3252
|
| 22823 |
+
},
|
| 22824 |
+
{
|
| 22825 |
+
"epoch": 6.6387755102040815,
|
| 22826 |
+
"grad_norm": 2.34375,
|
| 22827 |
+
"learning_rate": 5.766115970197603e-06,
|
| 22828 |
+
"loss": 0.7658,
|
| 22829 |
+
"step": 3253
|
| 22830 |
+
},
|
| 22831 |
+
{
|
| 22832 |
+
"epoch": 6.640816326530612,
|
| 22833 |
+
"grad_norm": 2.390625,
|
| 22834 |
+
"learning_rate": 5.733722060252672e-06,
|
| 22835 |
+
"loss": 0.7093,
|
| 22836 |
+
"step": 3254
|
| 22837 |
+
},
|
| 22838 |
+
{
|
| 22839 |
+
"epoch": 6.642857142857143,
|
| 22840 |
+
"grad_norm": 2.53125,
|
| 22841 |
+
"learning_rate": 5.701328150307742e-06,
|
| 22842 |
+
"loss": 0.8227,
|
| 22843 |
+
"step": 3255
|
| 22844 |
+
},
|
| 22845 |
+
{
|
| 22846 |
+
"epoch": 6.644897959183673,
|
| 22847 |
+
"grad_norm": 2.28125,
|
| 22848 |
+
"learning_rate": 5.668934240362813e-06,
|
| 22849 |
+
"loss": 0.7419,
|
| 22850 |
+
"step": 3256
|
| 22851 |
+
},
|
| 22852 |
+
{
|
| 22853 |
+
"epoch": 6.646938775510204,
|
| 22854 |
+
"grad_norm": 2.46875,
|
| 22855 |
+
"learning_rate": 5.636540330417882e-06,
|
| 22856 |
+
"loss": 0.8162,
|
| 22857 |
+
"step": 3257
|
| 22858 |
+
},
|
| 22859 |
+
{
|
| 22860 |
+
"epoch": 6.648979591836735,
|
| 22861 |
+
"grad_norm": 2.703125,
|
| 22862 |
+
"learning_rate": 5.6041464204729515e-06,
|
| 22863 |
+
"loss": 0.8695,
|
| 22864 |
+
"step": 3258
|
| 22865 |
+
},
|
| 22866 |
+
{
|
| 22867 |
+
"epoch": 6.651020408163266,
|
| 22868 |
+
"grad_norm": 2.640625,
|
| 22869 |
+
"learning_rate": 5.5717525105280205e-06,
|
| 22870 |
+
"loss": 0.9011,
|
| 22871 |
+
"step": 3259
|
| 22872 |
+
},
|
| 22873 |
+
{
|
| 22874 |
+
"epoch": 6.653061224489796,
|
| 22875 |
+
"grad_norm": 2.546875,
|
| 22876 |
+
"learning_rate": 5.539358600583091e-06,
|
| 22877 |
+
"loss": 0.7929,
|
| 22878 |
+
"step": 3260
|
| 22879 |
+
},
|
| 22880 |
+
{
|
| 22881 |
+
"epoch": 6.655102040816327,
|
| 22882 |
+
"grad_norm": 2.3125,
|
| 22883 |
+
"learning_rate": 5.50696469063816e-06,
|
| 22884 |
+
"loss": 0.7634,
|
| 22885 |
+
"step": 3261
|
| 22886 |
+
},
|
| 22887 |
+
{
|
| 22888 |
+
"epoch": 6.6571428571428575,
|
| 22889 |
+
"grad_norm": 2.25,
|
| 22890 |
+
"learning_rate": 5.47457078069323e-06,
|
| 22891 |
+
"loss": 0.7489,
|
| 22892 |
+
"step": 3262
|
| 22893 |
+
},
|
| 22894 |
+
{
|
| 22895 |
+
"epoch": 6.659183673469387,
|
| 22896 |
+
"grad_norm": 2.46875,
|
| 22897 |
+
"learning_rate": 5.4421768707483e-06,
|
| 22898 |
+
"loss": 0.8152,
|
| 22899 |
+
"step": 3263
|
| 22900 |
+
},
|
| 22901 |
+
{
|
| 22902 |
+
"epoch": 6.661224489795918,
|
| 22903 |
+
"grad_norm": 2.34375,
|
| 22904 |
+
"learning_rate": 5.4097829608033695e-06,
|
| 22905 |
+
"loss": 0.7356,
|
| 22906 |
+
"step": 3264
|
| 22907 |
+
},
|
| 22908 |
+
{
|
| 22909 |
+
"epoch": 6.663265306122449,
|
| 22910 |
+
"grad_norm": 2.515625,
|
| 22911 |
+
"learning_rate": 5.377389050858439e-06,
|
| 22912 |
+
"loss": 0.7876,
|
| 22913 |
+
"step": 3265
|
| 22914 |
+
},
|
| 22915 |
+
{
|
| 22916 |
+
"epoch": 6.66530612244898,
|
| 22917 |
+
"grad_norm": 2.578125,
|
| 22918 |
+
"learning_rate": 5.344995140913508e-06,
|
| 22919 |
+
"loss": 0.7798,
|
| 22920 |
+
"step": 3266
|
| 22921 |
+
},
|
| 22922 |
+
{
|
| 22923 |
+
"epoch": 6.66734693877551,
|
| 22924 |
+
"grad_norm": 2.421875,
|
| 22925 |
+
"learning_rate": 5.312601230968578e-06,
|
| 22926 |
+
"loss": 0.7442,
|
| 22927 |
+
"step": 3267
|
| 22928 |
+
},
|
| 22929 |
+
{
|
| 22930 |
+
"epoch": 6.669387755102041,
|
| 22931 |
+
"grad_norm": 2.390625,
|
| 22932 |
+
"learning_rate": 5.280207321023648e-06,
|
| 22933 |
+
"loss": 0.8275,
|
| 22934 |
+
"step": 3268
|
| 22935 |
+
},
|
| 22936 |
+
{
|
| 22937 |
+
"epoch": 6.671428571428572,
|
| 22938 |
+
"grad_norm": 2.734375,
|
| 22939 |
+
"learning_rate": 5.247813411078718e-06,
|
| 22940 |
+
"loss": 0.8946,
|
| 22941 |
+
"step": 3269
|
| 22942 |
+
},
|
| 22943 |
+
{
|
| 22944 |
+
"epoch": 6.673469387755102,
|
| 22945 |
+
"grad_norm": 2.3125,
|
| 22946 |
+
"learning_rate": 5.215419501133787e-06,
|
| 22947 |
+
"loss": 0.87,
|
| 22948 |
+
"step": 3270
|
| 22949 |
+
},
|
| 22950 |
+
{
|
| 22951 |
+
"epoch": 6.6755102040816325,
|
| 22952 |
+
"grad_norm": 2.40625,
|
| 22953 |
+
"learning_rate": 5.1830255911888565e-06,
|
| 22954 |
+
"loss": 0.8487,
|
| 22955 |
+
"step": 3271
|
| 22956 |
+
},
|
| 22957 |
+
{
|
| 22958 |
+
"epoch": 6.677551020408163,
|
| 22959 |
+
"grad_norm": 2.421875,
|
| 22960 |
+
"learning_rate": 5.150631681243926e-06,
|
| 22961 |
+
"loss": 0.8483,
|
| 22962 |
+
"step": 3272
|
| 22963 |
+
},
|
| 22964 |
+
{
|
| 22965 |
+
"epoch": 6.679591836734694,
|
| 22966 |
+
"grad_norm": 2.375,
|
| 22967 |
+
"learning_rate": 5.118237771298996e-06,
|
| 22968 |
+
"loss": 0.8666,
|
| 22969 |
+
"step": 3273
|
| 22970 |
+
},
|
| 22971 |
+
{
|
| 22972 |
+
"epoch": 6.681632653061224,
|
| 22973 |
+
"grad_norm": 2.5,
|
| 22974 |
+
"learning_rate": 5.085843861354066e-06,
|
| 22975 |
+
"loss": 0.8191,
|
| 22976 |
+
"step": 3274
|
| 22977 |
+
},
|
| 22978 |
+
{
|
| 22979 |
+
"epoch": 6.683673469387755,
|
| 22980 |
+
"grad_norm": 2.875,
|
| 22981 |
+
"learning_rate": 5.053449951409135e-06,
|
| 22982 |
+
"loss": 0.8965,
|
| 22983 |
+
"step": 3275
|
| 22984 |
+
},
|
| 22985 |
+
{
|
| 22986 |
+
"epoch": 6.685714285714286,
|
| 22987 |
+
"grad_norm": 2.359375,
|
| 22988 |
+
"learning_rate": 5.021056041464205e-06,
|
| 22989 |
+
"loss": 0.8196,
|
| 22990 |
+
"step": 3276
|
| 22991 |
+
},
|
| 22992 |
+
{
|
| 22993 |
+
"epoch": 6.687755102040816,
|
| 22994 |
+
"grad_norm": 2.375,
|
| 22995 |
+
"learning_rate": 4.9886621315192745e-06,
|
| 22996 |
+
"loss": 0.8078,
|
| 22997 |
+
"step": 3277
|
| 22998 |
+
},
|
| 22999 |
+
{
|
| 23000 |
+
"epoch": 6.689795918367347,
|
| 23001 |
+
"grad_norm": 2.4375,
|
| 23002 |
+
"learning_rate": 4.956268221574344e-06,
|
| 23003 |
+
"loss": 0.7917,
|
| 23004 |
+
"step": 3278
|
| 23005 |
+
},
|
| 23006 |
+
{
|
| 23007 |
+
"epoch": 6.691836734693878,
|
| 23008 |
+
"grad_norm": 2.453125,
|
| 23009 |
+
"learning_rate": 4.923874311629414e-06,
|
| 23010 |
+
"loss": 0.8491,
|
| 23011 |
+
"step": 3279
|
| 23012 |
+
},
|
| 23013 |
+
{
|
| 23014 |
+
"epoch": 6.6938775510204085,
|
| 23015 |
+
"grad_norm": 2.609375,
|
| 23016 |
+
"learning_rate": 4.891480401684483e-06,
|
| 23017 |
+
"loss": 0.7846,
|
| 23018 |
+
"step": 3280
|
| 23019 |
+
},
|
| 23020 |
+
{
|
| 23021 |
+
"epoch": 6.6959183673469385,
|
| 23022 |
+
"grad_norm": 2.40625,
|
| 23023 |
+
"learning_rate": 4.859086491739554e-06,
|
| 23024 |
+
"loss": 0.7881,
|
| 23025 |
+
"step": 3281
|
| 23026 |
+
},
|
| 23027 |
+
{
|
| 23028 |
+
"epoch": 6.697959183673469,
|
| 23029 |
+
"grad_norm": 2.75,
|
| 23030 |
+
"learning_rate": 4.826692581794623e-06,
|
| 23031 |
+
"loss": 0.9012,
|
| 23032 |
+
"step": 3282
|
| 23033 |
+
},
|
| 23034 |
+
{
|
| 23035 |
+
"epoch": 6.7,
|
| 23036 |
+
"grad_norm": 2.46875,
|
| 23037 |
+
"learning_rate": 4.7942986718496926e-06,
|
| 23038 |
+
"loss": 0.8185,
|
| 23039 |
+
"step": 3283
|
| 23040 |
+
},
|
| 23041 |
+
{
|
| 23042 |
+
"epoch": 6.70204081632653,
|
| 23043 |
+
"grad_norm": 2.34375,
|
| 23044 |
+
"learning_rate": 4.7619047619047615e-06,
|
| 23045 |
+
"loss": 0.7944,
|
| 23046 |
+
"step": 3284
|
| 23047 |
+
},
|
| 23048 |
+
{
|
| 23049 |
+
"epoch": 6.704081632653061,
|
| 23050 |
+
"grad_norm": 2.375,
|
| 23051 |
+
"learning_rate": 4.729510851959832e-06,
|
| 23052 |
+
"loss": 0.8205,
|
| 23053 |
+
"step": 3285
|
| 23054 |
+
},
|
| 23055 |
+
{
|
| 23056 |
+
"epoch": 6.706122448979592,
|
| 23057 |
+
"grad_norm": 2.671875,
|
| 23058 |
+
"learning_rate": 4.697116942014901e-06,
|
| 23059 |
+
"loss": 0.8723,
|
| 23060 |
+
"step": 3286
|
| 23061 |
+
},
|
| 23062 |
+
{
|
| 23063 |
+
"epoch": 6.708163265306123,
|
| 23064 |
+
"grad_norm": 2.5,
|
| 23065 |
+
"learning_rate": 4.664723032069971e-06,
|
| 23066 |
+
"loss": 0.8153,
|
| 23067 |
+
"step": 3287
|
| 23068 |
+
},
|
| 23069 |
+
{
|
| 23070 |
+
"epoch": 6.710204081632653,
|
| 23071 |
+
"grad_norm": 2.4375,
|
| 23072 |
+
"learning_rate": 4.632329122125041e-06,
|
| 23073 |
+
"loss": 0.8019,
|
| 23074 |
+
"step": 3288
|
| 23075 |
+
},
|
| 23076 |
+
{
|
| 23077 |
+
"epoch": 6.7122448979591836,
|
| 23078 |
+
"grad_norm": 2.390625,
|
| 23079 |
+
"learning_rate": 4.599935212180111e-06,
|
| 23080 |
+
"loss": 0.8282,
|
| 23081 |
+
"step": 3289
|
| 23082 |
+
},
|
| 23083 |
+
{
|
| 23084 |
+
"epoch": 6.714285714285714,
|
| 23085 |
+
"grad_norm": 2.546875,
|
| 23086 |
+
"learning_rate": 4.56754130223518e-06,
|
| 23087 |
+
"loss": 0.8162,
|
| 23088 |
+
"step": 3290
|
| 23089 |
+
},
|
| 23090 |
+
{
|
| 23091 |
+
"epoch": 6.716326530612245,
|
| 23092 |
+
"grad_norm": 2.53125,
|
| 23093 |
+
"learning_rate": 4.535147392290249e-06,
|
| 23094 |
+
"loss": 0.8267,
|
| 23095 |
+
"step": 3291
|
| 23096 |
+
},
|
| 23097 |
+
{
|
| 23098 |
+
"epoch": 6.718367346938775,
|
| 23099 |
+
"grad_norm": 2.5,
|
| 23100 |
+
"learning_rate": 4.502753482345319e-06,
|
| 23101 |
+
"loss": 0.8133,
|
| 23102 |
+
"step": 3292
|
| 23103 |
+
},
|
| 23104 |
+
{
|
| 23105 |
+
"epoch": 6.720408163265306,
|
| 23106 |
+
"grad_norm": 2.375,
|
| 23107 |
+
"learning_rate": 4.470359572400389e-06,
|
| 23108 |
+
"loss": 0.8336,
|
| 23109 |
+
"step": 3293
|
| 23110 |
+
},
|
| 23111 |
+
{
|
| 23112 |
+
"epoch": 6.722448979591837,
|
| 23113 |
+
"grad_norm": 2.609375,
|
| 23114 |
+
"learning_rate": 4.437965662455459e-06,
|
| 23115 |
+
"loss": 0.9122,
|
| 23116 |
+
"step": 3294
|
| 23117 |
+
},
|
| 23118 |
+
{
|
| 23119 |
+
"epoch": 6.724489795918368,
|
| 23120 |
+
"grad_norm": 2.46875,
|
| 23121 |
+
"learning_rate": 4.405571752510529e-06,
|
| 23122 |
+
"loss": 0.8662,
|
| 23123 |
+
"step": 3295
|
| 23124 |
+
},
|
| 23125 |
+
{
|
| 23126 |
+
"epoch": 6.726530612244898,
|
| 23127 |
+
"grad_norm": 2.375,
|
| 23128 |
+
"learning_rate": 4.3731778425655976e-06,
|
| 23129 |
+
"loss": 0.7482,
|
| 23130 |
+
"step": 3296
|
| 23131 |
+
},
|
| 23132 |
+
{
|
| 23133 |
+
"epoch": 6.728571428571429,
|
| 23134 |
+
"grad_norm": 2.328125,
|
| 23135 |
+
"learning_rate": 4.340783932620668e-06,
|
| 23136 |
+
"loss": 0.8384,
|
| 23137 |
+
"step": 3297
|
| 23138 |
+
},
|
| 23139 |
+
{
|
| 23140 |
+
"epoch": 6.7306122448979595,
|
| 23141 |
+
"grad_norm": 2.484375,
|
| 23142 |
+
"learning_rate": 4.308390022675737e-06,
|
| 23143 |
+
"loss": 0.7669,
|
| 23144 |
+
"step": 3298
|
| 23145 |
+
},
|
| 23146 |
+
{
|
| 23147 |
+
"epoch": 6.7326530612244895,
|
| 23148 |
+
"grad_norm": 2.640625,
|
| 23149 |
+
"learning_rate": 4.275996112730807e-06,
|
| 23150 |
+
"loss": 0.8645,
|
| 23151 |
+
"step": 3299
|
| 23152 |
+
},
|
| 23153 |
+
{
|
| 23154 |
+
"epoch": 6.73469387755102,
|
| 23155 |
+
"grad_norm": 2.328125,
|
| 23156 |
+
"learning_rate": 4.243602202785876e-06,
|
| 23157 |
+
"loss": 0.8026,
|
| 23158 |
+
"step": 3300
|
| 23159 |
+
},
|
| 23160 |
+
{
|
| 23161 |
+
"epoch": 6.736734693877551,
|
| 23162 |
+
"grad_norm": 2.4375,
|
| 23163 |
+
"learning_rate": 4.211208292840947e-06,
|
| 23164 |
+
"loss": 0.8471,
|
| 23165 |
+
"step": 3301
|
| 23166 |
+
},
|
| 23167 |
+
{
|
| 23168 |
+
"epoch": 6.738775510204082,
|
| 23169 |
+
"grad_norm": 2.578125,
|
| 23170 |
+
"learning_rate": 4.178814382896016e-06,
|
| 23171 |
+
"loss": 0.8891,
|
| 23172 |
+
"step": 3302
|
| 23173 |
+
},
|
| 23174 |
+
{
|
| 23175 |
+
"epoch": 6.740816326530612,
|
| 23176 |
+
"grad_norm": 2.671875,
|
| 23177 |
+
"learning_rate": 4.146420472951085e-06,
|
| 23178 |
+
"loss": 0.824,
|
| 23179 |
+
"step": 3303
|
| 23180 |
+
},
|
| 23181 |
+
{
|
| 23182 |
+
"epoch": 6.742857142857143,
|
| 23183 |
+
"grad_norm": 2.5625,
|
| 23184 |
+
"learning_rate": 4.114026563006155e-06,
|
| 23185 |
+
"loss": 0.8246,
|
| 23186 |
+
"step": 3304
|
| 23187 |
+
},
|
| 23188 |
+
{
|
| 23189 |
+
"epoch": 6.744897959183674,
|
| 23190 |
+
"grad_norm": 2.484375,
|
| 23191 |
+
"learning_rate": 4.081632653061224e-06,
|
| 23192 |
+
"loss": 0.8093,
|
| 23193 |
+
"step": 3305
|
| 23194 |
+
},
|
| 23195 |
+
{
|
| 23196 |
+
"epoch": 6.746938775510204,
|
| 23197 |
+
"grad_norm": 2.734375,
|
| 23198 |
+
"learning_rate": 4.049238743116295e-06,
|
| 23199 |
+
"loss": 0.8453,
|
| 23200 |
+
"step": 3306
|
| 23201 |
+
},
|
| 23202 |
+
{
|
| 23203 |
+
"epoch": 6.748979591836735,
|
| 23204 |
+
"grad_norm": 2.34375,
|
| 23205 |
+
"learning_rate": 4.016844833171364e-06,
|
| 23206 |
+
"loss": 0.7437,
|
| 23207 |
+
"step": 3307
|
| 23208 |
+
},
|
| 23209 |
+
{
|
| 23210 |
+
"epoch": 6.751020408163265,
|
| 23211 |
+
"grad_norm": 2.40625,
|
| 23212 |
+
"learning_rate": 3.984450923226434e-06,
|
| 23213 |
+
"loss": 0.853,
|
| 23214 |
+
"step": 3308
|
| 23215 |
+
},
|
| 23216 |
+
{
|
| 23217 |
+
"epoch": 6.753061224489796,
|
| 23218 |
+
"grad_norm": 2.25,
|
| 23219 |
+
"learning_rate": 3.9520570132815026e-06,
|
| 23220 |
+
"loss": 0.771,
|
| 23221 |
+
"step": 3309
|
| 23222 |
+
},
|
| 23223 |
+
{
|
| 23224 |
+
"epoch": 6.755102040816326,
|
| 23225 |
+
"grad_norm": 2.5,
|
| 23226 |
+
"learning_rate": 3.919663103336573e-06,
|
| 23227 |
+
"loss": 0.8485,
|
| 23228 |
+
"step": 3310
|
| 23229 |
+
},
|
| 23230 |
+
{
|
| 23231 |
+
"epoch": 6.757142857142857,
|
| 23232 |
+
"grad_norm": 2.328125,
|
| 23233 |
+
"learning_rate": 3.887269193391643e-06,
|
| 23234 |
+
"loss": 0.7745,
|
| 23235 |
+
"step": 3311
|
| 23236 |
+
},
|
| 23237 |
+
{
|
| 23238 |
+
"epoch": 6.759183673469388,
|
| 23239 |
+
"grad_norm": 2.4375,
|
| 23240 |
+
"learning_rate": 3.854875283446712e-06,
|
| 23241 |
+
"loss": 0.86,
|
| 23242 |
+
"step": 3312
|
| 23243 |
+
},
|
| 23244 |
+
{
|
| 23245 |
+
"epoch": 6.761224489795918,
|
| 23246 |
+
"grad_norm": 2.671875,
|
| 23247 |
+
"learning_rate": 3.822481373501782e-06,
|
| 23248 |
+
"loss": 0.8632,
|
| 23249 |
+
"step": 3313
|
| 23250 |
+
},
|
| 23251 |
+
{
|
| 23252 |
+
"epoch": 6.763265306122449,
|
| 23253 |
+
"grad_norm": 2.3125,
|
| 23254 |
+
"learning_rate": 3.7900874635568516e-06,
|
| 23255 |
+
"loss": 0.7821,
|
| 23256 |
+
"step": 3314
|
| 23257 |
+
},
|
| 23258 |
+
{
|
| 23259 |
+
"epoch": 6.76530612244898,
|
| 23260 |
+
"grad_norm": 2.4375,
|
| 23261 |
+
"learning_rate": 3.757693553611921e-06,
|
| 23262 |
+
"loss": 0.7944,
|
| 23263 |
+
"step": 3315
|
| 23264 |
+
},
|
| 23265 |
+
{
|
| 23266 |
+
"epoch": 6.7673469387755105,
|
| 23267 |
+
"grad_norm": 2.78125,
|
| 23268 |
+
"learning_rate": 3.725299643666991e-06,
|
| 23269 |
+
"loss": 0.9161,
|
| 23270 |
+
"step": 3316
|
| 23271 |
+
},
|
| 23272 |
+
{
|
| 23273 |
+
"epoch": 6.7693877551020405,
|
| 23274 |
+
"grad_norm": 2.421875,
|
| 23275 |
+
"learning_rate": 3.69290573372206e-06,
|
| 23276 |
+
"loss": 0.8084,
|
| 23277 |
+
"step": 3317
|
| 23278 |
+
},
|
| 23279 |
+
{
|
| 23280 |
+
"epoch": 6.771428571428571,
|
| 23281 |
+
"grad_norm": 2.28125,
|
| 23282 |
+
"learning_rate": 3.6605118237771304e-06,
|
| 23283 |
+
"loss": 0.7925,
|
| 23284 |
+
"step": 3318
|
| 23285 |
+
},
|
| 23286 |
+
{
|
| 23287 |
+
"epoch": 6.773469387755102,
|
| 23288 |
+
"grad_norm": 2.46875,
|
| 23289 |
+
"learning_rate": 3.6281179138322e-06,
|
| 23290 |
+
"loss": 0.8479,
|
| 23291 |
+
"step": 3319
|
| 23292 |
+
},
|
| 23293 |
+
{
|
| 23294 |
+
"epoch": 6.775510204081632,
|
| 23295 |
+
"grad_norm": 2.25,
|
| 23296 |
+
"learning_rate": 3.595724003887269e-06,
|
| 23297 |
+
"loss": 0.7686,
|
| 23298 |
+
"step": 3320
|
| 23299 |
+
},
|
| 23300 |
+
{
|
| 23301 |
+
"epoch": 6.777551020408163,
|
| 23302 |
+
"grad_norm": 2.390625,
|
| 23303 |
+
"learning_rate": 3.5633300939423386e-06,
|
| 23304 |
+
"loss": 0.82,
|
| 23305 |
+
"step": 3321
|
| 23306 |
+
},
|
| 23307 |
+
{
|
| 23308 |
+
"epoch": 6.779591836734694,
|
| 23309 |
+
"grad_norm": 2.640625,
|
| 23310 |
+
"learning_rate": 3.530936183997409e-06,
|
| 23311 |
+
"loss": 0.8676,
|
| 23312 |
+
"step": 3322
|
| 23313 |
+
},
|
| 23314 |
+
{
|
| 23315 |
+
"epoch": 6.781632653061225,
|
| 23316 |
+
"grad_norm": 2.265625,
|
| 23317 |
+
"learning_rate": 3.4985422740524782e-06,
|
| 23318 |
+
"loss": 0.8016,
|
| 23319 |
+
"step": 3323
|
| 23320 |
+
},
|
| 23321 |
+
{
|
| 23322 |
+
"epoch": 6.783673469387755,
|
| 23323 |
+
"grad_norm": 2.484375,
|
| 23324 |
+
"learning_rate": 3.466148364107548e-06,
|
| 23325 |
+
"loss": 0.7916,
|
| 23326 |
+
"step": 3324
|
| 23327 |
+
},
|
| 23328 |
+
{
|
| 23329 |
+
"epoch": 6.785714285714286,
|
| 23330 |
+
"grad_norm": 2.53125,
|
| 23331 |
+
"learning_rate": 3.4337544541626174e-06,
|
| 23332 |
+
"loss": 0.8089,
|
| 23333 |
+
"step": 3325
|
| 23334 |
+
},
|
| 23335 |
+
{
|
| 23336 |
+
"epoch": 6.7877551020408164,
|
| 23337 |
+
"grad_norm": 2.625,
|
| 23338 |
+
"learning_rate": 3.4013605442176877e-06,
|
| 23339 |
+
"loss": 0.8226,
|
| 23340 |
+
"step": 3326
|
| 23341 |
+
},
|
| 23342 |
+
{
|
| 23343 |
+
"epoch": 6.789795918367347,
|
| 23344 |
+
"grad_norm": 2.46875,
|
| 23345 |
+
"learning_rate": 3.368966634272757e-06,
|
| 23346 |
+
"loss": 0.8101,
|
| 23347 |
+
"step": 3327
|
| 23348 |
+
},
|
| 23349 |
+
{
|
| 23350 |
+
"epoch": 6.791836734693877,
|
| 23351 |
+
"grad_norm": 2.453125,
|
| 23352 |
+
"learning_rate": 3.3365727243278264e-06,
|
| 23353 |
+
"loss": 0.8383,
|
| 23354 |
+
"step": 3328
|
| 23355 |
+
},
|
| 23356 |
+
{
|
| 23357 |
+
"epoch": 6.793877551020408,
|
| 23358 |
+
"grad_norm": 2.203125,
|
| 23359 |
+
"learning_rate": 3.304178814382896e-06,
|
| 23360 |
+
"loss": 0.7322,
|
| 23361 |
+
"step": 3329
|
| 23362 |
+
},
|
| 23363 |
+
{
|
| 23364 |
+
"epoch": 6.795918367346939,
|
| 23365 |
+
"grad_norm": 2.40625,
|
| 23366 |
+
"learning_rate": 3.271784904437966e-06,
|
| 23367 |
+
"loss": 0.7632,
|
| 23368 |
+
"step": 3330
|
| 23369 |
+
},
|
| 23370 |
+
{
|
| 23371 |
+
"epoch": 6.79795918367347,
|
| 23372 |
+
"grad_norm": 2.46875,
|
| 23373 |
+
"learning_rate": 3.2393909944930354e-06,
|
| 23374 |
+
"loss": 0.8714,
|
| 23375 |
+
"step": 3331
|
| 23376 |
+
},
|
| 23377 |
+
{
|
| 23378 |
+
"epoch": 6.8,
|
| 23379 |
+
"grad_norm": 2.546875,
|
| 23380 |
+
"learning_rate": 3.2069970845481052e-06,
|
| 23381 |
+
"loss": 0.8458,
|
| 23382 |
+
"step": 3332
|
| 23383 |
+
},
|
| 23384 |
+
{
|
| 23385 |
+
"epoch": 6.802040816326531,
|
| 23386 |
+
"grad_norm": 2.359375,
|
| 23387 |
+
"learning_rate": 3.1746031746031746e-06,
|
| 23388 |
+
"loss": 0.8663,
|
| 23389 |
+
"step": 3333
|
| 23390 |
+
},
|
| 23391 |
+
{
|
| 23392 |
+
"epoch": 6.8040816326530615,
|
| 23393 |
+
"grad_norm": 2.5625,
|
| 23394 |
+
"learning_rate": 3.142209264658244e-06,
|
| 23395 |
+
"loss": 0.8691,
|
| 23396 |
+
"step": 3334
|
| 23397 |
+
},
|
| 23398 |
+
{
|
| 23399 |
+
"epoch": 6.8061224489795915,
|
| 23400 |
+
"grad_norm": 2.625,
|
| 23401 |
+
"learning_rate": 3.109815354713314e-06,
|
| 23402 |
+
"loss": 0.8029,
|
| 23403 |
+
"step": 3335
|
| 23404 |
+
},
|
| 23405 |
+
{
|
| 23406 |
+
"epoch": 6.808163265306122,
|
| 23407 |
+
"grad_norm": 2.421875,
|
| 23408 |
+
"learning_rate": 3.0774214447683836e-06,
|
| 23409 |
+
"loss": 0.8763,
|
| 23410 |
+
"step": 3336
|
| 23411 |
+
},
|
| 23412 |
+
{
|
| 23413 |
+
"epoch": 6.810204081632653,
|
| 23414 |
+
"grad_norm": 2.625,
|
| 23415 |
+
"learning_rate": 3.045027534823453e-06,
|
| 23416 |
+
"loss": 0.7908,
|
| 23417 |
+
"step": 3337
|
| 23418 |
+
},
|
| 23419 |
+
{
|
| 23420 |
+
"epoch": 6.812244897959184,
|
| 23421 |
+
"grad_norm": 2.28125,
|
| 23422 |
+
"learning_rate": 3.012633624878523e-06,
|
| 23423 |
+
"loss": 0.7283,
|
| 23424 |
+
"step": 3338
|
| 23425 |
+
},
|
| 23426 |
+
{
|
| 23427 |
+
"epoch": 6.814285714285714,
|
| 23428 |
+
"grad_norm": 2.578125,
|
| 23429 |
+
"learning_rate": 2.9802397149335927e-06,
|
| 23430 |
+
"loss": 0.8298,
|
| 23431 |
+
"step": 3339
|
| 23432 |
+
},
|
| 23433 |
+
{
|
| 23434 |
+
"epoch": 6.816326530612245,
|
| 23435 |
+
"grad_norm": 2.28125,
|
| 23436 |
+
"learning_rate": 2.9478458049886625e-06,
|
| 23437 |
+
"loss": 0.7913,
|
| 23438 |
+
"step": 3340
|
| 23439 |
+
},
|
| 23440 |
+
{
|
| 23441 |
+
"epoch": 6.818367346938776,
|
| 23442 |
+
"grad_norm": 2.265625,
|
| 23443 |
+
"learning_rate": 2.915451895043732e-06,
|
| 23444 |
+
"loss": 0.7501,
|
| 23445 |
+
"step": 3341
|
| 23446 |
+
},
|
| 23447 |
+
{
|
| 23448 |
+
"epoch": 6.820408163265306,
|
| 23449 |
+
"grad_norm": 2.3125,
|
| 23450 |
+
"learning_rate": 2.8830579850988017e-06,
|
| 23451 |
+
"loss": 0.7858,
|
| 23452 |
+
"step": 3342
|
| 23453 |
+
},
|
| 23454 |
+
{
|
| 23455 |
+
"epoch": 6.822448979591837,
|
| 23456 |
+
"grad_norm": 2.46875,
|
| 23457 |
+
"learning_rate": 2.850664075153871e-06,
|
| 23458 |
+
"loss": 0.8025,
|
| 23459 |
+
"step": 3343
|
| 23460 |
+
},
|
| 23461 |
+
{
|
| 23462 |
+
"epoch": 6.8244897959183675,
|
| 23463 |
+
"grad_norm": 2.546875,
|
| 23464 |
+
"learning_rate": 2.818270165208941e-06,
|
| 23465 |
+
"loss": 0.7896,
|
| 23466 |
+
"step": 3344
|
| 23467 |
+
},
|
| 23468 |
+
{
|
| 23469 |
+
"epoch": 6.826530612244898,
|
| 23470 |
+
"grad_norm": 2.5,
|
| 23471 |
+
"learning_rate": 2.7858762552640102e-06,
|
| 23472 |
+
"loss": 0.8735,
|
| 23473 |
+
"step": 3345
|
| 23474 |
+
},
|
| 23475 |
+
{
|
| 23476 |
+
"epoch": 6.828571428571428,
|
| 23477 |
+
"grad_norm": 2.390625,
|
| 23478 |
+
"learning_rate": 2.75348234531908e-06,
|
| 23479 |
+
"loss": 0.7765,
|
| 23480 |
+
"step": 3346
|
| 23481 |
+
},
|
| 23482 |
+
{
|
| 23483 |
+
"epoch": 6.830612244897959,
|
| 23484 |
+
"grad_norm": 2.296875,
|
| 23485 |
+
"learning_rate": 2.72108843537415e-06,
|
| 23486 |
+
"loss": 0.7669,
|
| 23487 |
+
"step": 3347
|
| 23488 |
+
},
|
| 23489 |
+
{
|
| 23490 |
+
"epoch": 6.83265306122449,
|
| 23491 |
+
"grad_norm": 2.578125,
|
| 23492 |
+
"learning_rate": 2.6886945254292197e-06,
|
| 23493 |
+
"loss": 0.8444,
|
| 23494 |
+
"step": 3348
|
| 23495 |
+
},
|
| 23496 |
+
{
|
| 23497 |
+
"epoch": 6.83469387755102,
|
| 23498 |
+
"grad_norm": 2.625,
|
| 23499 |
+
"learning_rate": 2.656300615484289e-06,
|
| 23500 |
+
"loss": 0.8611,
|
| 23501 |
+
"step": 3349
|
| 23502 |
+
},
|
| 23503 |
+
{
|
| 23504 |
+
"epoch": 6.836734693877551,
|
| 23505 |
+
"grad_norm": 2.578125,
|
| 23506 |
+
"learning_rate": 2.623906705539359e-06,
|
| 23507 |
+
"loss": 0.7838,
|
| 23508 |
+
"step": 3350
|
| 23509 |
+
},
|
| 23510 |
+
{
|
| 23511 |
+
"epoch": 6.838775510204082,
|
| 23512 |
+
"grad_norm": 2.234375,
|
| 23513 |
+
"learning_rate": 2.5915127955944283e-06,
|
| 23514 |
+
"loss": 0.7828,
|
| 23515 |
+
"step": 3351
|
| 23516 |
+
},
|
| 23517 |
+
{
|
| 23518 |
+
"epoch": 6.840816326530613,
|
| 23519 |
+
"grad_norm": 2.28125,
|
| 23520 |
+
"learning_rate": 2.559118885649498e-06,
|
| 23521 |
+
"loss": 0.7854,
|
| 23522 |
+
"step": 3352
|
| 23523 |
+
},
|
| 23524 |
+
{
|
| 23525 |
+
"epoch": 6.8428571428571425,
|
| 23526 |
+
"grad_norm": 2.421875,
|
| 23527 |
+
"learning_rate": 2.5267249757045675e-06,
|
| 23528 |
+
"loss": 0.7977,
|
| 23529 |
+
"step": 3353
|
| 23530 |
+
},
|
| 23531 |
+
{
|
| 23532 |
+
"epoch": 6.844897959183673,
|
| 23533 |
+
"grad_norm": 2.484375,
|
| 23534 |
+
"learning_rate": 2.4943310657596373e-06,
|
| 23535 |
+
"loss": 0.7652,
|
| 23536 |
+
"step": 3354
|
| 23537 |
+
},
|
| 23538 |
+
{
|
| 23539 |
+
"epoch": 6.846938775510204,
|
| 23540 |
+
"grad_norm": 2.328125,
|
| 23541 |
+
"learning_rate": 2.461937155814707e-06,
|
| 23542 |
+
"loss": 0.7696,
|
| 23543 |
+
"step": 3355
|
| 23544 |
+
},
|
| 23545 |
+
{
|
| 23546 |
+
"epoch": 6.848979591836734,
|
| 23547 |
+
"grad_norm": 2.15625,
|
| 23548 |
+
"learning_rate": 2.429543245869777e-06,
|
| 23549 |
+
"loss": 0.8339,
|
| 23550 |
+
"step": 3356
|
| 23551 |
+
},
|
| 23552 |
+
{
|
| 23553 |
+
"epoch": 6.851020408163265,
|
| 23554 |
+
"grad_norm": 2.4375,
|
| 23555 |
+
"learning_rate": 2.3971493359248463e-06,
|
| 23556 |
+
"loss": 0.7937,
|
| 23557 |
+
"step": 3357
|
| 23558 |
+
},
|
| 23559 |
+
{
|
| 23560 |
+
"epoch": 6.853061224489796,
|
| 23561 |
+
"grad_norm": 2.40625,
|
| 23562 |
+
"learning_rate": 2.364755425979916e-06,
|
| 23563 |
+
"loss": 0.825,
|
| 23564 |
+
"step": 3358
|
| 23565 |
+
},
|
| 23566 |
+
{
|
| 23567 |
+
"epoch": 6.855102040816327,
|
| 23568 |
+
"grad_norm": 2.46875,
|
| 23569 |
+
"learning_rate": 2.3323615160349855e-06,
|
| 23570 |
+
"loss": 0.7848,
|
| 23571 |
+
"step": 3359
|
| 23572 |
+
},
|
| 23573 |
+
{
|
| 23574 |
+
"epoch": 6.857142857142857,
|
| 23575 |
+
"grad_norm": 2.640625,
|
| 23576 |
+
"learning_rate": 2.2999676060900553e-06,
|
| 23577 |
+
"loss": 0.8695,
|
| 23578 |
+
"step": 3360
|
| 23579 |
+
},
|
| 23580 |
+
{
|
| 23581 |
+
"epoch": 6.859183673469388,
|
| 23582 |
+
"grad_norm": 2.46875,
|
| 23583 |
+
"learning_rate": 2.2675736961451247e-06,
|
| 23584 |
+
"loss": 0.7602,
|
| 23585 |
+
"step": 3361
|
| 23586 |
+
},
|
| 23587 |
+
{
|
| 23588 |
+
"epoch": 6.8612244897959185,
|
| 23589 |
+
"grad_norm": 2.40625,
|
| 23590 |
+
"learning_rate": 2.2351797862001945e-06,
|
| 23591 |
+
"loss": 0.8322,
|
| 23592 |
+
"step": 3362
|
| 23593 |
+
},
|
| 23594 |
+
{
|
| 23595 |
+
"epoch": 6.863265306122449,
|
| 23596 |
+
"grad_norm": 2.578125,
|
| 23597 |
+
"learning_rate": 2.2027858762552643e-06,
|
| 23598 |
+
"loss": 0.8178,
|
| 23599 |
+
"step": 3363
|
| 23600 |
+
},
|
| 23601 |
+
{
|
| 23602 |
+
"epoch": 6.865306122448979,
|
| 23603 |
+
"grad_norm": 2.359375,
|
| 23604 |
+
"learning_rate": 2.170391966310334e-06,
|
| 23605 |
+
"loss": 0.7774,
|
| 23606 |
+
"step": 3364
|
| 23607 |
+
},
|
| 23608 |
+
{
|
| 23609 |
+
"epoch": 6.86734693877551,
|
| 23610 |
+
"grad_norm": 2.40625,
|
| 23611 |
+
"learning_rate": 2.1379980563654035e-06,
|
| 23612 |
+
"loss": 0.7743,
|
| 23613 |
+
"step": 3365
|
| 23614 |
+
},
|
| 23615 |
+
{
|
| 23616 |
+
"epoch": 6.869387755102041,
|
| 23617 |
+
"grad_norm": 2.4375,
|
| 23618 |
+
"learning_rate": 2.1056041464204733e-06,
|
| 23619 |
+
"loss": 0.7883,
|
| 23620 |
+
"step": 3366
|
| 23621 |
+
},
|
| 23622 |
+
{
|
| 23623 |
+
"epoch": 6.871428571428572,
|
| 23624 |
+
"grad_norm": 2.59375,
|
| 23625 |
+
"learning_rate": 2.0732102364755427e-06,
|
| 23626 |
+
"loss": 0.8688,
|
| 23627 |
+
"step": 3367
|
| 23628 |
+
},
|
| 23629 |
+
{
|
| 23630 |
+
"epoch": 6.873469387755102,
|
| 23631 |
+
"grad_norm": 2.234375,
|
| 23632 |
+
"learning_rate": 2.040816326530612e-06,
|
| 23633 |
+
"loss": 0.7091,
|
| 23634 |
+
"step": 3368
|
| 23635 |
+
},
|
| 23636 |
+
{
|
| 23637 |
+
"epoch": 6.875510204081633,
|
| 23638 |
+
"grad_norm": 2.453125,
|
| 23639 |
+
"learning_rate": 2.008422416585682e-06,
|
| 23640 |
+
"loss": 0.8402,
|
| 23641 |
+
"step": 3369
|
| 23642 |
+
},
|
| 23643 |
+
{
|
| 23644 |
+
"epoch": 6.877551020408164,
|
| 23645 |
+
"grad_norm": 2.1875,
|
| 23646 |
+
"learning_rate": 1.9760285066407513e-06,
|
| 23647 |
+
"loss": 0.7653,
|
| 23648 |
+
"step": 3370
|
| 23649 |
+
},
|
| 23650 |
+
{
|
| 23651 |
+
"epoch": 6.8795918367346935,
|
| 23652 |
+
"grad_norm": 2.3125,
|
| 23653 |
+
"learning_rate": 1.9436345966958215e-06,
|
| 23654 |
+
"loss": 0.7588,
|
| 23655 |
+
"step": 3371
|
| 23656 |
+
},
|
| 23657 |
+
{
|
| 23658 |
+
"epoch": 6.881632653061224,
|
| 23659 |
+
"grad_norm": 2.265625,
|
| 23660 |
+
"learning_rate": 1.911240686750891e-06,
|
| 23661 |
+
"loss": 0.7664,
|
| 23662 |
+
"step": 3372
|
| 23663 |
+
},
|
| 23664 |
+
{
|
| 23665 |
+
"epoch": 6.883673469387755,
|
| 23666 |
+
"grad_norm": 2.578125,
|
| 23667 |
+
"learning_rate": 1.8788467768059605e-06,
|
| 23668 |
+
"loss": 0.8108,
|
| 23669 |
+
"step": 3373
|
| 23670 |
+
},
|
| 23671 |
+
{
|
| 23672 |
+
"epoch": 6.885714285714286,
|
| 23673 |
+
"grad_norm": 2.359375,
|
| 23674 |
+
"learning_rate": 1.84645286686103e-06,
|
| 23675 |
+
"loss": 0.8603,
|
| 23676 |
+
"step": 3374
|
| 23677 |
+
},
|
| 23678 |
+
{
|
| 23679 |
+
"epoch": 6.887755102040816,
|
| 23680 |
+
"grad_norm": 2.359375,
|
| 23681 |
+
"learning_rate": 1.8140589569161e-06,
|
| 23682 |
+
"loss": 0.7905,
|
| 23683 |
+
"step": 3375
|
| 23684 |
+
},
|
| 23685 |
+
{
|
| 23686 |
+
"epoch": 6.889795918367347,
|
| 23687 |
+
"grad_norm": 2.296875,
|
| 23688 |
+
"learning_rate": 1.7816650469711693e-06,
|
| 23689 |
+
"loss": 0.767,
|
| 23690 |
+
"step": 3376
|
| 23691 |
+
},
|
| 23692 |
+
{
|
| 23693 |
+
"epoch": 6.891836734693878,
|
| 23694 |
+
"grad_norm": 2.3125,
|
| 23695 |
+
"learning_rate": 1.7492711370262391e-06,
|
| 23696 |
+
"loss": 0.8304,
|
| 23697 |
+
"step": 3377
|
| 23698 |
+
},
|
| 23699 |
+
{
|
| 23700 |
+
"epoch": 6.893877551020408,
|
| 23701 |
+
"grad_norm": 2.4375,
|
| 23702 |
+
"learning_rate": 1.7168772270813087e-06,
|
| 23703 |
+
"loss": 0.7503,
|
| 23704 |
+
"step": 3378
|
| 23705 |
+
},
|
| 23706 |
+
{
|
| 23707 |
+
"epoch": 6.895918367346939,
|
| 23708 |
+
"grad_norm": 2.421875,
|
| 23709 |
+
"learning_rate": 1.6844833171363785e-06,
|
| 23710 |
+
"loss": 0.831,
|
| 23711 |
+
"step": 3379
|
| 23712 |
+
},
|
| 23713 |
+
{
|
| 23714 |
+
"epoch": 6.8979591836734695,
|
| 23715 |
+
"grad_norm": 2.359375,
|
| 23716 |
+
"learning_rate": 1.652089407191448e-06,
|
| 23717 |
+
"loss": 0.7488,
|
| 23718 |
+
"step": 3380
|
| 23719 |
+
},
|
| 23720 |
+
{
|
| 23721 |
+
"epoch": 6.9,
|
| 23722 |
+
"grad_norm": 2.4375,
|
| 23723 |
+
"learning_rate": 1.6196954972465177e-06,
|
| 23724 |
+
"loss": 0.7855,
|
| 23725 |
+
"step": 3381
|
| 23726 |
+
},
|
| 23727 |
+
{
|
| 23728 |
+
"epoch": 6.90204081632653,
|
| 23729 |
+
"grad_norm": 2.53125,
|
| 23730 |
+
"learning_rate": 1.5873015873015873e-06,
|
| 23731 |
+
"loss": 0.838,
|
| 23732 |
+
"step": 3382
|
| 23733 |
+
},
|
| 23734 |
+
{
|
| 23735 |
+
"epoch": 6.904081632653061,
|
| 23736 |
+
"grad_norm": 2.34375,
|
| 23737 |
+
"learning_rate": 1.554907677356657e-06,
|
| 23738 |
+
"loss": 0.8058,
|
| 23739 |
+
"step": 3383
|
| 23740 |
+
},
|
| 23741 |
+
{
|
| 23742 |
+
"epoch": 6.906122448979592,
|
| 23743 |
+
"grad_norm": 2.390625,
|
| 23744 |
+
"learning_rate": 1.5225137674117265e-06,
|
| 23745 |
+
"loss": 0.8049,
|
| 23746 |
+
"step": 3384
|
| 23747 |
+
},
|
| 23748 |
+
{
|
| 23749 |
+
"epoch": 6.908163265306122,
|
| 23750 |
+
"grad_norm": 2.1875,
|
| 23751 |
+
"learning_rate": 1.4901198574667963e-06,
|
| 23752 |
+
"loss": 0.7409,
|
| 23753 |
+
"step": 3385
|
| 23754 |
+
},
|
| 23755 |
+
{
|
| 23756 |
+
"epoch": 6.910204081632653,
|
| 23757 |
+
"grad_norm": 2.78125,
|
| 23758 |
+
"learning_rate": 1.457725947521866e-06,
|
| 23759 |
+
"loss": 0.8607,
|
| 23760 |
+
"step": 3386
|
| 23761 |
+
},
|
| 23762 |
+
{
|
| 23763 |
+
"epoch": 6.912244897959184,
|
| 23764 |
+
"grad_norm": 2.40625,
|
| 23765 |
+
"learning_rate": 1.4253320375769355e-06,
|
| 23766 |
+
"loss": 0.856,
|
| 23767 |
+
"step": 3387
|
| 23768 |
+
},
|
| 23769 |
+
{
|
| 23770 |
+
"epoch": 6.914285714285715,
|
| 23771 |
+
"grad_norm": 2.453125,
|
| 23772 |
+
"learning_rate": 1.3929381276320051e-06,
|
| 23773 |
+
"loss": 0.738,
|
| 23774 |
+
"step": 3388
|
| 23775 |
+
},
|
| 23776 |
+
{
|
| 23777 |
+
"epoch": 6.916326530612245,
|
| 23778 |
+
"grad_norm": 2.25,
|
| 23779 |
+
"learning_rate": 1.360544217687075e-06,
|
| 23780 |
+
"loss": 0.7383,
|
| 23781 |
+
"step": 3389
|
| 23782 |
+
},
|
| 23783 |
+
{
|
| 23784 |
+
"epoch": 6.918367346938775,
|
| 23785 |
+
"grad_norm": 2.546875,
|
| 23786 |
+
"learning_rate": 1.3281503077421445e-06,
|
| 23787 |
+
"loss": 0.7855,
|
| 23788 |
+
"step": 3390
|
| 23789 |
+
},
|
| 23790 |
+
{
|
| 23791 |
+
"epoch": 6.920408163265306,
|
| 23792 |
+
"grad_norm": 2.578125,
|
| 23793 |
+
"learning_rate": 1.2957563977972141e-06,
|
| 23794 |
+
"loss": 0.841,
|
| 23795 |
+
"step": 3391
|
| 23796 |
+
},
|
| 23797 |
+
{
|
| 23798 |
+
"epoch": 6.922448979591836,
|
| 23799 |
+
"grad_norm": 2.265625,
|
| 23800 |
+
"learning_rate": 1.2633624878522837e-06,
|
| 23801 |
+
"loss": 0.7723,
|
| 23802 |
+
"step": 3392
|
| 23803 |
+
},
|
| 23804 |
+
{
|
| 23805 |
+
"epoch": 6.924489795918367,
|
| 23806 |
+
"grad_norm": 2.34375,
|
| 23807 |
+
"learning_rate": 1.2309685779073535e-06,
|
| 23808 |
+
"loss": 0.7689,
|
| 23809 |
+
"step": 3393
|
| 23810 |
+
},
|
| 23811 |
+
{
|
| 23812 |
+
"epoch": 6.926530612244898,
|
| 23813 |
+
"grad_norm": 2.40625,
|
| 23814 |
+
"learning_rate": 1.1985746679624231e-06,
|
| 23815 |
+
"loss": 0.7993,
|
| 23816 |
+
"step": 3394
|
| 23817 |
+
},
|
| 23818 |
+
{
|
| 23819 |
+
"epoch": 6.928571428571429,
|
| 23820 |
+
"grad_norm": 2.625,
|
| 23821 |
+
"learning_rate": 1.1661807580174927e-06,
|
| 23822 |
+
"loss": 0.8413,
|
| 23823 |
+
"step": 3395
|
| 23824 |
+
},
|
| 23825 |
+
{
|
| 23826 |
+
"epoch": 6.930612244897959,
|
| 23827 |
+
"grad_norm": 2.25,
|
| 23828 |
+
"learning_rate": 1.1337868480725623e-06,
|
| 23829 |
+
"loss": 0.8014,
|
| 23830 |
+
"step": 3396
|
| 23831 |
+
},
|
| 23832 |
+
{
|
| 23833 |
+
"epoch": 6.93265306122449,
|
| 23834 |
+
"grad_norm": 2.578125,
|
| 23835 |
+
"learning_rate": 1.1013929381276321e-06,
|
| 23836 |
+
"loss": 0.8613,
|
| 23837 |
+
"step": 3397
|
| 23838 |
+
},
|
| 23839 |
+
{
|
| 23840 |
+
"epoch": 6.9346938775510205,
|
| 23841 |
+
"grad_norm": 2.296875,
|
| 23842 |
+
"learning_rate": 1.0689990281827017e-06,
|
| 23843 |
+
"loss": 0.8238,
|
| 23844 |
+
"step": 3398
|
| 23845 |
+
},
|
| 23846 |
+
{
|
| 23847 |
+
"epoch": 6.936734693877551,
|
| 23848 |
+
"grad_norm": 2.421875,
|
| 23849 |
+
"learning_rate": 1.0366051182377713e-06,
|
| 23850 |
+
"loss": 0.8114,
|
| 23851 |
+
"step": 3399
|
| 23852 |
+
},
|
| 23853 |
+
{
|
| 23854 |
+
"epoch": 6.938775510204081,
|
| 23855 |
+
"grad_norm": 2.59375,
|
| 23856 |
+
"learning_rate": 1.004211208292841e-06,
|
| 23857 |
+
"loss": 0.7968,
|
| 23858 |
+
"step": 3400
|
| 23859 |
+
},
|
| 23860 |
+
{
|
| 23861 |
+
"epoch": 6.940816326530612,
|
| 23862 |
+
"grad_norm": 2.390625,
|
| 23863 |
+
"learning_rate": 9.718172983479108e-07,
|
| 23864 |
+
"loss": 0.7894,
|
| 23865 |
+
"step": 3401
|
| 23866 |
+
},
|
| 23867 |
+
{
|
| 23868 |
+
"epoch": 6.942857142857143,
|
| 23869 |
+
"grad_norm": 2.265625,
|
| 23870 |
+
"learning_rate": 9.394233884029803e-07,
|
| 23871 |
+
"loss": 0.7437,
|
| 23872 |
+
"step": 3402
|
| 23873 |
+
},
|
| 23874 |
+
{
|
| 23875 |
+
"epoch": 6.944897959183674,
|
| 23876 |
+
"grad_norm": 2.53125,
|
| 23877 |
+
"learning_rate": 9.0702947845805e-07,
|
| 23878 |
+
"loss": 0.8233,
|
| 23879 |
+
"step": 3403
|
| 23880 |
+
},
|
| 23881 |
+
{
|
| 23882 |
+
"epoch": 6.946938775510204,
|
| 23883 |
+
"grad_norm": 2.46875,
|
| 23884 |
+
"learning_rate": 8.746355685131196e-07,
|
| 23885 |
+
"loss": 0.7742,
|
| 23886 |
+
"step": 3404
|
| 23887 |
+
},
|
| 23888 |
+
{
|
| 23889 |
+
"epoch": 6.948979591836735,
|
| 23890 |
+
"grad_norm": 2.40625,
|
| 23891 |
+
"learning_rate": 8.422416585681893e-07,
|
| 23892 |
+
"loss": 0.8311,
|
| 23893 |
+
"step": 3405
|
| 23894 |
+
},
|
| 23895 |
+
{
|
| 23896 |
+
"epoch": 6.951020408163266,
|
| 23897 |
+
"grad_norm": 2.359375,
|
| 23898 |
+
"learning_rate": 8.098477486232589e-07,
|
| 23899 |
+
"loss": 0.8189,
|
| 23900 |
+
"step": 3406
|
| 23901 |
+
},
|
| 23902 |
+
{
|
| 23903 |
+
"epoch": 6.953061224489796,
|
| 23904 |
+
"grad_norm": 2.3125,
|
| 23905 |
+
"learning_rate": 7.774538386783285e-07,
|
| 23906 |
+
"loss": 0.7435,
|
| 23907 |
+
"step": 3407
|
| 23908 |
+
},
|
| 23909 |
+
{
|
| 23910 |
+
"epoch": 6.955102040816326,
|
| 23911 |
+
"grad_norm": 2.5625,
|
| 23912 |
+
"learning_rate": 7.450599287333982e-07,
|
| 23913 |
+
"loss": 0.8108,
|
| 23914 |
+
"step": 3408
|
| 23915 |
+
},
|
| 23916 |
+
{
|
| 23917 |
+
"epoch": 6.957142857142857,
|
| 23918 |
+
"grad_norm": 2.515625,
|
| 23919 |
+
"learning_rate": 7.126660187884678e-07,
|
| 23920 |
+
"loss": 0.7694,
|
| 23921 |
+
"step": 3409
|
| 23922 |
+
},
|
| 23923 |
+
{
|
| 23924 |
+
"epoch": 6.959183673469388,
|
| 23925 |
+
"grad_norm": 2.28125,
|
| 23926 |
+
"learning_rate": 6.802721088435375e-07,
|
| 23927 |
+
"loss": 0.8291,
|
| 23928 |
+
"step": 3410
|
| 23929 |
+
},
|
| 23930 |
+
{
|
| 23931 |
+
"epoch": 6.961224489795918,
|
| 23932 |
+
"grad_norm": 2.546875,
|
| 23933 |
+
"learning_rate": 6.478781988986071e-07,
|
| 23934 |
+
"loss": 0.8302,
|
| 23935 |
+
"step": 3411
|
| 23936 |
+
},
|
| 23937 |
+
{
|
| 23938 |
+
"epoch": 6.963265306122449,
|
| 23939 |
+
"grad_norm": 2.46875,
|
| 23940 |
+
"learning_rate": 6.154842889536768e-07,
|
| 23941 |
+
"loss": 0.8729,
|
| 23942 |
+
"step": 3412
|
| 23943 |
+
},
|
| 23944 |
+
{
|
| 23945 |
+
"epoch": 6.96530612244898,
|
| 23946 |
+
"grad_norm": 2.53125,
|
| 23947 |
+
"learning_rate": 5.830903790087464e-07,
|
| 23948 |
+
"loss": 0.7484,
|
| 23949 |
+
"step": 3413
|
| 23950 |
+
},
|
| 23951 |
+
{
|
| 23952 |
+
"epoch": 6.96734693877551,
|
| 23953 |
+
"grad_norm": 2.28125,
|
| 23954 |
+
"learning_rate": 5.506964690638161e-07,
|
| 23955 |
+
"loss": 0.7746,
|
| 23956 |
+
"step": 3414
|
| 23957 |
+
},
|
| 23958 |
+
{
|
| 23959 |
+
"epoch": 6.969387755102041,
|
| 23960 |
+
"grad_norm": 2.515625,
|
| 23961 |
+
"learning_rate": 5.183025591188857e-07,
|
| 23962 |
+
"loss": 0.8473,
|
| 23963 |
+
"step": 3415
|
| 23964 |
+
},
|
| 23965 |
+
{
|
| 23966 |
+
"epoch": 6.9714285714285715,
|
| 23967 |
+
"grad_norm": 2.421875,
|
| 23968 |
+
"learning_rate": 4.859086491739554e-07,
|
| 23969 |
+
"loss": 0.8695,
|
| 23970 |
+
"step": 3416
|
| 23971 |
+
},
|
| 23972 |
+
{
|
| 23973 |
+
"epoch": 6.973469387755102,
|
| 23974 |
+
"grad_norm": 2.421875,
|
| 23975 |
+
"learning_rate": 4.53514739229025e-07,
|
| 23976 |
+
"loss": 0.8249,
|
| 23977 |
+
"step": 3417
|
| 23978 |
+
},
|
| 23979 |
+
{
|
| 23980 |
+
"epoch": 6.975510204081632,
|
| 23981 |
+
"grad_norm": 2.359375,
|
| 23982 |
+
"learning_rate": 4.2112082928409463e-07,
|
| 23983 |
+
"loss": 0.7749,
|
| 23984 |
+
"step": 3418
|
| 23985 |
+
},
|
| 23986 |
+
{
|
| 23987 |
+
"epoch": 6.977551020408163,
|
| 23988 |
+
"grad_norm": 2.21875,
|
| 23989 |
+
"learning_rate": 3.8872691933916423e-07,
|
| 23990 |
+
"loss": 0.7507,
|
| 23991 |
+
"step": 3419
|
| 23992 |
+
},
|
| 23993 |
+
{
|
| 23994 |
+
"epoch": 6.979591836734694,
|
| 23995 |
+
"grad_norm": 2.421875,
|
| 23996 |
+
"learning_rate": 3.563330093942339e-07,
|
| 23997 |
+
"loss": 0.7236,
|
| 23998 |
+
"step": 3420
|
| 23999 |
+
},
|
| 24000 |
+
{
|
| 24001 |
+
"epoch": 6.981632653061224,
|
| 24002 |
+
"grad_norm": 2.484375,
|
| 24003 |
+
"learning_rate": 3.2393909944930353e-07,
|
| 24004 |
+
"loss": 0.8538,
|
| 24005 |
+
"step": 3421
|
| 24006 |
+
},
|
| 24007 |
+
{
|
| 24008 |
+
"epoch": 6.983673469387755,
|
| 24009 |
+
"grad_norm": 2.5,
|
| 24010 |
+
"learning_rate": 2.915451895043732e-07,
|
| 24011 |
+
"loss": 0.8119,
|
| 24012 |
+
"step": 3422
|
| 24013 |
+
},
|
| 24014 |
+
{
|
| 24015 |
+
"epoch": 6.985714285714286,
|
| 24016 |
+
"grad_norm": 2.375,
|
| 24017 |
+
"learning_rate": 2.5915127955944284e-07,
|
| 24018 |
+
"loss": 0.9002,
|
| 24019 |
+
"step": 3423
|
| 24020 |
+
},
|
| 24021 |
+
{
|
| 24022 |
+
"epoch": 6.987755102040817,
|
| 24023 |
+
"grad_norm": 2.34375,
|
| 24024 |
+
"learning_rate": 2.267573696145125e-07,
|
| 24025 |
+
"loss": 0.8106,
|
| 24026 |
+
"step": 3424
|
| 24027 |
+
},
|
| 24028 |
+
{
|
| 24029 |
+
"epoch": 6.989795918367347,
|
| 24030 |
+
"grad_norm": 2.53125,
|
| 24031 |
+
"learning_rate": 1.9436345966958211e-07,
|
| 24032 |
+
"loss": 0.7706,
|
| 24033 |
+
"step": 3425
|
| 24034 |
+
},
|
| 24035 |
+
{
|
| 24036 |
+
"epoch": 6.9918367346938775,
|
| 24037 |
+
"grad_norm": 2.328125,
|
| 24038 |
+
"learning_rate": 1.6196954972465177e-07,
|
| 24039 |
+
"loss": 0.8167,
|
| 24040 |
+
"step": 3426
|
| 24041 |
+
},
|
| 24042 |
+
{
|
| 24043 |
+
"epoch": 6.993877551020408,
|
| 24044 |
+
"grad_norm": 2.5,
|
| 24045 |
+
"learning_rate": 1.2957563977972142e-07,
|
| 24046 |
+
"loss": 0.8576,
|
| 24047 |
+
"step": 3427
|
| 24048 |
+
},
|
| 24049 |
+
{
|
| 24050 |
+
"epoch": 6.995918367346938,
|
| 24051 |
+
"grad_norm": 2.53125,
|
| 24052 |
+
"learning_rate": 9.718172983479106e-08,
|
| 24053 |
+
"loss": 0.797,
|
| 24054 |
+
"step": 3428
|
| 24055 |
+
},
|
| 24056 |
+
{
|
| 24057 |
+
"epoch": 6.997959183673469,
|
| 24058 |
+
"grad_norm": 2.359375,
|
| 24059 |
+
"learning_rate": 6.478781988986071e-08,
|
| 24060 |
+
"loss": 0.7789,
|
| 24061 |
+
"step": 3429
|
| 24062 |
+
},
|
| 24063 |
+
{
|
| 24064 |
+
"epoch": 7.0,
|
| 24065 |
+
"grad_norm": 2.421875,
|
| 24066 |
+
"learning_rate": 3.2393909944930355e-08,
|
| 24067 |
+
"loss": 0.7793,
|
| 24068 |
+
"step": 3430
|
| 24069 |
}
|
| 24070 |
],
|
| 24071 |
"logging_steps": 1,
|
|
|
|
| 24080 |
"should_evaluate": false,
|
| 24081 |
"should_log": false,
|
| 24082 |
"should_save": true,
|
| 24083 |
+
"should_training_stop": true
|
| 24084 |
},
|
| 24085 |
"attributes": {}
|
| 24086 |
}
|
| 24087 |
},
|
| 24088 |
+
"total_flos": 3.377162808135254e+18,
|
| 24089 |
"train_batch_size": 32,
|
| 24090 |
"trial_name": null,
|
| 24091 |
"trial_params": null
|