Training in progress, step 8500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 91951912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:427d176ee258c43754a05c6feee487bab90e0d6a9093140a46de2a22cc1f8d0c
|
| 3 |
size 91951912
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 183991627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cf36814a9c3acb4d638d14aa867356585282a98636d3b77263111609c782714
|
| 3 |
size 183991627
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f196323d7423b60f8e4ceb7dbf8715ee326c0d068e5ff164f13c63b279b9f1a0
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ccaeb1056302281e231273fa02d3383f76b127e298ace02e671c2fd97405e36
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -56136,6 +56136,3514 @@
|
|
| 56136 |
"eval_samples_per_second": 25.667,
|
| 56137 |
"eval_steps_per_second": 1.604,
|
| 56138 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56139 |
}
|
| 56140 |
],
|
| 56141 |
"logging_steps": 1,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.00085,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 8500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 56136 |
"eval_samples_per_second": 25.667,
|
| 56137 |
"eval_steps_per_second": 1.604,
|
| 56138 |
"step": 8000
|
| 56139 |
+
},
|
| 56140 |
+
{
|
| 56141 |
+
"epoch": 0.0008001,
|
| 56142 |
+
"grad_norm": 0.3202129602432251,
|
| 56143 |
+
"learning_rate": 8e-05,
|
| 56144 |
+
"loss": 0.1265,
|
| 56145 |
+
"step": 8001
|
| 56146 |
+
},
|
| 56147 |
+
{
|
| 56148 |
+
"epoch": 0.0008002,
|
| 56149 |
+
"grad_norm": 0.3307330310344696,
|
| 56150 |
+
"learning_rate": 8.001e-05,
|
| 56151 |
+
"loss": 0.1258,
|
| 56152 |
+
"step": 8002
|
| 56153 |
+
},
|
| 56154 |
+
{
|
| 56155 |
+
"epoch": 0.0008003,
|
| 56156 |
+
"grad_norm": 0.6703978180885315,
|
| 56157 |
+
"learning_rate": 8.002e-05,
|
| 56158 |
+
"loss": 0.1598,
|
| 56159 |
+
"step": 8003
|
| 56160 |
+
},
|
| 56161 |
+
{
|
| 56162 |
+
"epoch": 0.0008004,
|
| 56163 |
+
"grad_norm": 0.4847652316093445,
|
| 56164 |
+
"learning_rate": 8.003e-05,
|
| 56165 |
+
"loss": 0.1378,
|
| 56166 |
+
"step": 8004
|
| 56167 |
+
},
|
| 56168 |
+
{
|
| 56169 |
+
"epoch": 0.0008005,
|
| 56170 |
+
"grad_norm": 0.366886705160141,
|
| 56171 |
+
"learning_rate": 8.004e-05,
|
| 56172 |
+
"loss": 0.1316,
|
| 56173 |
+
"step": 8005
|
| 56174 |
+
},
|
| 56175 |
+
{
|
| 56176 |
+
"epoch": 0.0008006,
|
| 56177 |
+
"grad_norm": 0.3496139347553253,
|
| 56178 |
+
"learning_rate": 8.005000000000001e-05,
|
| 56179 |
+
"loss": 0.1282,
|
| 56180 |
+
"step": 8006
|
| 56181 |
+
},
|
| 56182 |
+
{
|
| 56183 |
+
"epoch": 0.0008007,
|
| 56184 |
+
"grad_norm": 0.3310615122318268,
|
| 56185 |
+
"learning_rate": 8.006e-05,
|
| 56186 |
+
"loss": 0.1295,
|
| 56187 |
+
"step": 8007
|
| 56188 |
+
},
|
| 56189 |
+
{
|
| 56190 |
+
"epoch": 0.0008008,
|
| 56191 |
+
"grad_norm": 0.3476350009441376,
|
| 56192 |
+
"learning_rate": 8.007e-05,
|
| 56193 |
+
"loss": 0.1304,
|
| 56194 |
+
"step": 8008
|
| 56195 |
+
},
|
| 56196 |
+
{
|
| 56197 |
+
"epoch": 0.0008009,
|
| 56198 |
+
"grad_norm": 0.457918643951416,
|
| 56199 |
+
"learning_rate": 8.008e-05,
|
| 56200 |
+
"loss": 0.156,
|
| 56201 |
+
"step": 8009
|
| 56202 |
+
},
|
| 56203 |
+
{
|
| 56204 |
+
"epoch": 0.000801,
|
| 56205 |
+
"grad_norm": 0.34123826026916504,
|
| 56206 |
+
"learning_rate": 8.009e-05,
|
| 56207 |
+
"loss": 0.135,
|
| 56208 |
+
"step": 8010
|
| 56209 |
+
},
|
| 56210 |
+
{
|
| 56211 |
+
"epoch": 0.0008011,
|
| 56212 |
+
"grad_norm": 0.31119588017463684,
|
| 56213 |
+
"learning_rate": 8.010000000000001e-05,
|
| 56214 |
+
"loss": 0.1221,
|
| 56215 |
+
"step": 8011
|
| 56216 |
+
},
|
| 56217 |
+
{
|
| 56218 |
+
"epoch": 0.0008012,
|
| 56219 |
+
"grad_norm": 0.3695564270019531,
|
| 56220 |
+
"learning_rate": 8.011e-05,
|
| 56221 |
+
"loss": 0.1353,
|
| 56222 |
+
"step": 8012
|
| 56223 |
+
},
|
| 56224 |
+
{
|
| 56225 |
+
"epoch": 0.0008013,
|
| 56226 |
+
"grad_norm": 0.49144652485847473,
|
| 56227 |
+
"learning_rate": 8.012e-05,
|
| 56228 |
+
"loss": 0.1431,
|
| 56229 |
+
"step": 8013
|
| 56230 |
+
},
|
| 56231 |
+
{
|
| 56232 |
+
"epoch": 0.0008014,
|
| 56233 |
+
"grad_norm": 0.4846400022506714,
|
| 56234 |
+
"learning_rate": 8.013000000000001e-05,
|
| 56235 |
+
"loss": 0.1354,
|
| 56236 |
+
"step": 8014
|
| 56237 |
+
},
|
| 56238 |
+
{
|
| 56239 |
+
"epoch": 0.0008015,
|
| 56240 |
+
"grad_norm": 0.3272921144962311,
|
| 56241 |
+
"learning_rate": 8.014e-05,
|
| 56242 |
+
"loss": 0.1292,
|
| 56243 |
+
"step": 8015
|
| 56244 |
+
},
|
| 56245 |
+
{
|
| 56246 |
+
"epoch": 0.0008016,
|
| 56247 |
+
"grad_norm": 0.3025423288345337,
|
| 56248 |
+
"learning_rate": 8.015e-05,
|
| 56249 |
+
"loss": 0.1259,
|
| 56250 |
+
"step": 8016
|
| 56251 |
+
},
|
| 56252 |
+
{
|
| 56253 |
+
"epoch": 0.0008017,
|
| 56254 |
+
"grad_norm": 0.8752557039260864,
|
| 56255 |
+
"learning_rate": 8.016e-05,
|
| 56256 |
+
"loss": 0.1584,
|
| 56257 |
+
"step": 8017
|
| 56258 |
+
},
|
| 56259 |
+
{
|
| 56260 |
+
"epoch": 0.0008018,
|
| 56261 |
+
"grad_norm": 0.4220777750015259,
|
| 56262 |
+
"learning_rate": 8.017000000000001e-05,
|
| 56263 |
+
"loss": 0.1422,
|
| 56264 |
+
"step": 8018
|
| 56265 |
+
},
|
| 56266 |
+
{
|
| 56267 |
+
"epoch": 0.0008019,
|
| 56268 |
+
"grad_norm": 0.8674742579460144,
|
| 56269 |
+
"learning_rate": 8.018e-05,
|
| 56270 |
+
"loss": 0.219,
|
| 56271 |
+
"step": 8019
|
| 56272 |
+
},
|
| 56273 |
+
{
|
| 56274 |
+
"epoch": 0.000802,
|
| 56275 |
+
"grad_norm": 0.5381582379341125,
|
| 56276 |
+
"learning_rate": 8.019e-05,
|
| 56277 |
+
"loss": 0.1398,
|
| 56278 |
+
"step": 8020
|
| 56279 |
+
},
|
| 56280 |
+
{
|
| 56281 |
+
"epoch": 0.0008021,
|
| 56282 |
+
"grad_norm": 1.6414377689361572,
|
| 56283 |
+
"learning_rate": 8.020000000000001e-05,
|
| 56284 |
+
"loss": 0.2237,
|
| 56285 |
+
"step": 8021
|
| 56286 |
+
},
|
| 56287 |
+
{
|
| 56288 |
+
"epoch": 0.0008022,
|
| 56289 |
+
"grad_norm": 0.4506913721561432,
|
| 56290 |
+
"learning_rate": 8.020999999999999e-05,
|
| 56291 |
+
"loss": 0.1249,
|
| 56292 |
+
"step": 8022
|
| 56293 |
+
},
|
| 56294 |
+
{
|
| 56295 |
+
"epoch": 0.0008023,
|
| 56296 |
+
"grad_norm": 9.942520141601562,
|
| 56297 |
+
"learning_rate": 8.022e-05,
|
| 56298 |
+
"loss": 0.7549,
|
| 56299 |
+
"step": 8023
|
| 56300 |
+
},
|
| 56301 |
+
{
|
| 56302 |
+
"epoch": 0.0008024,
|
| 56303 |
+
"grad_norm": 0.599538266658783,
|
| 56304 |
+
"learning_rate": 8.023000000000001e-05,
|
| 56305 |
+
"loss": 0.1387,
|
| 56306 |
+
"step": 8024
|
| 56307 |
+
},
|
| 56308 |
+
{
|
| 56309 |
+
"epoch": 0.0008025,
|
| 56310 |
+
"grad_norm": 0.495687872171402,
|
| 56311 |
+
"learning_rate": 8.023999999999999e-05,
|
| 56312 |
+
"loss": 0.1231,
|
| 56313 |
+
"step": 8025
|
| 56314 |
+
},
|
| 56315 |
+
{
|
| 56316 |
+
"epoch": 0.0008026,
|
| 56317 |
+
"grad_norm": 0.5193979144096375,
|
| 56318 |
+
"learning_rate": 8.025e-05,
|
| 56319 |
+
"loss": 0.1304,
|
| 56320 |
+
"step": 8026
|
| 56321 |
+
},
|
| 56322 |
+
{
|
| 56323 |
+
"epoch": 0.0008027,
|
| 56324 |
+
"grad_norm": 0.3968997299671173,
|
| 56325 |
+
"learning_rate": 8.026000000000001e-05,
|
| 56326 |
+
"loss": 0.1213,
|
| 56327 |
+
"step": 8027
|
| 56328 |
+
},
|
| 56329 |
+
{
|
| 56330 |
+
"epoch": 0.0008028,
|
| 56331 |
+
"grad_norm": 0.37203750014305115,
|
| 56332 |
+
"learning_rate": 8.027e-05,
|
| 56333 |
+
"loss": 0.1225,
|
| 56334 |
+
"step": 8028
|
| 56335 |
+
},
|
| 56336 |
+
{
|
| 56337 |
+
"epoch": 0.0008029,
|
| 56338 |
+
"grad_norm": 0.4064584970474243,
|
| 56339 |
+
"learning_rate": 8.028e-05,
|
| 56340 |
+
"loss": 0.1312,
|
| 56341 |
+
"step": 8029
|
| 56342 |
+
},
|
| 56343 |
+
{
|
| 56344 |
+
"epoch": 0.000803,
|
| 56345 |
+
"grad_norm": 2.3482778072357178,
|
| 56346 |
+
"learning_rate": 8.029e-05,
|
| 56347 |
+
"loss": 0.3098,
|
| 56348 |
+
"step": 8030
|
| 56349 |
+
},
|
| 56350 |
+
{
|
| 56351 |
+
"epoch": 0.0008031,
|
| 56352 |
+
"grad_norm": 0.4314936101436615,
|
| 56353 |
+
"learning_rate": 8.03e-05,
|
| 56354 |
+
"loss": 0.1359,
|
| 56355 |
+
"step": 8031
|
| 56356 |
+
},
|
| 56357 |
+
{
|
| 56358 |
+
"epoch": 0.0008032,
|
| 56359 |
+
"grad_norm": 0.4015841484069824,
|
| 56360 |
+
"learning_rate": 8.031e-05,
|
| 56361 |
+
"loss": 0.1229,
|
| 56362 |
+
"step": 8032
|
| 56363 |
+
},
|
| 56364 |
+
{
|
| 56365 |
+
"epoch": 0.0008033,
|
| 56366 |
+
"grad_norm": 0.4461618661880493,
|
| 56367 |
+
"learning_rate": 8.032e-05,
|
| 56368 |
+
"loss": 0.1239,
|
| 56369 |
+
"step": 8033
|
| 56370 |
+
},
|
| 56371 |
+
{
|
| 56372 |
+
"epoch": 0.0008034,
|
| 56373 |
+
"grad_norm": 2.072455883026123,
|
| 56374 |
+
"learning_rate": 8.033e-05,
|
| 56375 |
+
"loss": 0.3016,
|
| 56376 |
+
"step": 8034
|
| 56377 |
+
},
|
| 56378 |
+
{
|
| 56379 |
+
"epoch": 0.0008035,
|
| 56380 |
+
"grad_norm": 0.39215677976608276,
|
| 56381 |
+
"learning_rate": 8.034e-05,
|
| 56382 |
+
"loss": 0.1331,
|
| 56383 |
+
"step": 8035
|
| 56384 |
+
},
|
| 56385 |
+
{
|
| 56386 |
+
"epoch": 0.0008036,
|
| 56387 |
+
"grad_norm": 0.5516240000724792,
|
| 56388 |
+
"learning_rate": 8.035e-05,
|
| 56389 |
+
"loss": 0.1457,
|
| 56390 |
+
"step": 8036
|
| 56391 |
+
},
|
| 56392 |
+
{
|
| 56393 |
+
"epoch": 0.0008037,
|
| 56394 |
+
"grad_norm": 0.7387846112251282,
|
| 56395 |
+
"learning_rate": 8.036e-05,
|
| 56396 |
+
"loss": 0.1438,
|
| 56397 |
+
"step": 8037
|
| 56398 |
+
},
|
| 56399 |
+
{
|
| 56400 |
+
"epoch": 0.0008038,
|
| 56401 |
+
"grad_norm": 0.3824997544288635,
|
| 56402 |
+
"learning_rate": 8.037e-05,
|
| 56403 |
+
"loss": 0.1241,
|
| 56404 |
+
"step": 8038
|
| 56405 |
+
},
|
| 56406 |
+
{
|
| 56407 |
+
"epoch": 0.0008039,
|
| 56408 |
+
"grad_norm": 0.4071812629699707,
|
| 56409 |
+
"learning_rate": 8.038e-05,
|
| 56410 |
+
"loss": 0.1257,
|
| 56411 |
+
"step": 8039
|
| 56412 |
+
},
|
| 56413 |
+
{
|
| 56414 |
+
"epoch": 0.000804,
|
| 56415 |
+
"grad_norm": 0.3403361439704895,
|
| 56416 |
+
"learning_rate": 8.039000000000001e-05,
|
| 56417 |
+
"loss": 0.1212,
|
| 56418 |
+
"step": 8040
|
| 56419 |
+
},
|
| 56420 |
+
{
|
| 56421 |
+
"epoch": 0.0008041,
|
| 56422 |
+
"grad_norm": 0.580666720867157,
|
| 56423 |
+
"learning_rate": 8.04e-05,
|
| 56424 |
+
"loss": 0.1626,
|
| 56425 |
+
"step": 8041
|
| 56426 |
+
},
|
| 56427 |
+
{
|
| 56428 |
+
"epoch": 0.0008042,
|
| 56429 |
+
"grad_norm": 0.3647790253162384,
|
| 56430 |
+
"learning_rate": 8.041e-05,
|
| 56431 |
+
"loss": 0.1339,
|
| 56432 |
+
"step": 8042
|
| 56433 |
+
},
|
| 56434 |
+
{
|
| 56435 |
+
"epoch": 0.0008043,
|
| 56436 |
+
"grad_norm": 0.34806761145591736,
|
| 56437 |
+
"learning_rate": 8.042e-05,
|
| 56438 |
+
"loss": 0.1258,
|
| 56439 |
+
"step": 8043
|
| 56440 |
+
},
|
| 56441 |
+
{
|
| 56442 |
+
"epoch": 0.0008044,
|
| 56443 |
+
"grad_norm": 1.0396161079406738,
|
| 56444 |
+
"learning_rate": 8.043e-05,
|
| 56445 |
+
"loss": 0.1851,
|
| 56446 |
+
"step": 8044
|
| 56447 |
+
},
|
| 56448 |
+
{
|
| 56449 |
+
"epoch": 0.0008045,
|
| 56450 |
+
"grad_norm": 0.3170357346534729,
|
| 56451 |
+
"learning_rate": 8.044000000000001e-05,
|
| 56452 |
+
"loss": 0.1263,
|
| 56453 |
+
"step": 8045
|
| 56454 |
+
},
|
| 56455 |
+
{
|
| 56456 |
+
"epoch": 0.0008046,
|
| 56457 |
+
"grad_norm": 0.3407961428165436,
|
| 56458 |
+
"learning_rate": 8.045e-05,
|
| 56459 |
+
"loss": 0.1237,
|
| 56460 |
+
"step": 8046
|
| 56461 |
+
},
|
| 56462 |
+
{
|
| 56463 |
+
"epoch": 0.0008047,
|
| 56464 |
+
"grad_norm": 0.4640645384788513,
|
| 56465 |
+
"learning_rate": 8.046e-05,
|
| 56466 |
+
"loss": 0.1443,
|
| 56467 |
+
"step": 8047
|
| 56468 |
+
},
|
| 56469 |
+
{
|
| 56470 |
+
"epoch": 0.0008048,
|
| 56471 |
+
"grad_norm": 0.3018249571323395,
|
| 56472 |
+
"learning_rate": 8.047000000000001e-05,
|
| 56473 |
+
"loss": 0.111,
|
| 56474 |
+
"step": 8048
|
| 56475 |
+
},
|
| 56476 |
+
{
|
| 56477 |
+
"epoch": 0.0008049,
|
| 56478 |
+
"grad_norm": 0.7696290612220764,
|
| 56479 |
+
"learning_rate": 8.048e-05,
|
| 56480 |
+
"loss": 0.1543,
|
| 56481 |
+
"step": 8049
|
| 56482 |
+
},
|
| 56483 |
+
{
|
| 56484 |
+
"epoch": 0.000805,
|
| 56485 |
+
"grad_norm": 0.3650062680244446,
|
| 56486 |
+
"learning_rate": 8.049e-05,
|
| 56487 |
+
"loss": 0.1274,
|
| 56488 |
+
"step": 8050
|
| 56489 |
+
},
|
| 56490 |
+
{
|
| 56491 |
+
"epoch": 0.0008051,
|
| 56492 |
+
"grad_norm": 0.34273412823677063,
|
| 56493 |
+
"learning_rate": 8.05e-05,
|
| 56494 |
+
"loss": 0.1268,
|
| 56495 |
+
"step": 8051
|
| 56496 |
+
},
|
| 56497 |
+
{
|
| 56498 |
+
"epoch": 0.0008052,
|
| 56499 |
+
"grad_norm": 0.35924819111824036,
|
| 56500 |
+
"learning_rate": 8.051000000000001e-05,
|
| 56501 |
+
"loss": 0.1298,
|
| 56502 |
+
"step": 8052
|
| 56503 |
+
},
|
| 56504 |
+
{
|
| 56505 |
+
"epoch": 0.0008053,
|
| 56506 |
+
"grad_norm": 0.3455607295036316,
|
| 56507 |
+
"learning_rate": 8.052e-05,
|
| 56508 |
+
"loss": 0.1315,
|
| 56509 |
+
"step": 8053
|
| 56510 |
+
},
|
| 56511 |
+
{
|
| 56512 |
+
"epoch": 0.0008054,
|
| 56513 |
+
"grad_norm": 0.39464035630226135,
|
| 56514 |
+
"learning_rate": 8.053e-05,
|
| 56515 |
+
"loss": 0.1275,
|
| 56516 |
+
"step": 8054
|
| 56517 |
+
},
|
| 56518 |
+
{
|
| 56519 |
+
"epoch": 0.0008055,
|
| 56520 |
+
"grad_norm": 0.3353613615036011,
|
| 56521 |
+
"learning_rate": 8.054000000000001e-05,
|
| 56522 |
+
"loss": 0.1278,
|
| 56523 |
+
"step": 8055
|
| 56524 |
+
},
|
| 56525 |
+
{
|
| 56526 |
+
"epoch": 0.0008056,
|
| 56527 |
+
"grad_norm": 0.315790593624115,
|
| 56528 |
+
"learning_rate": 8.054999999999999e-05,
|
| 56529 |
+
"loss": 0.1274,
|
| 56530 |
+
"step": 8056
|
| 56531 |
+
},
|
| 56532 |
+
{
|
| 56533 |
+
"epoch": 0.0008057,
|
| 56534 |
+
"grad_norm": 0.29406729340553284,
|
| 56535 |
+
"learning_rate": 8.056e-05,
|
| 56536 |
+
"loss": 0.1207,
|
| 56537 |
+
"step": 8057
|
| 56538 |
+
},
|
| 56539 |
+
{
|
| 56540 |
+
"epoch": 0.0008058,
|
| 56541 |
+
"grad_norm": 0.32833918929100037,
|
| 56542 |
+
"learning_rate": 8.057000000000001e-05,
|
| 56543 |
+
"loss": 0.1249,
|
| 56544 |
+
"step": 8058
|
| 56545 |
+
},
|
| 56546 |
+
{
|
| 56547 |
+
"epoch": 0.0008059,
|
| 56548 |
+
"grad_norm": 0.3407994508743286,
|
| 56549 |
+
"learning_rate": 8.057999999999999e-05,
|
| 56550 |
+
"loss": 0.1265,
|
| 56551 |
+
"step": 8059
|
| 56552 |
+
},
|
| 56553 |
+
{
|
| 56554 |
+
"epoch": 0.000806,
|
| 56555 |
+
"grad_norm": 0.28301092982292175,
|
| 56556 |
+
"learning_rate": 8.059e-05,
|
| 56557 |
+
"loss": 0.1237,
|
| 56558 |
+
"step": 8060
|
| 56559 |
+
},
|
| 56560 |
+
{
|
| 56561 |
+
"epoch": 0.0008061,
|
| 56562 |
+
"grad_norm": 0.2914838194847107,
|
| 56563 |
+
"learning_rate": 8.060000000000001e-05,
|
| 56564 |
+
"loss": 0.1217,
|
| 56565 |
+
"step": 8061
|
| 56566 |
+
},
|
| 56567 |
+
{
|
| 56568 |
+
"epoch": 0.0008062,
|
| 56569 |
+
"grad_norm": 0.32073450088500977,
|
| 56570 |
+
"learning_rate": 8.061e-05,
|
| 56571 |
+
"loss": 0.1251,
|
| 56572 |
+
"step": 8062
|
| 56573 |
+
},
|
| 56574 |
+
{
|
| 56575 |
+
"epoch": 0.0008063,
|
| 56576 |
+
"grad_norm": 0.29743877053260803,
|
| 56577 |
+
"learning_rate": 8.062e-05,
|
| 56578 |
+
"loss": 0.1174,
|
| 56579 |
+
"step": 8063
|
| 56580 |
+
},
|
| 56581 |
+
{
|
| 56582 |
+
"epoch": 0.0008064,
|
| 56583 |
+
"grad_norm": 0.3622455298900604,
|
| 56584 |
+
"learning_rate": 8.063e-05,
|
| 56585 |
+
"loss": 0.1258,
|
| 56586 |
+
"step": 8064
|
| 56587 |
+
},
|
| 56588 |
+
{
|
| 56589 |
+
"epoch": 0.0008065,
|
| 56590 |
+
"grad_norm": 0.3479747772216797,
|
| 56591 |
+
"learning_rate": 8.064e-05,
|
| 56592 |
+
"loss": 0.1384,
|
| 56593 |
+
"step": 8065
|
| 56594 |
+
},
|
| 56595 |
+
{
|
| 56596 |
+
"epoch": 0.0008066,
|
| 56597 |
+
"grad_norm": 0.5194042921066284,
|
| 56598 |
+
"learning_rate": 8.065e-05,
|
| 56599 |
+
"loss": 0.1379,
|
| 56600 |
+
"step": 8066
|
| 56601 |
+
},
|
| 56602 |
+
{
|
| 56603 |
+
"epoch": 0.0008067,
|
| 56604 |
+
"grad_norm": 0.2719845771789551,
|
| 56605 |
+
"learning_rate": 8.066e-05,
|
| 56606 |
+
"loss": 0.1227,
|
| 56607 |
+
"step": 8067
|
| 56608 |
+
},
|
| 56609 |
+
{
|
| 56610 |
+
"epoch": 0.0008068,
|
| 56611 |
+
"grad_norm": 0.3114382028579712,
|
| 56612 |
+
"learning_rate": 8.067e-05,
|
| 56613 |
+
"loss": 0.1308,
|
| 56614 |
+
"step": 8068
|
| 56615 |
+
},
|
| 56616 |
+
{
|
| 56617 |
+
"epoch": 0.0008069,
|
| 56618 |
+
"grad_norm": 0.3259691894054413,
|
| 56619 |
+
"learning_rate": 8.068000000000001e-05,
|
| 56620 |
+
"loss": 0.1252,
|
| 56621 |
+
"step": 8069
|
| 56622 |
+
},
|
| 56623 |
+
{
|
| 56624 |
+
"epoch": 0.000807,
|
| 56625 |
+
"grad_norm": 0.32140761613845825,
|
| 56626 |
+
"learning_rate": 8.069e-05,
|
| 56627 |
+
"loss": 0.1254,
|
| 56628 |
+
"step": 8070
|
| 56629 |
+
},
|
| 56630 |
+
{
|
| 56631 |
+
"epoch": 0.0008071,
|
| 56632 |
+
"grad_norm": 0.2814655005931854,
|
| 56633 |
+
"learning_rate": 8.07e-05,
|
| 56634 |
+
"loss": 0.1228,
|
| 56635 |
+
"step": 8071
|
| 56636 |
+
},
|
| 56637 |
+
{
|
| 56638 |
+
"epoch": 0.0008072,
|
| 56639 |
+
"grad_norm": 0.3107726275920868,
|
| 56640 |
+
"learning_rate": 8.071e-05,
|
| 56641 |
+
"loss": 0.1238,
|
| 56642 |
+
"step": 8072
|
| 56643 |
+
},
|
| 56644 |
+
{
|
| 56645 |
+
"epoch": 0.0008073,
|
| 56646 |
+
"grad_norm": 0.3063673973083496,
|
| 56647 |
+
"learning_rate": 8.072e-05,
|
| 56648 |
+
"loss": 0.1251,
|
| 56649 |
+
"step": 8073
|
| 56650 |
+
},
|
| 56651 |
+
{
|
| 56652 |
+
"epoch": 0.0008074,
|
| 56653 |
+
"grad_norm": 0.28373897075653076,
|
| 56654 |
+
"learning_rate": 8.073000000000001e-05,
|
| 56655 |
+
"loss": 0.1212,
|
| 56656 |
+
"step": 8074
|
| 56657 |
+
},
|
| 56658 |
+
{
|
| 56659 |
+
"epoch": 0.0008075,
|
| 56660 |
+
"grad_norm": 0.5714243054389954,
|
| 56661 |
+
"learning_rate": 8.074e-05,
|
| 56662 |
+
"loss": 0.1334,
|
| 56663 |
+
"step": 8075
|
| 56664 |
+
},
|
| 56665 |
+
{
|
| 56666 |
+
"epoch": 0.0008076,
|
| 56667 |
+
"grad_norm": 0.26238927245140076,
|
| 56668 |
+
"learning_rate": 8.075e-05,
|
| 56669 |
+
"loss": 0.1173,
|
| 56670 |
+
"step": 8076
|
| 56671 |
+
},
|
| 56672 |
+
{
|
| 56673 |
+
"epoch": 0.0008077,
|
| 56674 |
+
"grad_norm": 0.3149022161960602,
|
| 56675 |
+
"learning_rate": 8.076e-05,
|
| 56676 |
+
"loss": 0.1275,
|
| 56677 |
+
"step": 8077
|
| 56678 |
+
},
|
| 56679 |
+
{
|
| 56680 |
+
"epoch": 0.0008078,
|
| 56681 |
+
"grad_norm": 0.29632896184921265,
|
| 56682 |
+
"learning_rate": 8.077e-05,
|
| 56683 |
+
"loss": 0.1239,
|
| 56684 |
+
"step": 8078
|
| 56685 |
+
},
|
| 56686 |
+
{
|
| 56687 |
+
"epoch": 0.0008079,
|
| 56688 |
+
"grad_norm": 0.27960312366485596,
|
| 56689 |
+
"learning_rate": 8.078000000000001e-05,
|
| 56690 |
+
"loss": 0.12,
|
| 56691 |
+
"step": 8079
|
| 56692 |
+
},
|
| 56693 |
+
{
|
| 56694 |
+
"epoch": 0.000808,
|
| 56695 |
+
"grad_norm": 0.2839134931564331,
|
| 56696 |
+
"learning_rate": 8.079e-05,
|
| 56697 |
+
"loss": 0.1249,
|
| 56698 |
+
"step": 8080
|
| 56699 |
+
},
|
| 56700 |
+
{
|
| 56701 |
+
"epoch": 0.0008081,
|
| 56702 |
+
"grad_norm": 0.3265821933746338,
|
| 56703 |
+
"learning_rate": 8.08e-05,
|
| 56704 |
+
"loss": 0.1254,
|
| 56705 |
+
"step": 8081
|
| 56706 |
+
},
|
| 56707 |
+
{
|
| 56708 |
+
"epoch": 0.0008082,
|
| 56709 |
+
"grad_norm": 0.3058502674102783,
|
| 56710 |
+
"learning_rate": 8.081000000000001e-05,
|
| 56711 |
+
"loss": 0.1233,
|
| 56712 |
+
"step": 8082
|
| 56713 |
+
},
|
| 56714 |
+
{
|
| 56715 |
+
"epoch": 0.0008083,
|
| 56716 |
+
"grad_norm": 0.49245432019233704,
|
| 56717 |
+
"learning_rate": 8.082e-05,
|
| 56718 |
+
"loss": 0.1345,
|
| 56719 |
+
"step": 8083
|
| 56720 |
+
},
|
| 56721 |
+
{
|
| 56722 |
+
"epoch": 0.0008084,
|
| 56723 |
+
"grad_norm": 0.2840079963207245,
|
| 56724 |
+
"learning_rate": 8.083e-05,
|
| 56725 |
+
"loss": 0.1202,
|
| 56726 |
+
"step": 8084
|
| 56727 |
+
},
|
| 56728 |
+
{
|
| 56729 |
+
"epoch": 0.0008085,
|
| 56730 |
+
"grad_norm": 0.43045926094055176,
|
| 56731 |
+
"learning_rate": 8.084e-05,
|
| 56732 |
+
"loss": 0.1366,
|
| 56733 |
+
"step": 8085
|
| 56734 |
+
},
|
| 56735 |
+
{
|
| 56736 |
+
"epoch": 0.0008086,
|
| 56737 |
+
"grad_norm": 0.2691253125667572,
|
| 56738 |
+
"learning_rate": 8.085000000000001e-05,
|
| 56739 |
+
"loss": 0.1226,
|
| 56740 |
+
"step": 8086
|
| 56741 |
+
},
|
| 56742 |
+
{
|
| 56743 |
+
"epoch": 0.0008087,
|
| 56744 |
+
"grad_norm": 0.24027742445468903,
|
| 56745 |
+
"learning_rate": 8.086e-05,
|
| 56746 |
+
"loss": 0.1117,
|
| 56747 |
+
"step": 8087
|
| 56748 |
+
},
|
| 56749 |
+
{
|
| 56750 |
+
"epoch": 0.0008088,
|
| 56751 |
+
"grad_norm": 0.3302721381187439,
|
| 56752 |
+
"learning_rate": 8.087e-05,
|
| 56753 |
+
"loss": 0.1266,
|
| 56754 |
+
"step": 8088
|
| 56755 |
+
},
|
| 56756 |
+
{
|
| 56757 |
+
"epoch": 0.0008089,
|
| 56758 |
+
"grad_norm": 0.29586073756217957,
|
| 56759 |
+
"learning_rate": 8.088000000000001e-05,
|
| 56760 |
+
"loss": 0.1246,
|
| 56761 |
+
"step": 8089
|
| 56762 |
+
},
|
| 56763 |
+
{
|
| 56764 |
+
"epoch": 0.000809,
|
| 56765 |
+
"grad_norm": 0.257040798664093,
|
| 56766 |
+
"learning_rate": 8.088999999999999e-05,
|
| 56767 |
+
"loss": 0.1138,
|
| 56768 |
+
"step": 8090
|
| 56769 |
+
},
|
| 56770 |
+
{
|
| 56771 |
+
"epoch": 0.0008091,
|
| 56772 |
+
"grad_norm": 0.35814082622528076,
|
| 56773 |
+
"learning_rate": 8.09e-05,
|
| 56774 |
+
"loss": 0.1345,
|
| 56775 |
+
"step": 8091
|
| 56776 |
+
},
|
| 56777 |
+
{
|
| 56778 |
+
"epoch": 0.0008092,
|
| 56779 |
+
"grad_norm": 0.377176970243454,
|
| 56780 |
+
"learning_rate": 8.091000000000001e-05,
|
| 56781 |
+
"loss": 0.1333,
|
| 56782 |
+
"step": 8092
|
| 56783 |
+
},
|
| 56784 |
+
{
|
| 56785 |
+
"epoch": 0.0008093,
|
| 56786 |
+
"grad_norm": 0.24571584165096283,
|
| 56787 |
+
"learning_rate": 8.091999999999999e-05,
|
| 56788 |
+
"loss": 0.1115,
|
| 56789 |
+
"step": 8093
|
| 56790 |
+
},
|
| 56791 |
+
{
|
| 56792 |
+
"epoch": 0.0008094,
|
| 56793 |
+
"grad_norm": 0.5715231895446777,
|
| 56794 |
+
"learning_rate": 8.093e-05,
|
| 56795 |
+
"loss": 0.1378,
|
| 56796 |
+
"step": 8094
|
| 56797 |
+
},
|
| 56798 |
+
{
|
| 56799 |
+
"epoch": 0.0008095,
|
| 56800 |
+
"grad_norm": 0.3336717486381531,
|
| 56801 |
+
"learning_rate": 8.094000000000001e-05,
|
| 56802 |
+
"loss": 0.1282,
|
| 56803 |
+
"step": 8095
|
| 56804 |
+
},
|
| 56805 |
+
{
|
| 56806 |
+
"epoch": 0.0008096,
|
| 56807 |
+
"grad_norm": 0.3536299765110016,
|
| 56808 |
+
"learning_rate": 8.095e-05,
|
| 56809 |
+
"loss": 0.1324,
|
| 56810 |
+
"step": 8096
|
| 56811 |
+
},
|
| 56812 |
+
{
|
| 56813 |
+
"epoch": 0.0008097,
|
| 56814 |
+
"grad_norm": 0.24772508442401886,
|
| 56815 |
+
"learning_rate": 8.096e-05,
|
| 56816 |
+
"loss": 0.1127,
|
| 56817 |
+
"step": 8097
|
| 56818 |
+
},
|
| 56819 |
+
{
|
| 56820 |
+
"epoch": 0.0008098,
|
| 56821 |
+
"grad_norm": 0.3084630072116852,
|
| 56822 |
+
"learning_rate": 8.097e-05,
|
| 56823 |
+
"loss": 0.1203,
|
| 56824 |
+
"step": 8098
|
| 56825 |
+
},
|
| 56826 |
+
{
|
| 56827 |
+
"epoch": 0.0008099,
|
| 56828 |
+
"grad_norm": 0.34206777811050415,
|
| 56829 |
+
"learning_rate": 8.098e-05,
|
| 56830 |
+
"loss": 0.1332,
|
| 56831 |
+
"step": 8099
|
| 56832 |
+
},
|
| 56833 |
+
{
|
| 56834 |
+
"epoch": 0.00081,
|
| 56835 |
+
"grad_norm": 0.24609732627868652,
|
| 56836 |
+
"learning_rate": 8.099e-05,
|
| 56837 |
+
"loss": 0.1144,
|
| 56838 |
+
"step": 8100
|
| 56839 |
+
},
|
| 56840 |
+
{
|
| 56841 |
+
"epoch": 0.0008101,
|
| 56842 |
+
"grad_norm": 0.29518646001815796,
|
| 56843 |
+
"learning_rate": 8.1e-05,
|
| 56844 |
+
"loss": 0.1219,
|
| 56845 |
+
"step": 8101
|
| 56846 |
+
},
|
| 56847 |
+
{
|
| 56848 |
+
"epoch": 0.0008102,
|
| 56849 |
+
"grad_norm": 0.3137916922569275,
|
| 56850 |
+
"learning_rate": 8.101e-05,
|
| 56851 |
+
"loss": 0.1224,
|
| 56852 |
+
"step": 8102
|
| 56853 |
+
},
|
| 56854 |
+
{
|
| 56855 |
+
"epoch": 0.0008103,
|
| 56856 |
+
"grad_norm": 0.2565911114215851,
|
| 56857 |
+
"learning_rate": 8.102e-05,
|
| 56858 |
+
"loss": 0.1164,
|
| 56859 |
+
"step": 8103
|
| 56860 |
+
},
|
| 56861 |
+
{
|
| 56862 |
+
"epoch": 0.0008104,
|
| 56863 |
+
"grad_norm": 0.9588075876235962,
|
| 56864 |
+
"learning_rate": 8.103e-05,
|
| 56865 |
+
"loss": 0.1422,
|
| 56866 |
+
"step": 8104
|
| 56867 |
+
},
|
| 56868 |
+
{
|
| 56869 |
+
"epoch": 0.0008105,
|
| 56870 |
+
"grad_norm": 0.25571489334106445,
|
| 56871 |
+
"learning_rate": 8.104e-05,
|
| 56872 |
+
"loss": 0.1109,
|
| 56873 |
+
"step": 8105
|
| 56874 |
+
},
|
| 56875 |
+
{
|
| 56876 |
+
"epoch": 0.0008106,
|
| 56877 |
+
"grad_norm": 0.2894692122936249,
|
| 56878 |
+
"learning_rate": 8.105e-05,
|
| 56879 |
+
"loss": 0.1214,
|
| 56880 |
+
"step": 8106
|
| 56881 |
+
},
|
| 56882 |
+
{
|
| 56883 |
+
"epoch": 0.0008107,
|
| 56884 |
+
"grad_norm": 0.2801630198955536,
|
| 56885 |
+
"learning_rate": 8.106e-05,
|
| 56886 |
+
"loss": 0.1125,
|
| 56887 |
+
"step": 8107
|
| 56888 |
+
},
|
| 56889 |
+
{
|
| 56890 |
+
"epoch": 0.0008108,
|
| 56891 |
+
"grad_norm": 0.3092218339443207,
|
| 56892 |
+
"learning_rate": 8.107000000000001e-05,
|
| 56893 |
+
"loss": 0.1234,
|
| 56894 |
+
"step": 8108
|
| 56895 |
+
},
|
| 56896 |
+
{
|
| 56897 |
+
"epoch": 0.0008109,
|
| 56898 |
+
"grad_norm": 0.38859060406684875,
|
| 56899 |
+
"learning_rate": 8.108e-05,
|
| 56900 |
+
"loss": 0.1315,
|
| 56901 |
+
"step": 8109
|
| 56902 |
+
},
|
| 56903 |
+
{
|
| 56904 |
+
"epoch": 0.000811,
|
| 56905 |
+
"grad_norm": 0.3201065957546234,
|
| 56906 |
+
"learning_rate": 8.109e-05,
|
| 56907 |
+
"loss": 0.125,
|
| 56908 |
+
"step": 8110
|
| 56909 |
+
},
|
| 56910 |
+
{
|
| 56911 |
+
"epoch": 0.0008111,
|
| 56912 |
+
"grad_norm": 0.27451232075691223,
|
| 56913 |
+
"learning_rate": 8.11e-05,
|
| 56914 |
+
"loss": 0.1229,
|
| 56915 |
+
"step": 8111
|
| 56916 |
+
},
|
| 56917 |
+
{
|
| 56918 |
+
"epoch": 0.0008112,
|
| 56919 |
+
"grad_norm": 0.30030497908592224,
|
| 56920 |
+
"learning_rate": 8.111e-05,
|
| 56921 |
+
"loss": 0.1257,
|
| 56922 |
+
"step": 8112
|
| 56923 |
+
},
|
| 56924 |
+
{
|
| 56925 |
+
"epoch": 0.0008113,
|
| 56926 |
+
"grad_norm": 0.32421204447746277,
|
| 56927 |
+
"learning_rate": 8.112000000000001e-05,
|
| 56928 |
+
"loss": 0.1359,
|
| 56929 |
+
"step": 8113
|
| 56930 |
+
},
|
| 56931 |
+
{
|
| 56932 |
+
"epoch": 0.0008114,
|
| 56933 |
+
"grad_norm": 0.26296764612197876,
|
| 56934 |
+
"learning_rate": 8.113e-05,
|
| 56935 |
+
"loss": 0.1181,
|
| 56936 |
+
"step": 8114
|
| 56937 |
+
},
|
| 56938 |
+
{
|
| 56939 |
+
"epoch": 0.0008115,
|
| 56940 |
+
"grad_norm": 0.24811072647571564,
|
| 56941 |
+
"learning_rate": 8.114e-05,
|
| 56942 |
+
"loss": 0.1193,
|
| 56943 |
+
"step": 8115
|
| 56944 |
+
},
|
| 56945 |
+
{
|
| 56946 |
+
"epoch": 0.0008116,
|
| 56947 |
+
"grad_norm": 0.2737072706222534,
|
| 56948 |
+
"learning_rate": 8.115000000000001e-05,
|
| 56949 |
+
"loss": 0.1152,
|
| 56950 |
+
"step": 8116
|
| 56951 |
+
},
|
| 56952 |
+
{
|
| 56953 |
+
"epoch": 0.0008117,
|
| 56954 |
+
"grad_norm": 0.3693912923336029,
|
| 56955 |
+
"learning_rate": 8.116e-05,
|
| 56956 |
+
"loss": 0.1359,
|
| 56957 |
+
"step": 8117
|
| 56958 |
+
},
|
| 56959 |
+
{
|
| 56960 |
+
"epoch": 0.0008118,
|
| 56961 |
+
"grad_norm": 0.3073267936706543,
|
| 56962 |
+
"learning_rate": 8.117e-05,
|
| 56963 |
+
"loss": 0.1245,
|
| 56964 |
+
"step": 8118
|
| 56965 |
+
},
|
| 56966 |
+
{
|
| 56967 |
+
"epoch": 0.0008119,
|
| 56968 |
+
"grad_norm": 0.2775425910949707,
|
| 56969 |
+
"learning_rate": 8.118e-05,
|
| 56970 |
+
"loss": 0.1235,
|
| 56971 |
+
"step": 8119
|
| 56972 |
+
},
|
| 56973 |
+
{
|
| 56974 |
+
"epoch": 0.000812,
|
| 56975 |
+
"grad_norm": 0.26199913024902344,
|
| 56976 |
+
"learning_rate": 8.119e-05,
|
| 56977 |
+
"loss": 0.12,
|
| 56978 |
+
"step": 8120
|
| 56979 |
+
},
|
| 56980 |
+
{
|
| 56981 |
+
"epoch": 0.0008121,
|
| 56982 |
+
"grad_norm": 0.33429381251335144,
|
| 56983 |
+
"learning_rate": 8.12e-05,
|
| 56984 |
+
"loss": 0.1306,
|
| 56985 |
+
"step": 8121
|
| 56986 |
+
},
|
| 56987 |
+
{
|
| 56988 |
+
"epoch": 0.0008122,
|
| 56989 |
+
"grad_norm": 0.2805701792240143,
|
| 56990 |
+
"learning_rate": 8.121e-05,
|
| 56991 |
+
"loss": 0.1198,
|
| 56992 |
+
"step": 8122
|
| 56993 |
+
},
|
| 56994 |
+
{
|
| 56995 |
+
"epoch": 0.0008123,
|
| 56996 |
+
"grad_norm": 0.26230496168136597,
|
| 56997 |
+
"learning_rate": 8.122000000000001e-05,
|
| 56998 |
+
"loss": 0.1206,
|
| 56999 |
+
"step": 8123
|
| 57000 |
+
},
|
| 57001 |
+
{
|
| 57002 |
+
"epoch": 0.0008124,
|
| 57003 |
+
"grad_norm": 0.27616167068481445,
|
| 57004 |
+
"learning_rate": 8.122999999999999e-05,
|
| 57005 |
+
"loss": 0.1215,
|
| 57006 |
+
"step": 8124
|
| 57007 |
+
},
|
| 57008 |
+
{
|
| 57009 |
+
"epoch": 0.0008125,
|
| 57010 |
+
"grad_norm": 0.4020824134349823,
|
| 57011 |
+
"learning_rate": 8.124e-05,
|
| 57012 |
+
"loss": 0.1347,
|
| 57013 |
+
"step": 8125
|
| 57014 |
+
},
|
| 57015 |
+
{
|
| 57016 |
+
"epoch": 0.0008126,
|
| 57017 |
+
"grad_norm": 0.30386677384376526,
|
| 57018 |
+
"learning_rate": 8.125000000000001e-05,
|
| 57019 |
+
"loss": 0.1313,
|
| 57020 |
+
"step": 8126
|
| 57021 |
+
},
|
| 57022 |
+
{
|
| 57023 |
+
"epoch": 0.0008127,
|
| 57024 |
+
"grad_norm": 0.2896801829338074,
|
| 57025 |
+
"learning_rate": 8.125999999999999e-05,
|
| 57026 |
+
"loss": 0.1203,
|
| 57027 |
+
"step": 8127
|
| 57028 |
+
},
|
| 57029 |
+
{
|
| 57030 |
+
"epoch": 0.0008128,
|
| 57031 |
+
"grad_norm": 0.2686094045639038,
|
| 57032 |
+
"learning_rate": 8.127e-05,
|
| 57033 |
+
"loss": 0.1224,
|
| 57034 |
+
"step": 8128
|
| 57035 |
+
},
|
| 57036 |
+
{
|
| 57037 |
+
"epoch": 0.0008129,
|
| 57038 |
+
"grad_norm": 0.3080318868160248,
|
| 57039 |
+
"learning_rate": 8.128000000000001e-05,
|
| 57040 |
+
"loss": 0.1224,
|
| 57041 |
+
"step": 8129
|
| 57042 |
+
},
|
| 57043 |
+
{
|
| 57044 |
+
"epoch": 0.000813,
|
| 57045 |
+
"grad_norm": 0.331500768661499,
|
| 57046 |
+
"learning_rate": 8.129e-05,
|
| 57047 |
+
"loss": 0.1261,
|
| 57048 |
+
"step": 8130
|
| 57049 |
+
},
|
| 57050 |
+
{
|
| 57051 |
+
"epoch": 0.0008131,
|
| 57052 |
+
"grad_norm": 0.25597918033599854,
|
| 57053 |
+
"learning_rate": 8.13e-05,
|
| 57054 |
+
"loss": 0.1242,
|
| 57055 |
+
"step": 8131
|
| 57056 |
+
},
|
| 57057 |
+
{
|
| 57058 |
+
"epoch": 0.0008132,
|
| 57059 |
+
"grad_norm": 0.2504562735557556,
|
| 57060 |
+
"learning_rate": 8.131e-05,
|
| 57061 |
+
"loss": 0.1209,
|
| 57062 |
+
"step": 8132
|
| 57063 |
+
},
|
| 57064 |
+
{
|
| 57065 |
+
"epoch": 0.0008133,
|
| 57066 |
+
"grad_norm": 0.27307194471359253,
|
| 57067 |
+
"learning_rate": 8.132e-05,
|
| 57068 |
+
"loss": 0.1226,
|
| 57069 |
+
"step": 8133
|
| 57070 |
+
},
|
| 57071 |
+
{
|
| 57072 |
+
"epoch": 0.0008134,
|
| 57073 |
+
"grad_norm": 0.25977298617362976,
|
| 57074 |
+
"learning_rate": 8.133e-05,
|
| 57075 |
+
"loss": 0.1166,
|
| 57076 |
+
"step": 8134
|
| 57077 |
+
},
|
| 57078 |
+
{
|
| 57079 |
+
"epoch": 0.0008135,
|
| 57080 |
+
"grad_norm": 0.403717041015625,
|
| 57081 |
+
"learning_rate": 8.134e-05,
|
| 57082 |
+
"loss": 0.1316,
|
| 57083 |
+
"step": 8135
|
| 57084 |
+
},
|
| 57085 |
+
{
|
| 57086 |
+
"epoch": 0.0008136,
|
| 57087 |
+
"grad_norm": 0.39485740661621094,
|
| 57088 |
+
"learning_rate": 8.135e-05,
|
| 57089 |
+
"loss": 0.1393,
|
| 57090 |
+
"step": 8136
|
| 57091 |
+
},
|
| 57092 |
+
{
|
| 57093 |
+
"epoch": 0.0008137,
|
| 57094 |
+
"grad_norm": 0.27854466438293457,
|
| 57095 |
+
"learning_rate": 8.136e-05,
|
| 57096 |
+
"loss": 0.1177,
|
| 57097 |
+
"step": 8137
|
| 57098 |
+
},
|
| 57099 |
+
{
|
| 57100 |
+
"epoch": 0.0008138,
|
| 57101 |
+
"grad_norm": 0.2546297311782837,
|
| 57102 |
+
"learning_rate": 8.137e-05,
|
| 57103 |
+
"loss": 0.1178,
|
| 57104 |
+
"step": 8138
|
| 57105 |
+
},
|
| 57106 |
+
{
|
| 57107 |
+
"epoch": 0.0008139,
|
| 57108 |
+
"grad_norm": 0.24863463640213013,
|
| 57109 |
+
"learning_rate": 8.138e-05,
|
| 57110 |
+
"loss": 0.1205,
|
| 57111 |
+
"step": 8139
|
| 57112 |
+
},
|
| 57113 |
+
{
|
| 57114 |
+
"epoch": 0.000814,
|
| 57115 |
+
"grad_norm": 0.2401416152715683,
|
| 57116 |
+
"learning_rate": 8.139e-05,
|
| 57117 |
+
"loss": 0.1115,
|
| 57118 |
+
"step": 8140
|
| 57119 |
+
},
|
| 57120 |
+
{
|
| 57121 |
+
"epoch": 0.0008141,
|
| 57122 |
+
"grad_norm": 0.38651880621910095,
|
| 57123 |
+
"learning_rate": 8.14e-05,
|
| 57124 |
+
"loss": 0.1298,
|
| 57125 |
+
"step": 8141
|
| 57126 |
+
},
|
| 57127 |
+
{
|
| 57128 |
+
"epoch": 0.0008142,
|
| 57129 |
+
"grad_norm": 0.24856111407279968,
|
| 57130 |
+
"learning_rate": 8.141e-05,
|
| 57131 |
+
"loss": 0.12,
|
| 57132 |
+
"step": 8142
|
| 57133 |
+
},
|
| 57134 |
+
{
|
| 57135 |
+
"epoch": 0.0008143,
|
| 57136 |
+
"grad_norm": 0.27614912390708923,
|
| 57137 |
+
"learning_rate": 8.142e-05,
|
| 57138 |
+
"loss": 0.1236,
|
| 57139 |
+
"step": 8143
|
| 57140 |
+
},
|
| 57141 |
+
{
|
| 57142 |
+
"epoch": 0.0008144,
|
| 57143 |
+
"grad_norm": 0.2812386155128479,
|
| 57144 |
+
"learning_rate": 8.143e-05,
|
| 57145 |
+
"loss": 0.1273,
|
| 57146 |
+
"step": 8144
|
| 57147 |
+
},
|
| 57148 |
+
{
|
| 57149 |
+
"epoch": 0.0008145,
|
| 57150 |
+
"grad_norm": 0.27021878957748413,
|
| 57151 |
+
"learning_rate": 8.144e-05,
|
| 57152 |
+
"loss": 0.1223,
|
| 57153 |
+
"step": 8145
|
| 57154 |
+
},
|
| 57155 |
+
{
|
| 57156 |
+
"epoch": 0.0008146,
|
| 57157 |
+
"grad_norm": 0.25452882051467896,
|
| 57158 |
+
"learning_rate": 8.145e-05,
|
| 57159 |
+
"loss": 0.115,
|
| 57160 |
+
"step": 8146
|
| 57161 |
+
},
|
| 57162 |
+
{
|
| 57163 |
+
"epoch": 0.0008147,
|
| 57164 |
+
"grad_norm": 0.2515009641647339,
|
| 57165 |
+
"learning_rate": 8.146000000000001e-05,
|
| 57166 |
+
"loss": 0.1155,
|
| 57167 |
+
"step": 8147
|
| 57168 |
+
},
|
| 57169 |
+
{
|
| 57170 |
+
"epoch": 0.0008148,
|
| 57171 |
+
"grad_norm": 0.27468255162239075,
|
| 57172 |
+
"learning_rate": 8.147e-05,
|
| 57173 |
+
"loss": 0.1277,
|
| 57174 |
+
"step": 8148
|
| 57175 |
+
},
|
| 57176 |
+
{
|
| 57177 |
+
"epoch": 0.0008149,
|
| 57178 |
+
"grad_norm": 0.30062854290008545,
|
| 57179 |
+
"learning_rate": 8.148e-05,
|
| 57180 |
+
"loss": 0.1307,
|
| 57181 |
+
"step": 8149
|
| 57182 |
+
},
|
| 57183 |
+
{
|
| 57184 |
+
"epoch": 0.000815,
|
| 57185 |
+
"grad_norm": 0.26882845163345337,
|
| 57186 |
+
"learning_rate": 8.149000000000001e-05,
|
| 57187 |
+
"loss": 0.1234,
|
| 57188 |
+
"step": 8150
|
| 57189 |
+
},
|
| 57190 |
+
{
|
| 57191 |
+
"epoch": 0.0008151,
|
| 57192 |
+
"grad_norm": 0.4284106194972992,
|
| 57193 |
+
"learning_rate": 8.15e-05,
|
| 57194 |
+
"loss": 0.1329,
|
| 57195 |
+
"step": 8151
|
| 57196 |
+
},
|
| 57197 |
+
{
|
| 57198 |
+
"epoch": 0.0008152,
|
| 57199 |
+
"grad_norm": 0.2600254714488983,
|
| 57200 |
+
"learning_rate": 8.151e-05,
|
| 57201 |
+
"loss": 0.1199,
|
| 57202 |
+
"step": 8152
|
| 57203 |
+
},
|
| 57204 |
+
{
|
| 57205 |
+
"epoch": 0.0008153,
|
| 57206 |
+
"grad_norm": 0.2593938112258911,
|
| 57207 |
+
"learning_rate": 8.152e-05,
|
| 57208 |
+
"loss": 0.1167,
|
| 57209 |
+
"step": 8153
|
| 57210 |
+
},
|
| 57211 |
+
{
|
| 57212 |
+
"epoch": 0.0008154,
|
| 57213 |
+
"grad_norm": 0.35058048367500305,
|
| 57214 |
+
"learning_rate": 8.153e-05,
|
| 57215 |
+
"loss": 0.1319,
|
| 57216 |
+
"step": 8154
|
| 57217 |
+
},
|
| 57218 |
+
{
|
| 57219 |
+
"epoch": 0.0008155,
|
| 57220 |
+
"grad_norm": 0.2629115581512451,
|
| 57221 |
+
"learning_rate": 8.154e-05,
|
| 57222 |
+
"loss": 0.1207,
|
| 57223 |
+
"step": 8155
|
| 57224 |
+
},
|
| 57225 |
+
{
|
| 57226 |
+
"epoch": 0.0008156,
|
| 57227 |
+
"grad_norm": 0.2404366284608841,
|
| 57228 |
+
"learning_rate": 8.155e-05,
|
| 57229 |
+
"loss": 0.1185,
|
| 57230 |
+
"step": 8156
|
| 57231 |
+
},
|
| 57232 |
+
{
|
| 57233 |
+
"epoch": 0.0008157,
|
| 57234 |
+
"grad_norm": 0.31744587421417236,
|
| 57235 |
+
"learning_rate": 8.156000000000001e-05,
|
| 57236 |
+
"loss": 0.1356,
|
| 57237 |
+
"step": 8157
|
| 57238 |
+
},
|
| 57239 |
+
{
|
| 57240 |
+
"epoch": 0.0008158,
|
| 57241 |
+
"grad_norm": 0.26406434178352356,
|
| 57242 |
+
"learning_rate": 8.156999999999999e-05,
|
| 57243 |
+
"loss": 0.1199,
|
| 57244 |
+
"step": 8158
|
| 57245 |
+
},
|
| 57246 |
+
{
|
| 57247 |
+
"epoch": 0.0008159,
|
| 57248 |
+
"grad_norm": 0.49710792303085327,
|
| 57249 |
+
"learning_rate": 8.158e-05,
|
| 57250 |
+
"loss": 0.1284,
|
| 57251 |
+
"step": 8159
|
| 57252 |
+
},
|
| 57253 |
+
{
|
| 57254 |
+
"epoch": 0.000816,
|
| 57255 |
+
"grad_norm": 0.2635711431503296,
|
| 57256 |
+
"learning_rate": 8.159000000000001e-05,
|
| 57257 |
+
"loss": 0.122,
|
| 57258 |
+
"step": 8160
|
| 57259 |
+
},
|
| 57260 |
+
{
|
| 57261 |
+
"epoch": 0.0008161,
|
| 57262 |
+
"grad_norm": 0.2565058171749115,
|
| 57263 |
+
"learning_rate": 8.159999999999999e-05,
|
| 57264 |
+
"loss": 0.1224,
|
| 57265 |
+
"step": 8161
|
| 57266 |
+
},
|
| 57267 |
+
{
|
| 57268 |
+
"epoch": 0.0008162,
|
| 57269 |
+
"grad_norm": 0.277601033449173,
|
| 57270 |
+
"learning_rate": 8.161e-05,
|
| 57271 |
+
"loss": 0.1196,
|
| 57272 |
+
"step": 8162
|
| 57273 |
+
},
|
| 57274 |
+
{
|
| 57275 |
+
"epoch": 0.0008163,
|
| 57276 |
+
"grad_norm": 0.35559678077697754,
|
| 57277 |
+
"learning_rate": 8.162000000000001e-05,
|
| 57278 |
+
"loss": 0.1246,
|
| 57279 |
+
"step": 8163
|
| 57280 |
+
},
|
| 57281 |
+
{
|
| 57282 |
+
"epoch": 0.0008164,
|
| 57283 |
+
"grad_norm": 0.32934850454330444,
|
| 57284 |
+
"learning_rate": 8.163e-05,
|
| 57285 |
+
"loss": 0.1314,
|
| 57286 |
+
"step": 8164
|
| 57287 |
+
},
|
| 57288 |
+
{
|
| 57289 |
+
"epoch": 0.0008165,
|
| 57290 |
+
"grad_norm": 0.45738476514816284,
|
| 57291 |
+
"learning_rate": 8.164e-05,
|
| 57292 |
+
"loss": 0.1327,
|
| 57293 |
+
"step": 8165
|
| 57294 |
+
},
|
| 57295 |
+
{
|
| 57296 |
+
"epoch": 0.0008166,
|
| 57297 |
+
"grad_norm": 0.3290201723575592,
|
| 57298 |
+
"learning_rate": 8.165e-05,
|
| 57299 |
+
"loss": 0.1301,
|
| 57300 |
+
"step": 8166
|
| 57301 |
+
},
|
| 57302 |
+
{
|
| 57303 |
+
"epoch": 0.0008167,
|
| 57304 |
+
"grad_norm": 0.3304916322231293,
|
| 57305 |
+
"learning_rate": 8.166e-05,
|
| 57306 |
+
"loss": 0.1236,
|
| 57307 |
+
"step": 8167
|
| 57308 |
+
},
|
| 57309 |
+
{
|
| 57310 |
+
"epoch": 0.0008168,
|
| 57311 |
+
"grad_norm": 0.4709419310092926,
|
| 57312 |
+
"learning_rate": 8.167e-05,
|
| 57313 |
+
"loss": 0.1309,
|
| 57314 |
+
"step": 8168
|
| 57315 |
+
},
|
| 57316 |
+
{
|
| 57317 |
+
"epoch": 0.0008169,
|
| 57318 |
+
"grad_norm": 0.2472590059041977,
|
| 57319 |
+
"learning_rate": 8.168e-05,
|
| 57320 |
+
"loss": 0.1136,
|
| 57321 |
+
"step": 8169
|
| 57322 |
+
},
|
| 57323 |
+
{
|
| 57324 |
+
"epoch": 0.000817,
|
| 57325 |
+
"grad_norm": 0.322237104177475,
|
| 57326 |
+
"learning_rate": 8.169e-05,
|
| 57327 |
+
"loss": 0.1248,
|
| 57328 |
+
"step": 8170
|
| 57329 |
+
},
|
| 57330 |
+
{
|
| 57331 |
+
"epoch": 0.0008171,
|
| 57332 |
+
"grad_norm": 0.2979406416416168,
|
| 57333 |
+
"learning_rate": 8.17e-05,
|
| 57334 |
+
"loss": 0.1334,
|
| 57335 |
+
"step": 8171
|
| 57336 |
+
},
|
| 57337 |
+
{
|
| 57338 |
+
"epoch": 0.0008172,
|
| 57339 |
+
"grad_norm": 0.2639550566673279,
|
| 57340 |
+
"learning_rate": 8.171e-05,
|
| 57341 |
+
"loss": 0.1192,
|
| 57342 |
+
"step": 8172
|
| 57343 |
+
},
|
| 57344 |
+
{
|
| 57345 |
+
"epoch": 0.0008173,
|
| 57346 |
+
"grad_norm": 0.3248070776462555,
|
| 57347 |
+
"learning_rate": 8.172e-05,
|
| 57348 |
+
"loss": 0.1211,
|
| 57349 |
+
"step": 8173
|
| 57350 |
+
},
|
| 57351 |
+
{
|
| 57352 |
+
"epoch": 0.0008174,
|
| 57353 |
+
"grad_norm": 0.3602343797683716,
|
| 57354 |
+
"learning_rate": 8.173e-05,
|
| 57355 |
+
"loss": 0.1223,
|
| 57356 |
+
"step": 8174
|
| 57357 |
+
},
|
| 57358 |
+
{
|
| 57359 |
+
"epoch": 0.0008175,
|
| 57360 |
+
"grad_norm": 0.33443495631217957,
|
| 57361 |
+
"learning_rate": 8.174e-05,
|
| 57362 |
+
"loss": 0.1254,
|
| 57363 |
+
"step": 8175
|
| 57364 |
+
},
|
| 57365 |
+
{
|
| 57366 |
+
"epoch": 0.0008176,
|
| 57367 |
+
"grad_norm": 0.23635318875312805,
|
| 57368 |
+
"learning_rate": 8.175e-05,
|
| 57369 |
+
"loss": 0.1149,
|
| 57370 |
+
"step": 8176
|
| 57371 |
+
},
|
| 57372 |
+
{
|
| 57373 |
+
"epoch": 0.0008177,
|
| 57374 |
+
"grad_norm": 0.23680636286735535,
|
| 57375 |
+
"learning_rate": 8.176e-05,
|
| 57376 |
+
"loss": 0.116,
|
| 57377 |
+
"step": 8177
|
| 57378 |
+
},
|
| 57379 |
+
{
|
| 57380 |
+
"epoch": 0.0008178,
|
| 57381 |
+
"grad_norm": 0.3024715483188629,
|
| 57382 |
+
"learning_rate": 8.177e-05,
|
| 57383 |
+
"loss": 0.1249,
|
| 57384 |
+
"step": 8178
|
| 57385 |
+
},
|
| 57386 |
+
{
|
| 57387 |
+
"epoch": 0.0008179,
|
| 57388 |
+
"grad_norm": 0.26406145095825195,
|
| 57389 |
+
"learning_rate": 8.178e-05,
|
| 57390 |
+
"loss": 0.1166,
|
| 57391 |
+
"step": 8179
|
| 57392 |
+
},
|
| 57393 |
+
{
|
| 57394 |
+
"epoch": 0.000818,
|
| 57395 |
+
"grad_norm": 0.3081522583961487,
|
| 57396 |
+
"learning_rate": 8.179e-05,
|
| 57397 |
+
"loss": 0.1176,
|
| 57398 |
+
"step": 8180
|
| 57399 |
+
},
|
| 57400 |
+
{
|
| 57401 |
+
"epoch": 0.0008181,
|
| 57402 |
+
"grad_norm": 0.2564171552658081,
|
| 57403 |
+
"learning_rate": 8.180000000000001e-05,
|
| 57404 |
+
"loss": 0.1165,
|
| 57405 |
+
"step": 8181
|
| 57406 |
+
},
|
| 57407 |
+
{
|
| 57408 |
+
"epoch": 0.0008182,
|
| 57409 |
+
"grad_norm": 0.25777319073677063,
|
| 57410 |
+
"learning_rate": 8.181e-05,
|
| 57411 |
+
"loss": 0.1143,
|
| 57412 |
+
"step": 8182
|
| 57413 |
+
},
|
| 57414 |
+
{
|
| 57415 |
+
"epoch": 0.0008183,
|
| 57416 |
+
"grad_norm": 0.2623406648635864,
|
| 57417 |
+
"learning_rate": 8.182e-05,
|
| 57418 |
+
"loss": 0.1231,
|
| 57419 |
+
"step": 8183
|
| 57420 |
+
},
|
| 57421 |
+
{
|
| 57422 |
+
"epoch": 0.0008184,
|
| 57423 |
+
"grad_norm": 0.3436448276042938,
|
| 57424 |
+
"learning_rate": 8.183000000000001e-05,
|
| 57425 |
+
"loss": 0.1289,
|
| 57426 |
+
"step": 8184
|
| 57427 |
+
},
|
| 57428 |
+
{
|
| 57429 |
+
"epoch": 0.0008185,
|
| 57430 |
+
"grad_norm": 0.39584359526634216,
|
| 57431 |
+
"learning_rate": 8.184e-05,
|
| 57432 |
+
"loss": 0.1311,
|
| 57433 |
+
"step": 8185
|
| 57434 |
+
},
|
| 57435 |
+
{
|
| 57436 |
+
"epoch": 0.0008186,
|
| 57437 |
+
"grad_norm": 0.2454989105463028,
|
| 57438 |
+
"learning_rate": 8.185e-05,
|
| 57439 |
+
"loss": 0.1146,
|
| 57440 |
+
"step": 8186
|
| 57441 |
+
},
|
| 57442 |
+
{
|
| 57443 |
+
"epoch": 0.0008187,
|
| 57444 |
+
"grad_norm": 0.444217324256897,
|
| 57445 |
+
"learning_rate": 8.186e-05,
|
| 57446 |
+
"loss": 0.1276,
|
| 57447 |
+
"step": 8187
|
| 57448 |
+
},
|
| 57449 |
+
{
|
| 57450 |
+
"epoch": 0.0008188,
|
| 57451 |
+
"grad_norm": 0.3582991361618042,
|
| 57452 |
+
"learning_rate": 8.187e-05,
|
| 57453 |
+
"loss": 0.1314,
|
| 57454 |
+
"step": 8188
|
| 57455 |
+
},
|
| 57456 |
+
{
|
| 57457 |
+
"epoch": 0.0008189,
|
| 57458 |
+
"grad_norm": 0.462783545255661,
|
| 57459 |
+
"learning_rate": 8.188e-05,
|
| 57460 |
+
"loss": 0.1328,
|
| 57461 |
+
"step": 8189
|
| 57462 |
+
},
|
| 57463 |
+
{
|
| 57464 |
+
"epoch": 0.000819,
|
| 57465 |
+
"grad_norm": 0.2717292308807373,
|
| 57466 |
+
"learning_rate": 8.189e-05,
|
| 57467 |
+
"loss": 0.1164,
|
| 57468 |
+
"step": 8190
|
| 57469 |
+
},
|
| 57470 |
+
{
|
| 57471 |
+
"epoch": 0.0008191,
|
| 57472 |
+
"grad_norm": 0.257985383272171,
|
| 57473 |
+
"learning_rate": 8.190000000000001e-05,
|
| 57474 |
+
"loss": 0.1202,
|
| 57475 |
+
"step": 8191
|
| 57476 |
+
},
|
| 57477 |
+
{
|
| 57478 |
+
"epoch": 0.0008192,
|
| 57479 |
+
"grad_norm": 0.2859156131744385,
|
| 57480 |
+
"learning_rate": 8.191e-05,
|
| 57481 |
+
"loss": 0.1195,
|
| 57482 |
+
"step": 8192
|
| 57483 |
+
},
|
| 57484 |
+
{
|
| 57485 |
+
"epoch": 0.0008193,
|
| 57486 |
+
"grad_norm": 0.27496665716171265,
|
| 57487 |
+
"learning_rate": 8.192e-05,
|
| 57488 |
+
"loss": 0.1266,
|
| 57489 |
+
"step": 8193
|
| 57490 |
+
},
|
| 57491 |
+
{
|
| 57492 |
+
"epoch": 0.0008194,
|
| 57493 |
+
"grad_norm": 0.2906060516834259,
|
| 57494 |
+
"learning_rate": 8.193000000000001e-05,
|
| 57495 |
+
"loss": 0.1279,
|
| 57496 |
+
"step": 8194
|
| 57497 |
+
},
|
| 57498 |
+
{
|
| 57499 |
+
"epoch": 0.0008195,
|
| 57500 |
+
"grad_norm": 0.25681042671203613,
|
| 57501 |
+
"learning_rate": 8.193999999999999e-05,
|
| 57502 |
+
"loss": 0.1152,
|
| 57503 |
+
"step": 8195
|
| 57504 |
+
},
|
| 57505 |
+
{
|
| 57506 |
+
"epoch": 0.0008196,
|
| 57507 |
+
"grad_norm": 0.28119799494743347,
|
| 57508 |
+
"learning_rate": 8.195e-05,
|
| 57509 |
+
"loss": 0.118,
|
| 57510 |
+
"step": 8196
|
| 57511 |
+
},
|
| 57512 |
+
{
|
| 57513 |
+
"epoch": 0.0008197,
|
| 57514 |
+
"grad_norm": 0.24184001982212067,
|
| 57515 |
+
"learning_rate": 8.196000000000001e-05,
|
| 57516 |
+
"loss": 0.1182,
|
| 57517 |
+
"step": 8197
|
| 57518 |
+
},
|
| 57519 |
+
{
|
| 57520 |
+
"epoch": 0.0008198,
|
| 57521 |
+
"grad_norm": 0.2804012596607208,
|
| 57522 |
+
"learning_rate": 8.197e-05,
|
| 57523 |
+
"loss": 0.1234,
|
| 57524 |
+
"step": 8198
|
| 57525 |
+
},
|
| 57526 |
+
{
|
| 57527 |
+
"epoch": 0.0008199,
|
| 57528 |
+
"grad_norm": 0.2748105227947235,
|
| 57529 |
+
"learning_rate": 8.198e-05,
|
| 57530 |
+
"loss": 0.1118,
|
| 57531 |
+
"step": 8199
|
| 57532 |
+
},
|
| 57533 |
+
{
|
| 57534 |
+
"epoch": 0.00082,
|
| 57535 |
+
"grad_norm": 0.25761836767196655,
|
| 57536 |
+
"learning_rate": 8.199e-05,
|
| 57537 |
+
"loss": 0.111,
|
| 57538 |
+
"step": 8200
|
| 57539 |
+
},
|
| 57540 |
+
{
|
| 57541 |
+
"epoch": 0.0008201,
|
| 57542 |
+
"grad_norm": 0.2614924907684326,
|
| 57543 |
+
"learning_rate": 8.2e-05,
|
| 57544 |
+
"loss": 0.12,
|
| 57545 |
+
"step": 8201
|
| 57546 |
+
},
|
| 57547 |
+
{
|
| 57548 |
+
"epoch": 0.0008202,
|
| 57549 |
+
"grad_norm": 0.27884936332702637,
|
| 57550 |
+
"learning_rate": 8.201e-05,
|
| 57551 |
+
"loss": 0.1265,
|
| 57552 |
+
"step": 8202
|
| 57553 |
+
},
|
| 57554 |
+
{
|
| 57555 |
+
"epoch": 0.0008203,
|
| 57556 |
+
"grad_norm": 0.37377434968948364,
|
| 57557 |
+
"learning_rate": 8.202e-05,
|
| 57558 |
+
"loss": 0.1292,
|
| 57559 |
+
"step": 8203
|
| 57560 |
+
},
|
| 57561 |
+
{
|
| 57562 |
+
"epoch": 0.0008204,
|
| 57563 |
+
"grad_norm": 0.23581278324127197,
|
| 57564 |
+
"learning_rate": 8.203e-05,
|
| 57565 |
+
"loss": 0.1152,
|
| 57566 |
+
"step": 8204
|
| 57567 |
+
},
|
| 57568 |
+
{
|
| 57569 |
+
"epoch": 0.0008205,
|
| 57570 |
+
"grad_norm": 0.2529856562614441,
|
| 57571 |
+
"learning_rate": 8.204e-05,
|
| 57572 |
+
"loss": 0.1223,
|
| 57573 |
+
"step": 8205
|
| 57574 |
+
},
|
| 57575 |
+
{
|
| 57576 |
+
"epoch": 0.0008206,
|
| 57577 |
+
"grad_norm": 0.3910468816757202,
|
| 57578 |
+
"learning_rate": 8.205e-05,
|
| 57579 |
+
"loss": 0.1265,
|
| 57580 |
+
"step": 8206
|
| 57581 |
+
},
|
| 57582 |
+
{
|
| 57583 |
+
"epoch": 0.0008207,
|
| 57584 |
+
"grad_norm": 0.3044634461402893,
|
| 57585 |
+
"learning_rate": 8.206e-05,
|
| 57586 |
+
"loss": 0.1208,
|
| 57587 |
+
"step": 8207
|
| 57588 |
+
},
|
| 57589 |
+
{
|
| 57590 |
+
"epoch": 0.0008208,
|
| 57591 |
+
"grad_norm": 0.29144471883773804,
|
| 57592 |
+
"learning_rate": 8.207e-05,
|
| 57593 |
+
"loss": 0.1287,
|
| 57594 |
+
"step": 8208
|
| 57595 |
+
},
|
| 57596 |
+
{
|
| 57597 |
+
"epoch": 0.0008209,
|
| 57598 |
+
"grad_norm": 0.5447407960891724,
|
| 57599 |
+
"learning_rate": 8.208e-05,
|
| 57600 |
+
"loss": 0.136,
|
| 57601 |
+
"step": 8209
|
| 57602 |
+
},
|
| 57603 |
+
{
|
| 57604 |
+
"epoch": 0.000821,
|
| 57605 |
+
"grad_norm": 0.3419993817806244,
|
| 57606 |
+
"learning_rate": 8.209e-05,
|
| 57607 |
+
"loss": 0.1263,
|
| 57608 |
+
"step": 8210
|
| 57609 |
+
},
|
| 57610 |
+
{
|
| 57611 |
+
"epoch": 0.0008211,
|
| 57612 |
+
"grad_norm": 0.27843335270881653,
|
| 57613 |
+
"learning_rate": 8.21e-05,
|
| 57614 |
+
"loss": 0.1265,
|
| 57615 |
+
"step": 8211
|
| 57616 |
+
},
|
| 57617 |
+
{
|
| 57618 |
+
"epoch": 0.0008212,
|
| 57619 |
+
"grad_norm": 0.36781027913093567,
|
| 57620 |
+
"learning_rate": 8.211e-05,
|
| 57621 |
+
"loss": 0.1213,
|
| 57622 |
+
"step": 8212
|
| 57623 |
+
},
|
| 57624 |
+
{
|
| 57625 |
+
"epoch": 0.0008213,
|
| 57626 |
+
"grad_norm": 0.42243266105651855,
|
| 57627 |
+
"learning_rate": 8.212e-05,
|
| 57628 |
+
"loss": 0.1275,
|
| 57629 |
+
"step": 8213
|
| 57630 |
+
},
|
| 57631 |
+
{
|
| 57632 |
+
"epoch": 0.0008214,
|
| 57633 |
+
"grad_norm": 0.26543208956718445,
|
| 57634 |
+
"learning_rate": 8.213e-05,
|
| 57635 |
+
"loss": 0.1194,
|
| 57636 |
+
"step": 8214
|
| 57637 |
+
},
|
| 57638 |
+
{
|
| 57639 |
+
"epoch": 0.0008215,
|
| 57640 |
+
"grad_norm": 0.29444581270217896,
|
| 57641 |
+
"learning_rate": 8.214000000000001e-05,
|
| 57642 |
+
"loss": 0.1199,
|
| 57643 |
+
"step": 8215
|
| 57644 |
+
},
|
| 57645 |
+
{
|
| 57646 |
+
"epoch": 0.0008216,
|
| 57647 |
+
"grad_norm": 0.3137739300727844,
|
| 57648 |
+
"learning_rate": 8.215e-05,
|
| 57649 |
+
"loss": 0.1231,
|
| 57650 |
+
"step": 8216
|
| 57651 |
+
},
|
| 57652 |
+
{
|
| 57653 |
+
"epoch": 0.0008217,
|
| 57654 |
+
"grad_norm": 0.2812126874923706,
|
| 57655 |
+
"learning_rate": 8.216e-05,
|
| 57656 |
+
"loss": 0.1256,
|
| 57657 |
+
"step": 8217
|
| 57658 |
+
},
|
| 57659 |
+
{
|
| 57660 |
+
"epoch": 0.0008218,
|
| 57661 |
+
"grad_norm": 0.30354052782058716,
|
| 57662 |
+
"learning_rate": 8.217000000000001e-05,
|
| 57663 |
+
"loss": 0.1254,
|
| 57664 |
+
"step": 8218
|
| 57665 |
+
},
|
| 57666 |
+
{
|
| 57667 |
+
"epoch": 0.0008219,
|
| 57668 |
+
"grad_norm": 0.3041916489601135,
|
| 57669 |
+
"learning_rate": 8.218e-05,
|
| 57670 |
+
"loss": 0.1278,
|
| 57671 |
+
"step": 8219
|
| 57672 |
+
},
|
| 57673 |
+
{
|
| 57674 |
+
"epoch": 0.000822,
|
| 57675 |
+
"grad_norm": 0.29837197065353394,
|
| 57676 |
+
"learning_rate": 8.219e-05,
|
| 57677 |
+
"loss": 0.1136,
|
| 57678 |
+
"step": 8220
|
| 57679 |
+
},
|
| 57680 |
+
{
|
| 57681 |
+
"epoch": 0.0008221,
|
| 57682 |
+
"grad_norm": 0.2326834797859192,
|
| 57683 |
+
"learning_rate": 8.22e-05,
|
| 57684 |
+
"loss": 0.1123,
|
| 57685 |
+
"step": 8221
|
| 57686 |
+
},
|
| 57687 |
+
{
|
| 57688 |
+
"epoch": 0.0008222,
|
| 57689 |
+
"grad_norm": 0.25673767924308777,
|
| 57690 |
+
"learning_rate": 8.221e-05,
|
| 57691 |
+
"loss": 0.1183,
|
| 57692 |
+
"step": 8222
|
| 57693 |
+
},
|
| 57694 |
+
{
|
| 57695 |
+
"epoch": 0.0008223,
|
| 57696 |
+
"grad_norm": 0.38557425141334534,
|
| 57697 |
+
"learning_rate": 8.222e-05,
|
| 57698 |
+
"loss": 0.1257,
|
| 57699 |
+
"step": 8223
|
| 57700 |
+
},
|
| 57701 |
+
{
|
| 57702 |
+
"epoch": 0.0008224,
|
| 57703 |
+
"grad_norm": 0.31960251927375793,
|
| 57704 |
+
"learning_rate": 8.223e-05,
|
| 57705 |
+
"loss": 0.1212,
|
| 57706 |
+
"step": 8224
|
| 57707 |
+
},
|
| 57708 |
+
{
|
| 57709 |
+
"epoch": 0.0008225,
|
| 57710 |
+
"grad_norm": 0.3729652166366577,
|
| 57711 |
+
"learning_rate": 8.224000000000001e-05,
|
| 57712 |
+
"loss": 0.1312,
|
| 57713 |
+
"step": 8225
|
| 57714 |
+
},
|
| 57715 |
+
{
|
| 57716 |
+
"epoch": 0.0008226,
|
| 57717 |
+
"grad_norm": 0.2720782160758972,
|
| 57718 |
+
"learning_rate": 8.225e-05,
|
| 57719 |
+
"loss": 0.1269,
|
| 57720 |
+
"step": 8226
|
| 57721 |
+
},
|
| 57722 |
+
{
|
| 57723 |
+
"epoch": 0.0008227,
|
| 57724 |
+
"grad_norm": 0.2964369058609009,
|
| 57725 |
+
"learning_rate": 8.226e-05,
|
| 57726 |
+
"loss": 0.1229,
|
| 57727 |
+
"step": 8227
|
| 57728 |
+
},
|
| 57729 |
+
{
|
| 57730 |
+
"epoch": 0.0008228,
|
| 57731 |
+
"grad_norm": 0.3284490406513214,
|
| 57732 |
+
"learning_rate": 8.227000000000001e-05,
|
| 57733 |
+
"loss": 0.13,
|
| 57734 |
+
"step": 8228
|
| 57735 |
+
},
|
| 57736 |
+
{
|
| 57737 |
+
"epoch": 0.0008229,
|
| 57738 |
+
"grad_norm": 0.25771719217300415,
|
| 57739 |
+
"learning_rate": 8.227999999999999e-05,
|
| 57740 |
+
"loss": 0.1165,
|
| 57741 |
+
"step": 8229
|
| 57742 |
+
},
|
| 57743 |
+
{
|
| 57744 |
+
"epoch": 0.000823,
|
| 57745 |
+
"grad_norm": 0.2636684477329254,
|
| 57746 |
+
"learning_rate": 8.229e-05,
|
| 57747 |
+
"loss": 0.1226,
|
| 57748 |
+
"step": 8230
|
| 57749 |
+
},
|
| 57750 |
+
{
|
| 57751 |
+
"epoch": 0.0008231,
|
| 57752 |
+
"grad_norm": 0.2904820442199707,
|
| 57753 |
+
"learning_rate": 8.230000000000001e-05,
|
| 57754 |
+
"loss": 0.1261,
|
| 57755 |
+
"step": 8231
|
| 57756 |
+
},
|
| 57757 |
+
{
|
| 57758 |
+
"epoch": 0.0008232,
|
| 57759 |
+
"grad_norm": 0.25932803750038147,
|
| 57760 |
+
"learning_rate": 8.231e-05,
|
| 57761 |
+
"loss": 0.1142,
|
| 57762 |
+
"step": 8232
|
| 57763 |
+
},
|
| 57764 |
+
{
|
| 57765 |
+
"epoch": 0.0008233,
|
| 57766 |
+
"grad_norm": 0.24145160615444183,
|
| 57767 |
+
"learning_rate": 8.232e-05,
|
| 57768 |
+
"loss": 0.1175,
|
| 57769 |
+
"step": 8233
|
| 57770 |
+
},
|
| 57771 |
+
{
|
| 57772 |
+
"epoch": 0.0008234,
|
| 57773 |
+
"grad_norm": 0.2582058608531952,
|
| 57774 |
+
"learning_rate": 8.233e-05,
|
| 57775 |
+
"loss": 0.1182,
|
| 57776 |
+
"step": 8234
|
| 57777 |
+
},
|
| 57778 |
+
{
|
| 57779 |
+
"epoch": 0.0008235,
|
| 57780 |
+
"grad_norm": 0.2467871904373169,
|
| 57781 |
+
"learning_rate": 8.234e-05,
|
| 57782 |
+
"loss": 0.1223,
|
| 57783 |
+
"step": 8235
|
| 57784 |
+
},
|
| 57785 |
+
{
|
| 57786 |
+
"epoch": 0.0008236,
|
| 57787 |
+
"grad_norm": 0.24092982709407806,
|
| 57788 |
+
"learning_rate": 8.235e-05,
|
| 57789 |
+
"loss": 0.1166,
|
| 57790 |
+
"step": 8236
|
| 57791 |
+
},
|
| 57792 |
+
{
|
| 57793 |
+
"epoch": 0.0008237,
|
| 57794 |
+
"grad_norm": 0.4118715524673462,
|
| 57795 |
+
"learning_rate": 8.236e-05,
|
| 57796 |
+
"loss": 0.1287,
|
| 57797 |
+
"step": 8237
|
| 57798 |
+
},
|
| 57799 |
+
{
|
| 57800 |
+
"epoch": 0.0008238,
|
| 57801 |
+
"grad_norm": 0.2566819190979004,
|
| 57802 |
+
"learning_rate": 8.237e-05,
|
| 57803 |
+
"loss": 0.1171,
|
| 57804 |
+
"step": 8238
|
| 57805 |
+
},
|
| 57806 |
+
{
|
| 57807 |
+
"epoch": 0.0008239,
|
| 57808 |
+
"grad_norm": 0.26521503925323486,
|
| 57809 |
+
"learning_rate": 8.238e-05,
|
| 57810 |
+
"loss": 0.118,
|
| 57811 |
+
"step": 8239
|
| 57812 |
+
},
|
| 57813 |
+
{
|
| 57814 |
+
"epoch": 0.000824,
|
| 57815 |
+
"grad_norm": 0.4187837243080139,
|
| 57816 |
+
"learning_rate": 8.239e-05,
|
| 57817 |
+
"loss": 0.1308,
|
| 57818 |
+
"step": 8240
|
| 57819 |
+
},
|
| 57820 |
+
{
|
| 57821 |
+
"epoch": 0.0008241,
|
| 57822 |
+
"grad_norm": 0.28740209341049194,
|
| 57823 |
+
"learning_rate": 8.24e-05,
|
| 57824 |
+
"loss": 0.1256,
|
| 57825 |
+
"step": 8241
|
| 57826 |
+
},
|
| 57827 |
+
{
|
| 57828 |
+
"epoch": 0.0008242,
|
| 57829 |
+
"grad_norm": 0.2839011251926422,
|
| 57830 |
+
"learning_rate": 8.241e-05,
|
| 57831 |
+
"loss": 0.125,
|
| 57832 |
+
"step": 8242
|
| 57833 |
+
},
|
| 57834 |
+
{
|
| 57835 |
+
"epoch": 0.0008243,
|
| 57836 |
+
"grad_norm": 0.8069186210632324,
|
| 57837 |
+
"learning_rate": 8.242e-05,
|
| 57838 |
+
"loss": 0.1632,
|
| 57839 |
+
"step": 8243
|
| 57840 |
+
},
|
| 57841 |
+
{
|
| 57842 |
+
"epoch": 0.0008244,
|
| 57843 |
+
"grad_norm": 0.24288016557693481,
|
| 57844 |
+
"learning_rate": 8.243e-05,
|
| 57845 |
+
"loss": 0.1135,
|
| 57846 |
+
"step": 8244
|
| 57847 |
+
},
|
| 57848 |
+
{
|
| 57849 |
+
"epoch": 0.0008245,
|
| 57850 |
+
"grad_norm": 0.3409069776535034,
|
| 57851 |
+
"learning_rate": 8.244e-05,
|
| 57852 |
+
"loss": 0.1301,
|
| 57853 |
+
"step": 8245
|
| 57854 |
+
},
|
| 57855 |
+
{
|
| 57856 |
+
"epoch": 0.0008246,
|
| 57857 |
+
"grad_norm": 0.24525922536849976,
|
| 57858 |
+
"learning_rate": 8.245e-05,
|
| 57859 |
+
"loss": 0.1158,
|
| 57860 |
+
"step": 8246
|
| 57861 |
+
},
|
| 57862 |
+
{
|
| 57863 |
+
"epoch": 0.0008247,
|
| 57864 |
+
"grad_norm": 0.2744600772857666,
|
| 57865 |
+
"learning_rate": 8.246e-05,
|
| 57866 |
+
"loss": 0.1173,
|
| 57867 |
+
"step": 8247
|
| 57868 |
+
},
|
| 57869 |
+
{
|
| 57870 |
+
"epoch": 0.0008248,
|
| 57871 |
+
"grad_norm": 0.4034813344478607,
|
| 57872 |
+
"learning_rate": 8.247e-05,
|
| 57873 |
+
"loss": 0.1368,
|
| 57874 |
+
"step": 8248
|
| 57875 |
+
},
|
| 57876 |
+
{
|
| 57877 |
+
"epoch": 0.0008249,
|
| 57878 |
+
"grad_norm": 0.319559782743454,
|
| 57879 |
+
"learning_rate": 8.248000000000001e-05,
|
| 57880 |
+
"loss": 0.1246,
|
| 57881 |
+
"step": 8249
|
| 57882 |
+
},
|
| 57883 |
+
{
|
| 57884 |
+
"epoch": 0.000825,
|
| 57885 |
+
"grad_norm": 0.27088743448257446,
|
| 57886 |
+
"learning_rate": 8.249e-05,
|
| 57887 |
+
"loss": 0.1181,
|
| 57888 |
+
"step": 8250
|
| 57889 |
+
},
|
| 57890 |
+
{
|
| 57891 |
+
"epoch": 0.0008251,
|
| 57892 |
+
"grad_norm": 0.2523137032985687,
|
| 57893 |
+
"learning_rate": 8.25e-05,
|
| 57894 |
+
"loss": 0.1176,
|
| 57895 |
+
"step": 8251
|
| 57896 |
+
},
|
| 57897 |
+
{
|
| 57898 |
+
"epoch": 0.0008252,
|
| 57899 |
+
"grad_norm": 0.25668174028396606,
|
| 57900 |
+
"learning_rate": 8.251000000000001e-05,
|
| 57901 |
+
"loss": 0.1184,
|
| 57902 |
+
"step": 8252
|
| 57903 |
+
},
|
| 57904 |
+
{
|
| 57905 |
+
"epoch": 0.0008253,
|
| 57906 |
+
"grad_norm": 0.37655991315841675,
|
| 57907 |
+
"learning_rate": 8.252e-05,
|
| 57908 |
+
"loss": 0.1366,
|
| 57909 |
+
"step": 8253
|
| 57910 |
+
},
|
| 57911 |
+
{
|
| 57912 |
+
"epoch": 0.0008254,
|
| 57913 |
+
"grad_norm": 0.24293242394924164,
|
| 57914 |
+
"learning_rate": 8.253e-05,
|
| 57915 |
+
"loss": 0.1133,
|
| 57916 |
+
"step": 8254
|
| 57917 |
+
},
|
| 57918 |
+
{
|
| 57919 |
+
"epoch": 0.0008255,
|
| 57920 |
+
"grad_norm": 0.28997358679771423,
|
| 57921 |
+
"learning_rate": 8.254e-05,
|
| 57922 |
+
"loss": 0.1235,
|
| 57923 |
+
"step": 8255
|
| 57924 |
+
},
|
| 57925 |
+
{
|
| 57926 |
+
"epoch": 0.0008256,
|
| 57927 |
+
"grad_norm": 0.28856682777404785,
|
| 57928 |
+
"learning_rate": 8.255e-05,
|
| 57929 |
+
"loss": 0.1293,
|
| 57930 |
+
"step": 8256
|
| 57931 |
+
},
|
| 57932 |
+
{
|
| 57933 |
+
"epoch": 0.0008257,
|
| 57934 |
+
"grad_norm": 0.2605460286140442,
|
| 57935 |
+
"learning_rate": 8.256e-05,
|
| 57936 |
+
"loss": 0.121,
|
| 57937 |
+
"step": 8257
|
| 57938 |
+
},
|
| 57939 |
+
{
|
| 57940 |
+
"epoch": 0.0008258,
|
| 57941 |
+
"grad_norm": 0.25905779004096985,
|
| 57942 |
+
"learning_rate": 8.257e-05,
|
| 57943 |
+
"loss": 0.1118,
|
| 57944 |
+
"step": 8258
|
| 57945 |
+
},
|
| 57946 |
+
{
|
| 57947 |
+
"epoch": 0.0008259,
|
| 57948 |
+
"grad_norm": 0.270008385181427,
|
| 57949 |
+
"learning_rate": 8.258000000000001e-05,
|
| 57950 |
+
"loss": 0.124,
|
| 57951 |
+
"step": 8259
|
| 57952 |
+
},
|
| 57953 |
+
{
|
| 57954 |
+
"epoch": 0.000826,
|
| 57955 |
+
"grad_norm": 0.270720899105072,
|
| 57956 |
+
"learning_rate": 8.259e-05,
|
| 57957 |
+
"loss": 0.1185,
|
| 57958 |
+
"step": 8260
|
| 57959 |
+
},
|
| 57960 |
+
{
|
| 57961 |
+
"epoch": 0.0008261,
|
| 57962 |
+
"grad_norm": 0.3338068425655365,
|
| 57963 |
+
"learning_rate": 8.26e-05,
|
| 57964 |
+
"loss": 0.123,
|
| 57965 |
+
"step": 8261
|
| 57966 |
+
},
|
| 57967 |
+
{
|
| 57968 |
+
"epoch": 0.0008262,
|
| 57969 |
+
"grad_norm": 3.9577460289001465,
|
| 57970 |
+
"learning_rate": 8.261000000000001e-05,
|
| 57971 |
+
"loss": 0.2931,
|
| 57972 |
+
"step": 8262
|
| 57973 |
+
},
|
| 57974 |
+
{
|
| 57975 |
+
"epoch": 0.0008263,
|
| 57976 |
+
"grad_norm": 0.2671869695186615,
|
| 57977 |
+
"learning_rate": 8.261999999999999e-05,
|
| 57978 |
+
"loss": 0.1177,
|
| 57979 |
+
"step": 8263
|
| 57980 |
+
},
|
| 57981 |
+
{
|
| 57982 |
+
"epoch": 0.0008264,
|
| 57983 |
+
"grad_norm": 0.32506656646728516,
|
| 57984 |
+
"learning_rate": 8.263e-05,
|
| 57985 |
+
"loss": 0.1233,
|
| 57986 |
+
"step": 8264
|
| 57987 |
+
},
|
| 57988 |
+
{
|
| 57989 |
+
"epoch": 0.0008265,
|
| 57990 |
+
"grad_norm": 0.2978303134441376,
|
| 57991 |
+
"learning_rate": 8.264000000000001e-05,
|
| 57992 |
+
"loss": 0.1177,
|
| 57993 |
+
"step": 8265
|
| 57994 |
+
},
|
| 57995 |
+
{
|
| 57996 |
+
"epoch": 0.0008266,
|
| 57997 |
+
"grad_norm": 0.33331626653671265,
|
| 57998 |
+
"learning_rate": 8.265e-05,
|
| 57999 |
+
"loss": 0.1274,
|
| 58000 |
+
"step": 8266
|
| 58001 |
+
},
|
| 58002 |
+
{
|
| 58003 |
+
"epoch": 0.0008267,
|
| 58004 |
+
"grad_norm": 0.3332047760486603,
|
| 58005 |
+
"learning_rate": 8.266e-05,
|
| 58006 |
+
"loss": 0.1182,
|
| 58007 |
+
"step": 8267
|
| 58008 |
+
},
|
| 58009 |
+
{
|
| 58010 |
+
"epoch": 0.0008268,
|
| 58011 |
+
"grad_norm": 0.38557150959968567,
|
| 58012 |
+
"learning_rate": 8.267e-05,
|
| 58013 |
+
"loss": 0.1348,
|
| 58014 |
+
"step": 8268
|
| 58015 |
+
},
|
| 58016 |
+
{
|
| 58017 |
+
"epoch": 0.0008269,
|
| 58018 |
+
"grad_norm": 0.3090338408946991,
|
| 58019 |
+
"learning_rate": 8.268e-05,
|
| 58020 |
+
"loss": 0.1211,
|
| 58021 |
+
"step": 8269
|
| 58022 |
+
},
|
| 58023 |
+
{
|
| 58024 |
+
"epoch": 0.000827,
|
| 58025 |
+
"grad_norm": 0.28389936685562134,
|
| 58026 |
+
"learning_rate": 8.269e-05,
|
| 58027 |
+
"loss": 0.1183,
|
| 58028 |
+
"step": 8270
|
| 58029 |
+
},
|
| 58030 |
+
{
|
| 58031 |
+
"epoch": 0.0008271,
|
| 58032 |
+
"grad_norm": 0.2615092396736145,
|
| 58033 |
+
"learning_rate": 8.27e-05,
|
| 58034 |
+
"loss": 0.1169,
|
| 58035 |
+
"step": 8271
|
| 58036 |
+
},
|
| 58037 |
+
{
|
| 58038 |
+
"epoch": 0.0008272,
|
| 58039 |
+
"grad_norm": 0.3285996615886688,
|
| 58040 |
+
"learning_rate": 8.271e-05,
|
| 58041 |
+
"loss": 0.1208,
|
| 58042 |
+
"step": 8272
|
| 58043 |
+
},
|
| 58044 |
+
{
|
| 58045 |
+
"epoch": 0.0008273,
|
| 58046 |
+
"grad_norm": 0.49238863587379456,
|
| 58047 |
+
"learning_rate": 8.272e-05,
|
| 58048 |
+
"loss": 0.1453,
|
| 58049 |
+
"step": 8273
|
| 58050 |
+
},
|
| 58051 |
+
{
|
| 58052 |
+
"epoch": 0.0008274,
|
| 58053 |
+
"grad_norm": 0.2479100525379181,
|
| 58054 |
+
"learning_rate": 8.273e-05,
|
| 58055 |
+
"loss": 0.1099,
|
| 58056 |
+
"step": 8274
|
| 58057 |
+
},
|
| 58058 |
+
{
|
| 58059 |
+
"epoch": 0.0008275,
|
| 58060 |
+
"grad_norm": 0.35445767641067505,
|
| 58061 |
+
"learning_rate": 8.274e-05,
|
| 58062 |
+
"loss": 0.1161,
|
| 58063 |
+
"step": 8275
|
| 58064 |
+
},
|
| 58065 |
+
{
|
| 58066 |
+
"epoch": 0.0008276,
|
| 58067 |
+
"grad_norm": 0.3050071895122528,
|
| 58068 |
+
"learning_rate": 8.275e-05,
|
| 58069 |
+
"loss": 0.1182,
|
| 58070 |
+
"step": 8276
|
| 58071 |
+
},
|
| 58072 |
+
{
|
| 58073 |
+
"epoch": 0.0008277,
|
| 58074 |
+
"grad_norm": 0.33559903502464294,
|
| 58075 |
+
"learning_rate": 8.276e-05,
|
| 58076 |
+
"loss": 0.1299,
|
| 58077 |
+
"step": 8277
|
| 58078 |
+
},
|
| 58079 |
+
{
|
| 58080 |
+
"epoch": 0.0008278,
|
| 58081 |
+
"grad_norm": 0.482623428106308,
|
| 58082 |
+
"learning_rate": 8.277e-05,
|
| 58083 |
+
"loss": 0.1367,
|
| 58084 |
+
"step": 8278
|
| 58085 |
+
},
|
| 58086 |
+
{
|
| 58087 |
+
"epoch": 0.0008279,
|
| 58088 |
+
"grad_norm": 0.2906891405582428,
|
| 58089 |
+
"learning_rate": 8.278e-05,
|
| 58090 |
+
"loss": 0.118,
|
| 58091 |
+
"step": 8279
|
| 58092 |
+
},
|
| 58093 |
+
{
|
| 58094 |
+
"epoch": 0.000828,
|
| 58095 |
+
"grad_norm": 0.2577823996543884,
|
| 58096 |
+
"learning_rate": 8.279e-05,
|
| 58097 |
+
"loss": 0.111,
|
| 58098 |
+
"step": 8280
|
| 58099 |
+
},
|
| 58100 |
+
{
|
| 58101 |
+
"epoch": 0.0008281,
|
| 58102 |
+
"grad_norm": 0.3549817204475403,
|
| 58103 |
+
"learning_rate": 8.280000000000001e-05,
|
| 58104 |
+
"loss": 0.1409,
|
| 58105 |
+
"step": 8281
|
| 58106 |
+
},
|
| 58107 |
+
{
|
| 58108 |
+
"epoch": 0.0008282,
|
| 58109 |
+
"grad_norm": 0.3377881944179535,
|
| 58110 |
+
"learning_rate": 8.281e-05,
|
| 58111 |
+
"loss": 0.1229,
|
| 58112 |
+
"step": 8282
|
| 58113 |
+
},
|
| 58114 |
+
{
|
| 58115 |
+
"epoch": 0.0008283,
|
| 58116 |
+
"grad_norm": 0.35716354846954346,
|
| 58117 |
+
"learning_rate": 8.282000000000001e-05,
|
| 58118 |
+
"loss": 0.1268,
|
| 58119 |
+
"step": 8283
|
| 58120 |
+
},
|
| 58121 |
+
{
|
| 58122 |
+
"epoch": 0.0008284,
|
| 58123 |
+
"grad_norm": 0.29833707213401794,
|
| 58124 |
+
"learning_rate": 8.283e-05,
|
| 58125 |
+
"loss": 0.1182,
|
| 58126 |
+
"step": 8284
|
| 58127 |
+
},
|
| 58128 |
+
{
|
| 58129 |
+
"epoch": 0.0008285,
|
| 58130 |
+
"grad_norm": 0.5133569240570068,
|
| 58131 |
+
"learning_rate": 8.284e-05,
|
| 58132 |
+
"loss": 0.1214,
|
| 58133 |
+
"step": 8285
|
| 58134 |
+
},
|
| 58135 |
+
{
|
| 58136 |
+
"epoch": 0.0008286,
|
| 58137 |
+
"grad_norm": 0.24109426140785217,
|
| 58138 |
+
"learning_rate": 8.285000000000001e-05,
|
| 58139 |
+
"loss": 0.1091,
|
| 58140 |
+
"step": 8286
|
| 58141 |
+
},
|
| 58142 |
+
{
|
| 58143 |
+
"epoch": 0.0008287,
|
| 58144 |
+
"grad_norm": 0.6600997447967529,
|
| 58145 |
+
"learning_rate": 8.286e-05,
|
| 58146 |
+
"loss": 0.1371,
|
| 58147 |
+
"step": 8287
|
| 58148 |
+
},
|
| 58149 |
+
{
|
| 58150 |
+
"epoch": 0.0008288,
|
| 58151 |
+
"grad_norm": 0.3192616403102875,
|
| 58152 |
+
"learning_rate": 8.287e-05,
|
| 58153 |
+
"loss": 0.1214,
|
| 58154 |
+
"step": 8288
|
| 58155 |
+
},
|
| 58156 |
+
{
|
| 58157 |
+
"epoch": 0.0008289,
|
| 58158 |
+
"grad_norm": 0.34313368797302246,
|
| 58159 |
+
"learning_rate": 8.288e-05,
|
| 58160 |
+
"loss": 0.1159,
|
| 58161 |
+
"step": 8289
|
| 58162 |
+
},
|
| 58163 |
+
{
|
| 58164 |
+
"epoch": 0.000829,
|
| 58165 |
+
"grad_norm": 0.31975916028022766,
|
| 58166 |
+
"learning_rate": 8.289e-05,
|
| 58167 |
+
"loss": 0.1293,
|
| 58168 |
+
"step": 8290
|
| 58169 |
+
},
|
| 58170 |
+
{
|
| 58171 |
+
"epoch": 0.0008291,
|
| 58172 |
+
"grad_norm": 0.8865758180618286,
|
| 58173 |
+
"learning_rate": 8.29e-05,
|
| 58174 |
+
"loss": 0.1255,
|
| 58175 |
+
"step": 8291
|
| 58176 |
+
},
|
| 58177 |
+
{
|
| 58178 |
+
"epoch": 0.0008292,
|
| 58179 |
+
"grad_norm": 0.9230272769927979,
|
| 58180 |
+
"learning_rate": 8.291e-05,
|
| 58181 |
+
"loss": 0.1318,
|
| 58182 |
+
"step": 8292
|
| 58183 |
+
},
|
| 58184 |
+
{
|
| 58185 |
+
"epoch": 0.0008293,
|
| 58186 |
+
"grad_norm": 0.6130544543266296,
|
| 58187 |
+
"learning_rate": 8.292000000000001e-05,
|
| 58188 |
+
"loss": 0.1463,
|
| 58189 |
+
"step": 8293
|
| 58190 |
+
},
|
| 58191 |
+
{
|
| 58192 |
+
"epoch": 0.0008294,
|
| 58193 |
+
"grad_norm": 0.401056170463562,
|
| 58194 |
+
"learning_rate": 8.293e-05,
|
| 58195 |
+
"loss": 0.1218,
|
| 58196 |
+
"step": 8294
|
| 58197 |
+
},
|
| 58198 |
+
{
|
| 58199 |
+
"epoch": 0.0008295,
|
| 58200 |
+
"grad_norm": 0.3474794924259186,
|
| 58201 |
+
"learning_rate": 8.294e-05,
|
| 58202 |
+
"loss": 0.1077,
|
| 58203 |
+
"step": 8295
|
| 58204 |
+
},
|
| 58205 |
+
{
|
| 58206 |
+
"epoch": 0.0008296,
|
| 58207 |
+
"grad_norm": 0.3603403866291046,
|
| 58208 |
+
"learning_rate": 8.295000000000001e-05,
|
| 58209 |
+
"loss": 0.1161,
|
| 58210 |
+
"step": 8296
|
| 58211 |
+
},
|
| 58212 |
+
{
|
| 58213 |
+
"epoch": 0.0008297,
|
| 58214 |
+
"grad_norm": 0.46914738416671753,
|
| 58215 |
+
"learning_rate": 8.295999999999999e-05,
|
| 58216 |
+
"loss": 0.1332,
|
| 58217 |
+
"step": 8297
|
| 58218 |
+
},
|
| 58219 |
+
{
|
| 58220 |
+
"epoch": 0.0008298,
|
| 58221 |
+
"grad_norm": 0.4348466992378235,
|
| 58222 |
+
"learning_rate": 8.297e-05,
|
| 58223 |
+
"loss": 0.1332,
|
| 58224 |
+
"step": 8298
|
| 58225 |
+
},
|
| 58226 |
+
{
|
| 58227 |
+
"epoch": 0.0008299,
|
| 58228 |
+
"grad_norm": 0.3736206889152527,
|
| 58229 |
+
"learning_rate": 8.298000000000001e-05,
|
| 58230 |
+
"loss": 0.1256,
|
| 58231 |
+
"step": 8299
|
| 58232 |
+
},
|
| 58233 |
+
{
|
| 58234 |
+
"epoch": 0.00083,
|
| 58235 |
+
"grad_norm": 0.3494785726070404,
|
| 58236 |
+
"learning_rate": 8.299e-05,
|
| 58237 |
+
"loss": 0.1221,
|
| 58238 |
+
"step": 8300
|
| 58239 |
+
},
|
| 58240 |
+
{
|
| 58241 |
+
"epoch": 0.0008301,
|
| 58242 |
+
"grad_norm": 0.2794828712940216,
|
| 58243 |
+
"learning_rate": 8.3e-05,
|
| 58244 |
+
"loss": 0.1077,
|
| 58245 |
+
"step": 8301
|
| 58246 |
+
},
|
| 58247 |
+
{
|
| 58248 |
+
"epoch": 0.0008302,
|
| 58249 |
+
"grad_norm": 0.3573426902294159,
|
| 58250 |
+
"learning_rate": 8.301e-05,
|
| 58251 |
+
"loss": 0.1187,
|
| 58252 |
+
"step": 8302
|
| 58253 |
+
},
|
| 58254 |
+
{
|
| 58255 |
+
"epoch": 0.0008303,
|
| 58256 |
+
"grad_norm": 0.2746645212173462,
|
| 58257 |
+
"learning_rate": 8.302e-05,
|
| 58258 |
+
"loss": 0.11,
|
| 58259 |
+
"step": 8303
|
| 58260 |
+
},
|
| 58261 |
+
{
|
| 58262 |
+
"epoch": 0.0008304,
|
| 58263 |
+
"grad_norm": 0.35572004318237305,
|
| 58264 |
+
"learning_rate": 8.303e-05,
|
| 58265 |
+
"loss": 0.1227,
|
| 58266 |
+
"step": 8304
|
| 58267 |
+
},
|
| 58268 |
+
{
|
| 58269 |
+
"epoch": 0.0008305,
|
| 58270 |
+
"grad_norm": 0.28891506791114807,
|
| 58271 |
+
"learning_rate": 8.304e-05,
|
| 58272 |
+
"loss": 0.1162,
|
| 58273 |
+
"step": 8305
|
| 58274 |
+
},
|
| 58275 |
+
{
|
| 58276 |
+
"epoch": 0.0008306,
|
| 58277 |
+
"grad_norm": 0.33817705512046814,
|
| 58278 |
+
"learning_rate": 8.305e-05,
|
| 58279 |
+
"loss": 0.124,
|
| 58280 |
+
"step": 8306
|
| 58281 |
+
},
|
| 58282 |
+
{
|
| 58283 |
+
"epoch": 0.0008307,
|
| 58284 |
+
"grad_norm": 0.2758975923061371,
|
| 58285 |
+
"learning_rate": 8.306e-05,
|
| 58286 |
+
"loss": 0.1147,
|
| 58287 |
+
"step": 8307
|
| 58288 |
+
},
|
| 58289 |
+
{
|
| 58290 |
+
"epoch": 0.0008308,
|
| 58291 |
+
"grad_norm": 0.4337676763534546,
|
| 58292 |
+
"learning_rate": 8.307e-05,
|
| 58293 |
+
"loss": 0.147,
|
| 58294 |
+
"step": 8308
|
| 58295 |
+
},
|
| 58296 |
+
{
|
| 58297 |
+
"epoch": 0.0008309,
|
| 58298 |
+
"grad_norm": 0.3356058895587921,
|
| 58299 |
+
"learning_rate": 8.308e-05,
|
| 58300 |
+
"loss": 0.1208,
|
| 58301 |
+
"step": 8309
|
| 58302 |
+
},
|
| 58303 |
+
{
|
| 58304 |
+
"epoch": 0.000831,
|
| 58305 |
+
"grad_norm": 0.2898893356323242,
|
| 58306 |
+
"learning_rate": 8.309e-05,
|
| 58307 |
+
"loss": 0.1221,
|
| 58308 |
+
"step": 8310
|
| 58309 |
+
},
|
| 58310 |
+
{
|
| 58311 |
+
"epoch": 0.0008311,
|
| 58312 |
+
"grad_norm": 0.2947258949279785,
|
| 58313 |
+
"learning_rate": 8.31e-05,
|
| 58314 |
+
"loss": 0.1136,
|
| 58315 |
+
"step": 8311
|
| 58316 |
+
},
|
| 58317 |
+
{
|
| 58318 |
+
"epoch": 0.0008312,
|
| 58319 |
+
"grad_norm": 0.4135041832923889,
|
| 58320 |
+
"learning_rate": 8.311e-05,
|
| 58321 |
+
"loss": 0.1261,
|
| 58322 |
+
"step": 8312
|
| 58323 |
+
},
|
| 58324 |
+
{
|
| 58325 |
+
"epoch": 0.0008313,
|
| 58326 |
+
"grad_norm": 0.3066425323486328,
|
| 58327 |
+
"learning_rate": 8.312e-05,
|
| 58328 |
+
"loss": 0.1221,
|
| 58329 |
+
"step": 8313
|
| 58330 |
+
},
|
| 58331 |
+
{
|
| 58332 |
+
"epoch": 0.0008314,
|
| 58333 |
+
"grad_norm": 0.2656776010990143,
|
| 58334 |
+
"learning_rate": 8.313e-05,
|
| 58335 |
+
"loss": 0.1135,
|
| 58336 |
+
"step": 8314
|
| 58337 |
+
},
|
| 58338 |
+
{
|
| 58339 |
+
"epoch": 0.0008315,
|
| 58340 |
+
"grad_norm": 0.2955726981163025,
|
| 58341 |
+
"learning_rate": 8.314000000000001e-05,
|
| 58342 |
+
"loss": 0.1167,
|
| 58343 |
+
"step": 8315
|
| 58344 |
+
},
|
| 58345 |
+
{
|
| 58346 |
+
"epoch": 0.0008316,
|
| 58347 |
+
"grad_norm": 0.2642335891723633,
|
| 58348 |
+
"learning_rate": 8.315e-05,
|
| 58349 |
+
"loss": 0.1128,
|
| 58350 |
+
"step": 8316
|
| 58351 |
+
},
|
| 58352 |
+
{
|
| 58353 |
+
"epoch": 0.0008317,
|
| 58354 |
+
"grad_norm": 0.23299124836921692,
|
| 58355 |
+
"learning_rate": 8.316000000000001e-05,
|
| 58356 |
+
"loss": 0.1079,
|
| 58357 |
+
"step": 8317
|
| 58358 |
+
},
|
| 58359 |
+
{
|
| 58360 |
+
"epoch": 0.0008318,
|
| 58361 |
+
"grad_norm": 0.2927297353744507,
|
| 58362 |
+
"learning_rate": 8.317e-05,
|
| 58363 |
+
"loss": 0.1194,
|
| 58364 |
+
"step": 8318
|
| 58365 |
+
},
|
| 58366 |
+
{
|
| 58367 |
+
"epoch": 0.0008319,
|
| 58368 |
+
"grad_norm": 0.3799995183944702,
|
| 58369 |
+
"learning_rate": 8.318e-05,
|
| 58370 |
+
"loss": 0.117,
|
| 58371 |
+
"step": 8319
|
| 58372 |
+
},
|
| 58373 |
+
{
|
| 58374 |
+
"epoch": 0.000832,
|
| 58375 |
+
"grad_norm": 0.39418503642082214,
|
| 58376 |
+
"learning_rate": 8.319000000000001e-05,
|
| 58377 |
+
"loss": 0.1209,
|
| 58378 |
+
"step": 8320
|
| 58379 |
+
},
|
| 58380 |
+
{
|
| 58381 |
+
"epoch": 0.0008321,
|
| 58382 |
+
"grad_norm": 0.28276681900024414,
|
| 58383 |
+
"learning_rate": 8.32e-05,
|
| 58384 |
+
"loss": 0.1157,
|
| 58385 |
+
"step": 8321
|
| 58386 |
+
},
|
| 58387 |
+
{
|
| 58388 |
+
"epoch": 0.0008322,
|
| 58389 |
+
"grad_norm": 0.34471064805984497,
|
| 58390 |
+
"learning_rate": 8.321e-05,
|
| 58391 |
+
"loss": 0.1181,
|
| 58392 |
+
"step": 8322
|
| 58393 |
+
},
|
| 58394 |
+
{
|
| 58395 |
+
"epoch": 0.0008323,
|
| 58396 |
+
"grad_norm": 0.3510381579399109,
|
| 58397 |
+
"learning_rate": 8.322e-05,
|
| 58398 |
+
"loss": 0.1277,
|
| 58399 |
+
"step": 8323
|
| 58400 |
+
},
|
| 58401 |
+
{
|
| 58402 |
+
"epoch": 0.0008324,
|
| 58403 |
+
"grad_norm": 0.30988457798957825,
|
| 58404 |
+
"learning_rate": 8.323e-05,
|
| 58405 |
+
"loss": 0.1222,
|
| 58406 |
+
"step": 8324
|
| 58407 |
+
},
|
| 58408 |
+
{
|
| 58409 |
+
"epoch": 0.0008325,
|
| 58410 |
+
"grad_norm": 0.2697628438472748,
|
| 58411 |
+
"learning_rate": 8.324e-05,
|
| 58412 |
+
"loss": 0.1149,
|
| 58413 |
+
"step": 8325
|
| 58414 |
+
},
|
| 58415 |
+
{
|
| 58416 |
+
"epoch": 0.0008326,
|
| 58417 |
+
"grad_norm": 0.27366572618484497,
|
| 58418 |
+
"learning_rate": 8.325e-05,
|
| 58419 |
+
"loss": 0.1159,
|
| 58420 |
+
"step": 8326
|
| 58421 |
+
},
|
| 58422 |
+
{
|
| 58423 |
+
"epoch": 0.0008327,
|
| 58424 |
+
"grad_norm": 0.25706392526626587,
|
| 58425 |
+
"learning_rate": 8.326000000000001e-05,
|
| 58426 |
+
"loss": 0.1133,
|
| 58427 |
+
"step": 8327
|
| 58428 |
+
},
|
| 58429 |
+
{
|
| 58430 |
+
"epoch": 0.0008328,
|
| 58431 |
+
"grad_norm": 0.4249774217605591,
|
| 58432 |
+
"learning_rate": 8.327e-05,
|
| 58433 |
+
"loss": 0.1223,
|
| 58434 |
+
"step": 8328
|
| 58435 |
+
},
|
| 58436 |
+
{
|
| 58437 |
+
"epoch": 0.0008329,
|
| 58438 |
+
"grad_norm": 0.2878964841365814,
|
| 58439 |
+
"learning_rate": 8.328e-05,
|
| 58440 |
+
"loss": 0.119,
|
| 58441 |
+
"step": 8329
|
| 58442 |
+
},
|
| 58443 |
+
{
|
| 58444 |
+
"epoch": 0.000833,
|
| 58445 |
+
"grad_norm": 0.34603971242904663,
|
| 58446 |
+
"learning_rate": 8.329000000000001e-05,
|
| 58447 |
+
"loss": 0.1332,
|
| 58448 |
+
"step": 8330
|
| 58449 |
+
},
|
| 58450 |
+
{
|
| 58451 |
+
"epoch": 0.0008331,
|
| 58452 |
+
"grad_norm": 0.2570502460002899,
|
| 58453 |
+
"learning_rate": 8.329999999999999e-05,
|
| 58454 |
+
"loss": 0.1158,
|
| 58455 |
+
"step": 8331
|
| 58456 |
+
},
|
| 58457 |
+
{
|
| 58458 |
+
"epoch": 0.0008332,
|
| 58459 |
+
"grad_norm": 0.25689229369163513,
|
| 58460 |
+
"learning_rate": 8.331e-05,
|
| 58461 |
+
"loss": 0.1164,
|
| 58462 |
+
"step": 8332
|
| 58463 |
+
},
|
| 58464 |
+
{
|
| 58465 |
+
"epoch": 0.0008333,
|
| 58466 |
+
"grad_norm": 0.4386882185935974,
|
| 58467 |
+
"learning_rate": 8.332000000000001e-05,
|
| 58468 |
+
"loss": 0.1314,
|
| 58469 |
+
"step": 8333
|
| 58470 |
+
},
|
| 58471 |
+
{
|
| 58472 |
+
"epoch": 0.0008334,
|
| 58473 |
+
"grad_norm": 0.26009953022003174,
|
| 58474 |
+
"learning_rate": 8.333e-05,
|
| 58475 |
+
"loss": 0.1135,
|
| 58476 |
+
"step": 8334
|
| 58477 |
+
},
|
| 58478 |
+
{
|
| 58479 |
+
"epoch": 0.0008335,
|
| 58480 |
+
"grad_norm": 0.2905921936035156,
|
| 58481 |
+
"learning_rate": 8.334e-05,
|
| 58482 |
+
"loss": 0.124,
|
| 58483 |
+
"step": 8335
|
| 58484 |
+
},
|
| 58485 |
+
{
|
| 58486 |
+
"epoch": 0.0008336,
|
| 58487 |
+
"grad_norm": 0.3394087851047516,
|
| 58488 |
+
"learning_rate": 8.335e-05,
|
| 58489 |
+
"loss": 0.1251,
|
| 58490 |
+
"step": 8336
|
| 58491 |
+
},
|
| 58492 |
+
{
|
| 58493 |
+
"epoch": 0.0008337,
|
| 58494 |
+
"grad_norm": 0.32263341546058655,
|
| 58495 |
+
"learning_rate": 8.336e-05,
|
| 58496 |
+
"loss": 0.1244,
|
| 58497 |
+
"step": 8337
|
| 58498 |
+
},
|
| 58499 |
+
{
|
| 58500 |
+
"epoch": 0.0008338,
|
| 58501 |
+
"grad_norm": 0.35376691818237305,
|
| 58502 |
+
"learning_rate": 8.337e-05,
|
| 58503 |
+
"loss": 0.1265,
|
| 58504 |
+
"step": 8338
|
| 58505 |
+
},
|
| 58506 |
+
{
|
| 58507 |
+
"epoch": 0.0008339,
|
| 58508 |
+
"grad_norm": 0.2781798839569092,
|
| 58509 |
+
"learning_rate": 8.338e-05,
|
| 58510 |
+
"loss": 0.1192,
|
| 58511 |
+
"step": 8339
|
| 58512 |
+
},
|
| 58513 |
+
{
|
| 58514 |
+
"epoch": 0.000834,
|
| 58515 |
+
"grad_norm": 0.2511618733406067,
|
| 58516 |
+
"learning_rate": 8.339e-05,
|
| 58517 |
+
"loss": 0.1119,
|
| 58518 |
+
"step": 8340
|
| 58519 |
+
},
|
| 58520 |
+
{
|
| 58521 |
+
"epoch": 0.0008341,
|
| 58522 |
+
"grad_norm": 0.4951576292514801,
|
| 58523 |
+
"learning_rate": 8.34e-05,
|
| 58524 |
+
"loss": 0.1381,
|
| 58525 |
+
"step": 8341
|
| 58526 |
+
},
|
| 58527 |
+
{
|
| 58528 |
+
"epoch": 0.0008342,
|
| 58529 |
+
"grad_norm": 0.264965683221817,
|
| 58530 |
+
"learning_rate": 8.341e-05,
|
| 58531 |
+
"loss": 0.1125,
|
| 58532 |
+
"step": 8342
|
| 58533 |
+
},
|
| 58534 |
+
{
|
| 58535 |
+
"epoch": 0.0008343,
|
| 58536 |
+
"grad_norm": 0.2721898853778839,
|
| 58537 |
+
"learning_rate": 8.342e-05,
|
| 58538 |
+
"loss": 0.1144,
|
| 58539 |
+
"step": 8343
|
| 58540 |
+
},
|
| 58541 |
+
{
|
| 58542 |
+
"epoch": 0.0008344,
|
| 58543 |
+
"grad_norm": 0.27462533116340637,
|
| 58544 |
+
"learning_rate": 8.343e-05,
|
| 58545 |
+
"loss": 0.109,
|
| 58546 |
+
"step": 8344
|
| 58547 |
+
},
|
| 58548 |
+
{
|
| 58549 |
+
"epoch": 0.0008345,
|
| 58550 |
+
"grad_norm": 0.2744678556919098,
|
| 58551 |
+
"learning_rate": 8.344e-05,
|
| 58552 |
+
"loss": 0.1149,
|
| 58553 |
+
"step": 8345
|
| 58554 |
+
},
|
| 58555 |
+
{
|
| 58556 |
+
"epoch": 0.0008346,
|
| 58557 |
+
"grad_norm": 0.5289849042892456,
|
| 58558 |
+
"learning_rate": 8.345e-05,
|
| 58559 |
+
"loss": 0.1454,
|
| 58560 |
+
"step": 8346
|
| 58561 |
+
},
|
| 58562 |
+
{
|
| 58563 |
+
"epoch": 0.0008347,
|
| 58564 |
+
"grad_norm": 0.2547566294670105,
|
| 58565 |
+
"learning_rate": 8.346e-05,
|
| 58566 |
+
"loss": 0.1118,
|
| 58567 |
+
"step": 8347
|
| 58568 |
+
},
|
| 58569 |
+
{
|
| 58570 |
+
"epoch": 0.0008348,
|
| 58571 |
+
"grad_norm": 0.3034285604953766,
|
| 58572 |
+
"learning_rate": 8.347e-05,
|
| 58573 |
+
"loss": 0.1199,
|
| 58574 |
+
"step": 8348
|
| 58575 |
+
},
|
| 58576 |
+
{
|
| 58577 |
+
"epoch": 0.0008349,
|
| 58578 |
+
"grad_norm": 0.30945277214050293,
|
| 58579 |
+
"learning_rate": 8.348000000000001e-05,
|
| 58580 |
+
"loss": 0.1213,
|
| 58581 |
+
"step": 8349
|
| 58582 |
+
},
|
| 58583 |
+
{
|
| 58584 |
+
"epoch": 0.000835,
|
| 58585 |
+
"grad_norm": 0.2395797222852707,
|
| 58586 |
+
"learning_rate": 8.349e-05,
|
| 58587 |
+
"loss": 0.1089,
|
| 58588 |
+
"step": 8350
|
| 58589 |
+
},
|
| 58590 |
+
{
|
| 58591 |
+
"epoch": 0.0008351,
|
| 58592 |
+
"grad_norm": 0.3820746839046478,
|
| 58593 |
+
"learning_rate": 8.350000000000001e-05,
|
| 58594 |
+
"loss": 0.1277,
|
| 58595 |
+
"step": 8351
|
| 58596 |
+
},
|
| 58597 |
+
{
|
| 58598 |
+
"epoch": 0.0008352,
|
| 58599 |
+
"grad_norm": 0.25428974628448486,
|
| 58600 |
+
"learning_rate": 8.351e-05,
|
| 58601 |
+
"loss": 0.1135,
|
| 58602 |
+
"step": 8352
|
| 58603 |
+
},
|
| 58604 |
+
{
|
| 58605 |
+
"epoch": 0.0008353,
|
| 58606 |
+
"grad_norm": 0.3403705358505249,
|
| 58607 |
+
"learning_rate": 8.352e-05,
|
| 58608 |
+
"loss": 0.1158,
|
| 58609 |
+
"step": 8353
|
| 58610 |
+
},
|
| 58611 |
+
{
|
| 58612 |
+
"epoch": 0.0008354,
|
| 58613 |
+
"grad_norm": 0.2928326725959778,
|
| 58614 |
+
"learning_rate": 8.353000000000001e-05,
|
| 58615 |
+
"loss": 0.1223,
|
| 58616 |
+
"step": 8354
|
| 58617 |
+
},
|
| 58618 |
+
{
|
| 58619 |
+
"epoch": 0.0008355,
|
| 58620 |
+
"grad_norm": 0.2385818064212799,
|
| 58621 |
+
"learning_rate": 8.354e-05,
|
| 58622 |
+
"loss": 0.1093,
|
| 58623 |
+
"step": 8355
|
| 58624 |
+
},
|
| 58625 |
+
{
|
| 58626 |
+
"epoch": 0.0008356,
|
| 58627 |
+
"grad_norm": 0.2374759018421173,
|
| 58628 |
+
"learning_rate": 8.355e-05,
|
| 58629 |
+
"loss": 0.1085,
|
| 58630 |
+
"step": 8356
|
| 58631 |
+
},
|
| 58632 |
+
{
|
| 58633 |
+
"epoch": 0.0008357,
|
| 58634 |
+
"grad_norm": 0.35778728127479553,
|
| 58635 |
+
"learning_rate": 8.356e-05,
|
| 58636 |
+
"loss": 0.1292,
|
| 58637 |
+
"step": 8357
|
| 58638 |
+
},
|
| 58639 |
+
{
|
| 58640 |
+
"epoch": 0.0008358,
|
| 58641 |
+
"grad_norm": 0.25890466570854187,
|
| 58642 |
+
"learning_rate": 8.357e-05,
|
| 58643 |
+
"loss": 0.1151,
|
| 58644 |
+
"step": 8358
|
| 58645 |
+
},
|
| 58646 |
+
{
|
| 58647 |
+
"epoch": 0.0008359,
|
| 58648 |
+
"grad_norm": 0.2757404148578644,
|
| 58649 |
+
"learning_rate": 8.358e-05,
|
| 58650 |
+
"loss": 0.1199,
|
| 58651 |
+
"step": 8359
|
| 58652 |
+
},
|
| 58653 |
+
{
|
| 58654 |
+
"epoch": 0.000836,
|
| 58655 |
+
"grad_norm": 0.28230440616607666,
|
| 58656 |
+
"learning_rate": 8.359e-05,
|
| 58657 |
+
"loss": 0.113,
|
| 58658 |
+
"step": 8360
|
| 58659 |
+
},
|
| 58660 |
+
{
|
| 58661 |
+
"epoch": 0.0008361,
|
| 58662 |
+
"grad_norm": 0.3027632534503937,
|
| 58663 |
+
"learning_rate": 8.36e-05,
|
| 58664 |
+
"loss": 0.1225,
|
| 58665 |
+
"step": 8361
|
| 58666 |
+
},
|
| 58667 |
+
{
|
| 58668 |
+
"epoch": 0.0008362,
|
| 58669 |
+
"grad_norm": 0.2576223611831665,
|
| 58670 |
+
"learning_rate": 8.361e-05,
|
| 58671 |
+
"loss": 0.116,
|
| 58672 |
+
"step": 8362
|
| 58673 |
+
},
|
| 58674 |
+
{
|
| 58675 |
+
"epoch": 0.0008363,
|
| 58676 |
+
"grad_norm": 0.392742395401001,
|
| 58677 |
+
"learning_rate": 8.362e-05,
|
| 58678 |
+
"loss": 0.127,
|
| 58679 |
+
"step": 8363
|
| 58680 |
+
},
|
| 58681 |
+
{
|
| 58682 |
+
"epoch": 0.0008364,
|
| 58683 |
+
"grad_norm": 0.22860021889209747,
|
| 58684 |
+
"learning_rate": 8.363000000000001e-05,
|
| 58685 |
+
"loss": 0.1018,
|
| 58686 |
+
"step": 8364
|
| 58687 |
+
},
|
| 58688 |
+
{
|
| 58689 |
+
"epoch": 0.0008365,
|
| 58690 |
+
"grad_norm": 0.5199876427650452,
|
| 58691 |
+
"learning_rate": 8.363999999999999e-05,
|
| 58692 |
+
"loss": 0.126,
|
| 58693 |
+
"step": 8365
|
| 58694 |
+
},
|
| 58695 |
+
{
|
| 58696 |
+
"epoch": 0.0008366,
|
| 58697 |
+
"grad_norm": 0.43568482995033264,
|
| 58698 |
+
"learning_rate": 8.365e-05,
|
| 58699 |
+
"loss": 0.1201,
|
| 58700 |
+
"step": 8366
|
| 58701 |
+
},
|
| 58702 |
+
{
|
| 58703 |
+
"epoch": 0.0008367,
|
| 58704 |
+
"grad_norm": 0.3191339373588562,
|
| 58705 |
+
"learning_rate": 8.366000000000001e-05,
|
| 58706 |
+
"loss": 0.1221,
|
| 58707 |
+
"step": 8367
|
| 58708 |
+
},
|
| 58709 |
+
{
|
| 58710 |
+
"epoch": 0.0008368,
|
| 58711 |
+
"grad_norm": 0.2429150938987732,
|
| 58712 |
+
"learning_rate": 8.367e-05,
|
| 58713 |
+
"loss": 0.1099,
|
| 58714 |
+
"step": 8368
|
| 58715 |
+
},
|
| 58716 |
+
{
|
| 58717 |
+
"epoch": 0.0008369,
|
| 58718 |
+
"grad_norm": 0.2731536030769348,
|
| 58719 |
+
"learning_rate": 8.368e-05,
|
| 58720 |
+
"loss": 0.1168,
|
| 58721 |
+
"step": 8369
|
| 58722 |
+
},
|
| 58723 |
+
{
|
| 58724 |
+
"epoch": 0.000837,
|
| 58725 |
+
"grad_norm": 0.2571616470813751,
|
| 58726 |
+
"learning_rate": 8.369000000000001e-05,
|
| 58727 |
+
"loss": 0.1115,
|
| 58728 |
+
"step": 8370
|
| 58729 |
+
},
|
| 58730 |
+
{
|
| 58731 |
+
"epoch": 0.0008371,
|
| 58732 |
+
"grad_norm": 0.2584238052368164,
|
| 58733 |
+
"learning_rate": 8.37e-05,
|
| 58734 |
+
"loss": 0.108,
|
| 58735 |
+
"step": 8371
|
| 58736 |
+
},
|
| 58737 |
+
{
|
| 58738 |
+
"epoch": 0.0008372,
|
| 58739 |
+
"grad_norm": 0.6482557654380798,
|
| 58740 |
+
"learning_rate": 8.371e-05,
|
| 58741 |
+
"loss": 0.1639,
|
| 58742 |
+
"step": 8372
|
| 58743 |
+
},
|
| 58744 |
+
{
|
| 58745 |
+
"epoch": 0.0008373,
|
| 58746 |
+
"grad_norm": 0.40680238604545593,
|
| 58747 |
+
"learning_rate": 8.372e-05,
|
| 58748 |
+
"loss": 0.1381,
|
| 58749 |
+
"step": 8373
|
| 58750 |
+
},
|
| 58751 |
+
{
|
| 58752 |
+
"epoch": 0.0008374,
|
| 58753 |
+
"grad_norm": 0.2401718944311142,
|
| 58754 |
+
"learning_rate": 8.373e-05,
|
| 58755 |
+
"loss": 0.1072,
|
| 58756 |
+
"step": 8374
|
| 58757 |
+
},
|
| 58758 |
+
{
|
| 58759 |
+
"epoch": 0.0008375,
|
| 58760 |
+
"grad_norm": 0.28217509388923645,
|
| 58761 |
+
"learning_rate": 8.374e-05,
|
| 58762 |
+
"loss": 0.1088,
|
| 58763 |
+
"step": 8375
|
| 58764 |
+
},
|
| 58765 |
+
{
|
| 58766 |
+
"epoch": 0.0008376,
|
| 58767 |
+
"grad_norm": 0.4751772880554199,
|
| 58768 |
+
"learning_rate": 8.375e-05,
|
| 58769 |
+
"loss": 0.1427,
|
| 58770 |
+
"step": 8376
|
| 58771 |
+
},
|
| 58772 |
+
{
|
| 58773 |
+
"epoch": 0.0008377,
|
| 58774 |
+
"grad_norm": 0.25133126974105835,
|
| 58775 |
+
"learning_rate": 8.376e-05,
|
| 58776 |
+
"loss": 0.1102,
|
| 58777 |
+
"step": 8377
|
| 58778 |
+
},
|
| 58779 |
+
{
|
| 58780 |
+
"epoch": 0.0008378,
|
| 58781 |
+
"grad_norm": 0.29090291261672974,
|
| 58782 |
+
"learning_rate": 8.377e-05,
|
| 58783 |
+
"loss": 0.1121,
|
| 58784 |
+
"step": 8378
|
| 58785 |
+
},
|
| 58786 |
+
{
|
| 58787 |
+
"epoch": 0.0008379,
|
| 58788 |
+
"grad_norm": 0.2963152825832367,
|
| 58789 |
+
"learning_rate": 8.378e-05,
|
| 58790 |
+
"loss": 0.1144,
|
| 58791 |
+
"step": 8379
|
| 58792 |
+
},
|
| 58793 |
+
{
|
| 58794 |
+
"epoch": 0.000838,
|
| 58795 |
+
"grad_norm": 0.3084557354450226,
|
| 58796 |
+
"learning_rate": 8.379e-05,
|
| 58797 |
+
"loss": 0.1188,
|
| 58798 |
+
"step": 8380
|
| 58799 |
+
},
|
| 58800 |
+
{
|
| 58801 |
+
"epoch": 0.0008381,
|
| 58802 |
+
"grad_norm": 0.29527875781059265,
|
| 58803 |
+
"learning_rate": 8.38e-05,
|
| 58804 |
+
"loss": 0.1141,
|
| 58805 |
+
"step": 8381
|
| 58806 |
+
},
|
| 58807 |
+
{
|
| 58808 |
+
"epoch": 0.0008382,
|
| 58809 |
+
"grad_norm": 0.7038413882255554,
|
| 58810 |
+
"learning_rate": 8.381e-05,
|
| 58811 |
+
"loss": 0.154,
|
| 58812 |
+
"step": 8382
|
| 58813 |
+
},
|
| 58814 |
+
{
|
| 58815 |
+
"epoch": 0.0008383,
|
| 58816 |
+
"grad_norm": 0.25467610359191895,
|
| 58817 |
+
"learning_rate": 8.382e-05,
|
| 58818 |
+
"loss": 0.1071,
|
| 58819 |
+
"step": 8383
|
| 58820 |
+
},
|
| 58821 |
+
{
|
| 58822 |
+
"epoch": 0.0008384,
|
| 58823 |
+
"grad_norm": 0.29565995931625366,
|
| 58824 |
+
"learning_rate": 8.383e-05,
|
| 58825 |
+
"loss": 0.1226,
|
| 58826 |
+
"step": 8384
|
| 58827 |
+
},
|
| 58828 |
+
{
|
| 58829 |
+
"epoch": 0.0008385,
|
| 58830 |
+
"grad_norm": 0.29088881611824036,
|
| 58831 |
+
"learning_rate": 8.384000000000001e-05,
|
| 58832 |
+
"loss": 0.115,
|
| 58833 |
+
"step": 8385
|
| 58834 |
+
},
|
| 58835 |
+
{
|
| 58836 |
+
"epoch": 0.0008386,
|
| 58837 |
+
"grad_norm": 0.24134796857833862,
|
| 58838 |
+
"learning_rate": 8.385e-05,
|
| 58839 |
+
"loss": 0.1085,
|
| 58840 |
+
"step": 8386
|
| 58841 |
+
},
|
| 58842 |
+
{
|
| 58843 |
+
"epoch": 0.0008387,
|
| 58844 |
+
"grad_norm": 0.46633341908454895,
|
| 58845 |
+
"learning_rate": 8.386e-05,
|
| 58846 |
+
"loss": 0.1179,
|
| 58847 |
+
"step": 8387
|
| 58848 |
+
},
|
| 58849 |
+
{
|
| 58850 |
+
"epoch": 0.0008388,
|
| 58851 |
+
"grad_norm": 0.2913179397583008,
|
| 58852 |
+
"learning_rate": 8.387000000000001e-05,
|
| 58853 |
+
"loss": 0.1212,
|
| 58854 |
+
"step": 8388
|
| 58855 |
+
},
|
| 58856 |
+
{
|
| 58857 |
+
"epoch": 0.0008389,
|
| 58858 |
+
"grad_norm": 0.3899138271808624,
|
| 58859 |
+
"learning_rate": 8.388e-05,
|
| 58860 |
+
"loss": 0.1265,
|
| 58861 |
+
"step": 8389
|
| 58862 |
+
},
|
| 58863 |
+
{
|
| 58864 |
+
"epoch": 0.000839,
|
| 58865 |
+
"grad_norm": 0.4130428731441498,
|
| 58866 |
+
"learning_rate": 8.389e-05,
|
| 58867 |
+
"loss": 0.1283,
|
| 58868 |
+
"step": 8390
|
| 58869 |
+
},
|
| 58870 |
+
{
|
| 58871 |
+
"epoch": 0.0008391,
|
| 58872 |
+
"grad_norm": 0.3082103431224823,
|
| 58873 |
+
"learning_rate": 8.39e-05,
|
| 58874 |
+
"loss": 0.1255,
|
| 58875 |
+
"step": 8391
|
| 58876 |
+
},
|
| 58877 |
+
{
|
| 58878 |
+
"epoch": 0.0008392,
|
| 58879 |
+
"grad_norm": 0.40410521626472473,
|
| 58880 |
+
"learning_rate": 8.391e-05,
|
| 58881 |
+
"loss": 0.122,
|
| 58882 |
+
"step": 8392
|
| 58883 |
+
},
|
| 58884 |
+
{
|
| 58885 |
+
"epoch": 0.0008393,
|
| 58886 |
+
"grad_norm": 0.9000693559646606,
|
| 58887 |
+
"learning_rate": 8.392e-05,
|
| 58888 |
+
"loss": 0.1403,
|
| 58889 |
+
"step": 8393
|
| 58890 |
+
},
|
| 58891 |
+
{
|
| 58892 |
+
"epoch": 0.0008394,
|
| 58893 |
+
"grad_norm": 0.2621038854122162,
|
| 58894 |
+
"learning_rate": 8.393e-05,
|
| 58895 |
+
"loss": 0.1055,
|
| 58896 |
+
"step": 8394
|
| 58897 |
+
},
|
| 58898 |
+
{
|
| 58899 |
+
"epoch": 0.0008395,
|
| 58900 |
+
"grad_norm": 0.2831513583660126,
|
| 58901 |
+
"learning_rate": 8.394e-05,
|
| 58902 |
+
"loss": 0.1067,
|
| 58903 |
+
"step": 8395
|
| 58904 |
+
},
|
| 58905 |
+
{
|
| 58906 |
+
"epoch": 0.0008396,
|
| 58907 |
+
"grad_norm": 0.44854921102523804,
|
| 58908 |
+
"learning_rate": 8.395e-05,
|
| 58909 |
+
"loss": 0.1294,
|
| 58910 |
+
"step": 8396
|
| 58911 |
+
},
|
| 58912 |
+
{
|
| 58913 |
+
"epoch": 0.0008397,
|
| 58914 |
+
"grad_norm": 0.28162941336631775,
|
| 58915 |
+
"learning_rate": 8.396e-05,
|
| 58916 |
+
"loss": 0.1076,
|
| 58917 |
+
"step": 8397
|
| 58918 |
+
},
|
| 58919 |
+
{
|
| 58920 |
+
"epoch": 0.0008398,
|
| 58921 |
+
"grad_norm": 0.33356034755706787,
|
| 58922 |
+
"learning_rate": 8.397000000000001e-05,
|
| 58923 |
+
"loss": 0.1154,
|
| 58924 |
+
"step": 8398
|
| 58925 |
+
},
|
| 58926 |
+
{
|
| 58927 |
+
"epoch": 0.0008399,
|
| 58928 |
+
"grad_norm": 0.2977070212364197,
|
| 58929 |
+
"learning_rate": 8.397999999999999e-05,
|
| 58930 |
+
"loss": 0.1146,
|
| 58931 |
+
"step": 8399
|
| 58932 |
+
},
|
| 58933 |
+
{
|
| 58934 |
+
"epoch": 0.00084,
|
| 58935 |
+
"grad_norm": 0.6036233305931091,
|
| 58936 |
+
"learning_rate": 8.399e-05,
|
| 58937 |
+
"loss": 0.1455,
|
| 58938 |
+
"step": 8400
|
| 58939 |
+
},
|
| 58940 |
+
{
|
| 58941 |
+
"epoch": 0.0008401,
|
| 58942 |
+
"grad_norm": 0.26441118121147156,
|
| 58943 |
+
"learning_rate": 8.400000000000001e-05,
|
| 58944 |
+
"loss": 0.1116,
|
| 58945 |
+
"step": 8401
|
| 58946 |
+
},
|
| 58947 |
+
{
|
| 58948 |
+
"epoch": 0.0008402,
|
| 58949 |
+
"grad_norm": 0.37586623430252075,
|
| 58950 |
+
"learning_rate": 8.401e-05,
|
| 58951 |
+
"loss": 0.1124,
|
| 58952 |
+
"step": 8402
|
| 58953 |
+
},
|
| 58954 |
+
{
|
| 58955 |
+
"epoch": 0.0008403,
|
| 58956 |
+
"grad_norm": 0.42590731382369995,
|
| 58957 |
+
"learning_rate": 8.402e-05,
|
| 58958 |
+
"loss": 0.1282,
|
| 58959 |
+
"step": 8403
|
| 58960 |
+
},
|
| 58961 |
+
{
|
| 58962 |
+
"epoch": 0.0008404,
|
| 58963 |
+
"grad_norm": 0.3122142553329468,
|
| 58964 |
+
"learning_rate": 8.403000000000001e-05,
|
| 58965 |
+
"loss": 0.1232,
|
| 58966 |
+
"step": 8404
|
| 58967 |
+
},
|
| 58968 |
+
{
|
| 58969 |
+
"epoch": 0.0008405,
|
| 58970 |
+
"grad_norm": 0.6514351963996887,
|
| 58971 |
+
"learning_rate": 8.404e-05,
|
| 58972 |
+
"loss": 0.142,
|
| 58973 |
+
"step": 8405
|
| 58974 |
+
},
|
| 58975 |
+
{
|
| 58976 |
+
"epoch": 0.0008406,
|
| 58977 |
+
"grad_norm": 0.2786729633808136,
|
| 58978 |
+
"learning_rate": 8.405e-05,
|
| 58979 |
+
"loss": 0.111,
|
| 58980 |
+
"step": 8406
|
| 58981 |
+
},
|
| 58982 |
+
{
|
| 58983 |
+
"epoch": 0.0008407,
|
| 58984 |
+
"grad_norm": 0.28742948174476624,
|
| 58985 |
+
"learning_rate": 8.406e-05,
|
| 58986 |
+
"loss": 0.1119,
|
| 58987 |
+
"step": 8407
|
| 58988 |
+
},
|
| 58989 |
+
{
|
| 58990 |
+
"epoch": 0.0008408,
|
| 58991 |
+
"grad_norm": 0.320432186126709,
|
| 58992 |
+
"learning_rate": 8.407e-05,
|
| 58993 |
+
"loss": 0.1128,
|
| 58994 |
+
"step": 8408
|
| 58995 |
+
},
|
| 58996 |
+
{
|
| 58997 |
+
"epoch": 0.0008409,
|
| 58998 |
+
"grad_norm": 0.3040125072002411,
|
| 58999 |
+
"learning_rate": 8.408e-05,
|
| 59000 |
+
"loss": 0.1146,
|
| 59001 |
+
"step": 8409
|
| 59002 |
+
},
|
| 59003 |
+
{
|
| 59004 |
+
"epoch": 0.000841,
|
| 59005 |
+
"grad_norm": 0.39792320132255554,
|
| 59006 |
+
"learning_rate": 8.409e-05,
|
| 59007 |
+
"loss": 0.1161,
|
| 59008 |
+
"step": 8410
|
| 59009 |
+
},
|
| 59010 |
+
{
|
| 59011 |
+
"epoch": 0.0008411,
|
| 59012 |
+
"grad_norm": 0.26484251022338867,
|
| 59013 |
+
"learning_rate": 8.41e-05,
|
| 59014 |
+
"loss": 0.1102,
|
| 59015 |
+
"step": 8411
|
| 59016 |
+
},
|
| 59017 |
+
{
|
| 59018 |
+
"epoch": 0.0008412,
|
| 59019 |
+
"grad_norm": 0.25933507084846497,
|
| 59020 |
+
"learning_rate": 8.411e-05,
|
| 59021 |
+
"loss": 0.1091,
|
| 59022 |
+
"step": 8412
|
| 59023 |
+
},
|
| 59024 |
+
{
|
| 59025 |
+
"epoch": 0.0008413,
|
| 59026 |
+
"grad_norm": 0.2580576241016388,
|
| 59027 |
+
"learning_rate": 8.412e-05,
|
| 59028 |
+
"loss": 0.1069,
|
| 59029 |
+
"step": 8413
|
| 59030 |
+
},
|
| 59031 |
+
{
|
| 59032 |
+
"epoch": 0.0008414,
|
| 59033 |
+
"grad_norm": 0.26504597067832947,
|
| 59034 |
+
"learning_rate": 8.413e-05,
|
| 59035 |
+
"loss": 0.1089,
|
| 59036 |
+
"step": 8414
|
| 59037 |
+
},
|
| 59038 |
+
{
|
| 59039 |
+
"epoch": 0.0008415,
|
| 59040 |
+
"grad_norm": 0.26909440755844116,
|
| 59041 |
+
"learning_rate": 8.414e-05,
|
| 59042 |
+
"loss": 0.1143,
|
| 59043 |
+
"step": 8415
|
| 59044 |
+
},
|
| 59045 |
+
{
|
| 59046 |
+
"epoch": 0.0008416,
|
| 59047 |
+
"grad_norm": 0.25820961594581604,
|
| 59048 |
+
"learning_rate": 8.415e-05,
|
| 59049 |
+
"loss": 0.1106,
|
| 59050 |
+
"step": 8416
|
| 59051 |
+
},
|
| 59052 |
+
{
|
| 59053 |
+
"epoch": 0.0008417,
|
| 59054 |
+
"grad_norm": 0.23238538205623627,
|
| 59055 |
+
"learning_rate": 8.416e-05,
|
| 59056 |
+
"loss": 0.1038,
|
| 59057 |
+
"step": 8417
|
| 59058 |
+
},
|
| 59059 |
+
{
|
| 59060 |
+
"epoch": 0.0008418,
|
| 59061 |
+
"grad_norm": 0.335588663816452,
|
| 59062 |
+
"learning_rate": 8.417e-05,
|
| 59063 |
+
"loss": 0.1224,
|
| 59064 |
+
"step": 8418
|
| 59065 |
+
},
|
| 59066 |
+
{
|
| 59067 |
+
"epoch": 0.0008419,
|
| 59068 |
+
"grad_norm": 0.5604079365730286,
|
| 59069 |
+
"learning_rate": 8.418000000000001e-05,
|
| 59070 |
+
"loss": 0.1375,
|
| 59071 |
+
"step": 8419
|
| 59072 |
+
},
|
| 59073 |
+
{
|
| 59074 |
+
"epoch": 0.000842,
|
| 59075 |
+
"grad_norm": 0.2831794321537018,
|
| 59076 |
+
"learning_rate": 8.419e-05,
|
| 59077 |
+
"loss": 0.1165,
|
| 59078 |
+
"step": 8420
|
| 59079 |
+
},
|
| 59080 |
+
{
|
| 59081 |
+
"epoch": 0.0008421,
|
| 59082 |
+
"grad_norm": 0.6355200409889221,
|
| 59083 |
+
"learning_rate": 8.42e-05,
|
| 59084 |
+
"loss": 0.127,
|
| 59085 |
+
"step": 8421
|
| 59086 |
+
},
|
| 59087 |
+
{
|
| 59088 |
+
"epoch": 0.0008422,
|
| 59089 |
+
"grad_norm": 0.2544653117656708,
|
| 59090 |
+
"learning_rate": 8.421000000000001e-05,
|
| 59091 |
+
"loss": 0.1092,
|
| 59092 |
+
"step": 8422
|
| 59093 |
+
},
|
| 59094 |
+
{
|
| 59095 |
+
"epoch": 0.0008423,
|
| 59096 |
+
"grad_norm": 0.28864893317222595,
|
| 59097 |
+
"learning_rate": 8.422e-05,
|
| 59098 |
+
"loss": 0.1159,
|
| 59099 |
+
"step": 8423
|
| 59100 |
+
},
|
| 59101 |
+
{
|
| 59102 |
+
"epoch": 0.0008424,
|
| 59103 |
+
"grad_norm": 0.32663410902023315,
|
| 59104 |
+
"learning_rate": 8.423e-05,
|
| 59105 |
+
"loss": 0.1195,
|
| 59106 |
+
"step": 8424
|
| 59107 |
+
},
|
| 59108 |
+
{
|
| 59109 |
+
"epoch": 0.0008425,
|
| 59110 |
+
"grad_norm": 0.2896595597267151,
|
| 59111 |
+
"learning_rate": 8.424e-05,
|
| 59112 |
+
"loss": 0.1182,
|
| 59113 |
+
"step": 8425
|
| 59114 |
+
},
|
| 59115 |
+
{
|
| 59116 |
+
"epoch": 0.0008426,
|
| 59117 |
+
"grad_norm": 0.5385744571685791,
|
| 59118 |
+
"learning_rate": 8.425e-05,
|
| 59119 |
+
"loss": 0.1254,
|
| 59120 |
+
"step": 8426
|
| 59121 |
+
},
|
| 59122 |
+
{
|
| 59123 |
+
"epoch": 0.0008427,
|
| 59124 |
+
"grad_norm": 0.2622899115085602,
|
| 59125 |
+
"learning_rate": 8.426e-05,
|
| 59126 |
+
"loss": 0.1069,
|
| 59127 |
+
"step": 8427
|
| 59128 |
+
},
|
| 59129 |
+
{
|
| 59130 |
+
"epoch": 0.0008428,
|
| 59131 |
+
"grad_norm": 0.4006927013397217,
|
| 59132 |
+
"learning_rate": 8.427e-05,
|
| 59133 |
+
"loss": 0.1242,
|
| 59134 |
+
"step": 8428
|
| 59135 |
+
},
|
| 59136 |
+
{
|
| 59137 |
+
"epoch": 0.0008429,
|
| 59138 |
+
"grad_norm": 0.4451708197593689,
|
| 59139 |
+
"learning_rate": 8.428e-05,
|
| 59140 |
+
"loss": 0.1318,
|
| 59141 |
+
"step": 8429
|
| 59142 |
+
},
|
| 59143 |
+
{
|
| 59144 |
+
"epoch": 0.000843,
|
| 59145 |
+
"grad_norm": 0.25315025448799133,
|
| 59146 |
+
"learning_rate": 8.429e-05,
|
| 59147 |
+
"loss": 0.1099,
|
| 59148 |
+
"step": 8430
|
| 59149 |
+
},
|
| 59150 |
+
{
|
| 59151 |
+
"epoch": 0.0008431,
|
| 59152 |
+
"grad_norm": 0.4274576008319855,
|
| 59153 |
+
"learning_rate": 8.43e-05,
|
| 59154 |
+
"loss": 0.1301,
|
| 59155 |
+
"step": 8431
|
| 59156 |
+
},
|
| 59157 |
+
{
|
| 59158 |
+
"epoch": 0.0008432,
|
| 59159 |
+
"grad_norm": 0.2771688997745514,
|
| 59160 |
+
"learning_rate": 8.431000000000001e-05,
|
| 59161 |
+
"loss": 0.1118,
|
| 59162 |
+
"step": 8432
|
| 59163 |
+
},
|
| 59164 |
+
{
|
| 59165 |
+
"epoch": 0.0008433,
|
| 59166 |
+
"grad_norm": 0.24496810138225555,
|
| 59167 |
+
"learning_rate": 8.431999999999999e-05,
|
| 59168 |
+
"loss": 0.1057,
|
| 59169 |
+
"step": 8433
|
| 59170 |
+
},
|
| 59171 |
+
{
|
| 59172 |
+
"epoch": 0.0008434,
|
| 59173 |
+
"grad_norm": 0.25610044598579407,
|
| 59174 |
+
"learning_rate": 8.433e-05,
|
| 59175 |
+
"loss": 0.1071,
|
| 59176 |
+
"step": 8434
|
| 59177 |
+
},
|
| 59178 |
+
{
|
| 59179 |
+
"epoch": 0.0008435,
|
| 59180 |
+
"grad_norm": 0.26334473490715027,
|
| 59181 |
+
"learning_rate": 8.434000000000001e-05,
|
| 59182 |
+
"loss": 0.1143,
|
| 59183 |
+
"step": 8435
|
| 59184 |
+
},
|
| 59185 |
+
{
|
| 59186 |
+
"epoch": 0.0008436,
|
| 59187 |
+
"grad_norm": 0.2896568477153778,
|
| 59188 |
+
"learning_rate": 8.435e-05,
|
| 59189 |
+
"loss": 0.1201,
|
| 59190 |
+
"step": 8436
|
| 59191 |
+
},
|
| 59192 |
+
{
|
| 59193 |
+
"epoch": 0.0008437,
|
| 59194 |
+
"grad_norm": 0.455269992351532,
|
| 59195 |
+
"learning_rate": 8.436e-05,
|
| 59196 |
+
"loss": 0.1387,
|
| 59197 |
+
"step": 8437
|
| 59198 |
+
},
|
| 59199 |
+
{
|
| 59200 |
+
"epoch": 0.0008438,
|
| 59201 |
+
"grad_norm": 0.33185434341430664,
|
| 59202 |
+
"learning_rate": 8.437000000000001e-05,
|
| 59203 |
+
"loss": 0.1221,
|
| 59204 |
+
"step": 8438
|
| 59205 |
+
},
|
| 59206 |
+
{
|
| 59207 |
+
"epoch": 0.0008439,
|
| 59208 |
+
"grad_norm": 0.2473549097776413,
|
| 59209 |
+
"learning_rate": 8.438e-05,
|
| 59210 |
+
"loss": 0.1053,
|
| 59211 |
+
"step": 8439
|
| 59212 |
+
},
|
| 59213 |
+
{
|
| 59214 |
+
"epoch": 0.000844,
|
| 59215 |
+
"grad_norm": 0.63054358959198,
|
| 59216 |
+
"learning_rate": 8.439e-05,
|
| 59217 |
+
"loss": 0.1376,
|
| 59218 |
+
"step": 8440
|
| 59219 |
+
},
|
| 59220 |
+
{
|
| 59221 |
+
"epoch": 0.0008441,
|
| 59222 |
+
"grad_norm": 0.37012726068496704,
|
| 59223 |
+
"learning_rate": 8.44e-05,
|
| 59224 |
+
"loss": 0.1287,
|
| 59225 |
+
"step": 8441
|
| 59226 |
+
},
|
| 59227 |
+
{
|
| 59228 |
+
"epoch": 0.0008442,
|
| 59229 |
+
"grad_norm": 0.3412618041038513,
|
| 59230 |
+
"learning_rate": 8.441e-05,
|
| 59231 |
+
"loss": 0.131,
|
| 59232 |
+
"step": 8442
|
| 59233 |
+
},
|
| 59234 |
+
{
|
| 59235 |
+
"epoch": 0.0008443,
|
| 59236 |
+
"grad_norm": 0.6600422263145447,
|
| 59237 |
+
"learning_rate": 8.442e-05,
|
| 59238 |
+
"loss": 0.146,
|
| 59239 |
+
"step": 8443
|
| 59240 |
+
},
|
| 59241 |
+
{
|
| 59242 |
+
"epoch": 0.0008444,
|
| 59243 |
+
"grad_norm": 0.8839293122291565,
|
| 59244 |
+
"learning_rate": 8.443e-05,
|
| 59245 |
+
"loss": 0.154,
|
| 59246 |
+
"step": 8444
|
| 59247 |
+
},
|
| 59248 |
+
{
|
| 59249 |
+
"epoch": 0.0008445,
|
| 59250 |
+
"grad_norm": 0.28500112891197205,
|
| 59251 |
+
"learning_rate": 8.444e-05,
|
| 59252 |
+
"loss": 0.1059,
|
| 59253 |
+
"step": 8445
|
| 59254 |
+
},
|
| 59255 |
+
{
|
| 59256 |
+
"epoch": 0.0008446,
|
| 59257 |
+
"grad_norm": 0.3337307870388031,
|
| 59258 |
+
"learning_rate": 8.445e-05,
|
| 59259 |
+
"loss": 0.1151,
|
| 59260 |
+
"step": 8446
|
| 59261 |
+
},
|
| 59262 |
+
{
|
| 59263 |
+
"epoch": 0.0008447,
|
| 59264 |
+
"grad_norm": 0.3037877380847931,
|
| 59265 |
+
"learning_rate": 8.446e-05,
|
| 59266 |
+
"loss": 0.1074,
|
| 59267 |
+
"step": 8447
|
| 59268 |
+
},
|
| 59269 |
+
{
|
| 59270 |
+
"epoch": 0.0008448,
|
| 59271 |
+
"grad_norm": 0.5050310492515564,
|
| 59272 |
+
"learning_rate": 8.447e-05,
|
| 59273 |
+
"loss": 0.1215,
|
| 59274 |
+
"step": 8448
|
| 59275 |
+
},
|
| 59276 |
+
{
|
| 59277 |
+
"epoch": 0.0008449,
|
| 59278 |
+
"grad_norm": 0.3303443193435669,
|
| 59279 |
+
"learning_rate": 8.448e-05,
|
| 59280 |
+
"loss": 0.1108,
|
| 59281 |
+
"step": 8449
|
| 59282 |
+
},
|
| 59283 |
+
{
|
| 59284 |
+
"epoch": 0.000845,
|
| 59285 |
+
"grad_norm": 0.3081909120082855,
|
| 59286 |
+
"learning_rate": 8.449e-05,
|
| 59287 |
+
"loss": 0.1127,
|
| 59288 |
+
"step": 8450
|
| 59289 |
+
},
|
| 59290 |
+
{
|
| 59291 |
+
"epoch": 0.0008451,
|
| 59292 |
+
"grad_norm": 0.28372883796691895,
|
| 59293 |
+
"learning_rate": 8.45e-05,
|
| 59294 |
+
"loss": 0.1074,
|
| 59295 |
+
"step": 8451
|
| 59296 |
+
},
|
| 59297 |
+
{
|
| 59298 |
+
"epoch": 0.0008452,
|
| 59299 |
+
"grad_norm": 0.2864672541618347,
|
| 59300 |
+
"learning_rate": 8.451e-05,
|
| 59301 |
+
"loss": 0.112,
|
| 59302 |
+
"step": 8452
|
| 59303 |
+
},
|
| 59304 |
+
{
|
| 59305 |
+
"epoch": 0.0008453,
|
| 59306 |
+
"grad_norm": 0.2667675018310547,
|
| 59307 |
+
"learning_rate": 8.452000000000001e-05,
|
| 59308 |
+
"loss": 0.1096,
|
| 59309 |
+
"step": 8453
|
| 59310 |
+
},
|
| 59311 |
+
{
|
| 59312 |
+
"epoch": 0.0008454,
|
| 59313 |
+
"grad_norm": 0.2535669803619385,
|
| 59314 |
+
"learning_rate": 8.453e-05,
|
| 59315 |
+
"loss": 0.1085,
|
| 59316 |
+
"step": 8454
|
| 59317 |
+
},
|
| 59318 |
+
{
|
| 59319 |
+
"epoch": 0.0008455,
|
| 59320 |
+
"grad_norm": 1.1039838790893555,
|
| 59321 |
+
"learning_rate": 8.454e-05,
|
| 59322 |
+
"loss": 0.1671,
|
| 59323 |
+
"step": 8455
|
| 59324 |
+
},
|
| 59325 |
+
{
|
| 59326 |
+
"epoch": 0.0008456,
|
| 59327 |
+
"grad_norm": 0.27928635478019714,
|
| 59328 |
+
"learning_rate": 8.455000000000001e-05,
|
| 59329 |
+
"loss": 0.1092,
|
| 59330 |
+
"step": 8456
|
| 59331 |
+
},
|
| 59332 |
+
{
|
| 59333 |
+
"epoch": 0.0008457,
|
| 59334 |
+
"grad_norm": 0.2974961996078491,
|
| 59335 |
+
"learning_rate": 8.456e-05,
|
| 59336 |
+
"loss": 0.1086,
|
| 59337 |
+
"step": 8457
|
| 59338 |
+
},
|
| 59339 |
+
{
|
| 59340 |
+
"epoch": 0.0008458,
|
| 59341 |
+
"grad_norm": 0.43167659640312195,
|
| 59342 |
+
"learning_rate": 8.457e-05,
|
| 59343 |
+
"loss": 0.1269,
|
| 59344 |
+
"step": 8458
|
| 59345 |
+
},
|
| 59346 |
+
{
|
| 59347 |
+
"epoch": 0.0008459,
|
| 59348 |
+
"grad_norm": 0.2962111830711365,
|
| 59349 |
+
"learning_rate": 8.458e-05,
|
| 59350 |
+
"loss": 0.1082,
|
| 59351 |
+
"step": 8459
|
| 59352 |
+
},
|
| 59353 |
+
{
|
| 59354 |
+
"epoch": 0.000846,
|
| 59355 |
+
"grad_norm": 0.28472232818603516,
|
| 59356 |
+
"learning_rate": 8.459e-05,
|
| 59357 |
+
"loss": 0.114,
|
| 59358 |
+
"step": 8460
|
| 59359 |
+
},
|
| 59360 |
+
{
|
| 59361 |
+
"epoch": 0.0008461,
|
| 59362 |
+
"grad_norm": 0.3290531039237976,
|
| 59363 |
+
"learning_rate": 8.46e-05,
|
| 59364 |
+
"loss": 0.1186,
|
| 59365 |
+
"step": 8461
|
| 59366 |
+
},
|
| 59367 |
+
{
|
| 59368 |
+
"epoch": 0.0008462,
|
| 59369 |
+
"grad_norm": 0.31834477186203003,
|
| 59370 |
+
"learning_rate": 8.461e-05,
|
| 59371 |
+
"loss": 0.1156,
|
| 59372 |
+
"step": 8462
|
| 59373 |
+
},
|
| 59374 |
+
{
|
| 59375 |
+
"epoch": 0.0008463,
|
| 59376 |
+
"grad_norm": 2.159715414047241,
|
| 59377 |
+
"learning_rate": 8.462e-05,
|
| 59378 |
+
"loss": 0.1547,
|
| 59379 |
+
"step": 8463
|
| 59380 |
+
},
|
| 59381 |
+
{
|
| 59382 |
+
"epoch": 0.0008464,
|
| 59383 |
+
"grad_norm": 0.3941461741924286,
|
| 59384 |
+
"learning_rate": 8.463e-05,
|
| 59385 |
+
"loss": 0.1165,
|
| 59386 |
+
"step": 8464
|
| 59387 |
+
},
|
| 59388 |
+
{
|
| 59389 |
+
"epoch": 0.0008465,
|
| 59390 |
+
"grad_norm": 0.39252033829689026,
|
| 59391 |
+
"learning_rate": 8.464e-05,
|
| 59392 |
+
"loss": 0.1136,
|
| 59393 |
+
"step": 8465
|
| 59394 |
+
},
|
| 59395 |
+
{
|
| 59396 |
+
"epoch": 0.0008466,
|
| 59397 |
+
"grad_norm": 0.4327937364578247,
|
| 59398 |
+
"learning_rate": 8.465000000000001e-05,
|
| 59399 |
+
"loss": 0.1255,
|
| 59400 |
+
"step": 8466
|
| 59401 |
+
},
|
| 59402 |
+
{
|
| 59403 |
+
"epoch": 0.0008467,
|
| 59404 |
+
"grad_norm": 0.457118421792984,
|
| 59405 |
+
"learning_rate": 8.465999999999999e-05,
|
| 59406 |
+
"loss": 0.1273,
|
| 59407 |
+
"step": 8467
|
| 59408 |
+
},
|
| 59409 |
+
{
|
| 59410 |
+
"epoch": 0.0008468,
|
| 59411 |
+
"grad_norm": 0.6907281279563904,
|
| 59412 |
+
"learning_rate": 8.467e-05,
|
| 59413 |
+
"loss": 0.1348,
|
| 59414 |
+
"step": 8468
|
| 59415 |
+
},
|
| 59416 |
+
{
|
| 59417 |
+
"epoch": 0.0008469,
|
| 59418 |
+
"grad_norm": 0.29370391368865967,
|
| 59419 |
+
"learning_rate": 8.468000000000001e-05,
|
| 59420 |
+
"loss": 0.1116,
|
| 59421 |
+
"step": 8469
|
| 59422 |
+
},
|
| 59423 |
+
{
|
| 59424 |
+
"epoch": 0.000847,
|
| 59425 |
+
"grad_norm": 0.5513447523117065,
|
| 59426 |
+
"learning_rate": 8.469e-05,
|
| 59427 |
+
"loss": 0.1266,
|
| 59428 |
+
"step": 8470
|
| 59429 |
+
},
|
| 59430 |
+
{
|
| 59431 |
+
"epoch": 0.0008471,
|
| 59432 |
+
"grad_norm": 0.29566922783851624,
|
| 59433 |
+
"learning_rate": 8.47e-05,
|
| 59434 |
+
"loss": 0.1077,
|
| 59435 |
+
"step": 8471
|
| 59436 |
+
},
|
| 59437 |
+
{
|
| 59438 |
+
"epoch": 0.0008472,
|
| 59439 |
+
"grad_norm": 0.3664743900299072,
|
| 59440 |
+
"learning_rate": 8.471000000000001e-05,
|
| 59441 |
+
"loss": 0.1161,
|
| 59442 |
+
"step": 8472
|
| 59443 |
+
},
|
| 59444 |
+
{
|
| 59445 |
+
"epoch": 0.0008473,
|
| 59446 |
+
"grad_norm": 0.3511149287223816,
|
| 59447 |
+
"learning_rate": 8.472e-05,
|
| 59448 |
+
"loss": 0.1137,
|
| 59449 |
+
"step": 8473
|
| 59450 |
+
},
|
| 59451 |
+
{
|
| 59452 |
+
"epoch": 0.0008474,
|
| 59453 |
+
"grad_norm": 0.39576053619384766,
|
| 59454 |
+
"learning_rate": 8.473e-05,
|
| 59455 |
+
"loss": 0.1152,
|
| 59456 |
+
"step": 8474
|
| 59457 |
+
},
|
| 59458 |
+
{
|
| 59459 |
+
"epoch": 0.0008475,
|
| 59460 |
+
"grad_norm": 0.31602656841278076,
|
| 59461 |
+
"learning_rate": 8.474e-05,
|
| 59462 |
+
"loss": 0.1146,
|
| 59463 |
+
"step": 8475
|
| 59464 |
+
},
|
| 59465 |
+
{
|
| 59466 |
+
"epoch": 0.0008476,
|
| 59467 |
+
"grad_norm": 0.28262972831726074,
|
| 59468 |
+
"learning_rate": 8.475e-05,
|
| 59469 |
+
"loss": 0.1112,
|
| 59470 |
+
"step": 8476
|
| 59471 |
+
},
|
| 59472 |
+
{
|
| 59473 |
+
"epoch": 0.0008477,
|
| 59474 |
+
"grad_norm": 0.3561192452907562,
|
| 59475 |
+
"learning_rate": 8.476e-05,
|
| 59476 |
+
"loss": 0.1194,
|
| 59477 |
+
"step": 8477
|
| 59478 |
+
},
|
| 59479 |
+
{
|
| 59480 |
+
"epoch": 0.0008478,
|
| 59481 |
+
"grad_norm": 0.30674663186073303,
|
| 59482 |
+
"learning_rate": 8.477e-05,
|
| 59483 |
+
"loss": 0.1216,
|
| 59484 |
+
"step": 8478
|
| 59485 |
+
},
|
| 59486 |
+
{
|
| 59487 |
+
"epoch": 0.0008479,
|
| 59488 |
+
"grad_norm": 0.5238985419273376,
|
| 59489 |
+
"learning_rate": 8.478e-05,
|
| 59490 |
+
"loss": 0.1331,
|
| 59491 |
+
"step": 8479
|
| 59492 |
+
},
|
| 59493 |
+
{
|
| 59494 |
+
"epoch": 0.000848,
|
| 59495 |
+
"grad_norm": 0.3820216655731201,
|
| 59496 |
+
"learning_rate": 8.479e-05,
|
| 59497 |
+
"loss": 0.1179,
|
| 59498 |
+
"step": 8480
|
| 59499 |
+
},
|
| 59500 |
+
{
|
| 59501 |
+
"epoch": 0.0008481,
|
| 59502 |
+
"grad_norm": 0.30187010765075684,
|
| 59503 |
+
"learning_rate": 8.48e-05,
|
| 59504 |
+
"loss": 0.1151,
|
| 59505 |
+
"step": 8481
|
| 59506 |
+
},
|
| 59507 |
+
{
|
| 59508 |
+
"epoch": 0.0008482,
|
| 59509 |
+
"grad_norm": 0.3691226541996002,
|
| 59510 |
+
"learning_rate": 8.481e-05,
|
| 59511 |
+
"loss": 0.1114,
|
| 59512 |
+
"step": 8482
|
| 59513 |
+
},
|
| 59514 |
+
{
|
| 59515 |
+
"epoch": 0.0008483,
|
| 59516 |
+
"grad_norm": 0.293366014957428,
|
| 59517 |
+
"learning_rate": 8.482e-05,
|
| 59518 |
+
"loss": 0.114,
|
| 59519 |
+
"step": 8483
|
| 59520 |
+
},
|
| 59521 |
+
{
|
| 59522 |
+
"epoch": 0.0008484,
|
| 59523 |
+
"grad_norm": 0.2826138138771057,
|
| 59524 |
+
"learning_rate": 8.483e-05,
|
| 59525 |
+
"loss": 0.1098,
|
| 59526 |
+
"step": 8484
|
| 59527 |
+
},
|
| 59528 |
+
{
|
| 59529 |
+
"epoch": 0.0008485,
|
| 59530 |
+
"grad_norm": 0.4060609042644501,
|
| 59531 |
+
"learning_rate": 8.484e-05,
|
| 59532 |
+
"loss": 0.1251,
|
| 59533 |
+
"step": 8485
|
| 59534 |
+
},
|
| 59535 |
+
{
|
| 59536 |
+
"epoch": 0.0008486,
|
| 59537 |
+
"grad_norm": 0.23822547495365143,
|
| 59538 |
+
"learning_rate": 8.485e-05,
|
| 59539 |
+
"loss": 0.1011,
|
| 59540 |
+
"step": 8486
|
| 59541 |
+
},
|
| 59542 |
+
{
|
| 59543 |
+
"epoch": 0.0008487,
|
| 59544 |
+
"grad_norm": 0.31863072514533997,
|
| 59545 |
+
"learning_rate": 8.486000000000001e-05,
|
| 59546 |
+
"loss": 0.1199,
|
| 59547 |
+
"step": 8487
|
| 59548 |
+
},
|
| 59549 |
+
{
|
| 59550 |
+
"epoch": 0.0008488,
|
| 59551 |
+
"grad_norm": 0.7325024604797363,
|
| 59552 |
+
"learning_rate": 8.487e-05,
|
| 59553 |
+
"loss": 0.1514,
|
| 59554 |
+
"step": 8488
|
| 59555 |
+
},
|
| 59556 |
+
{
|
| 59557 |
+
"epoch": 0.0008489,
|
| 59558 |
+
"grad_norm": 0.2716192305088043,
|
| 59559 |
+
"learning_rate": 8.488e-05,
|
| 59560 |
+
"loss": 0.1089,
|
| 59561 |
+
"step": 8489
|
| 59562 |
+
},
|
| 59563 |
+
{
|
| 59564 |
+
"epoch": 0.000849,
|
| 59565 |
+
"grad_norm": 0.3262501060962677,
|
| 59566 |
+
"learning_rate": 8.489000000000001e-05,
|
| 59567 |
+
"loss": 0.1185,
|
| 59568 |
+
"step": 8490
|
| 59569 |
+
},
|
| 59570 |
+
{
|
| 59571 |
+
"epoch": 0.0008491,
|
| 59572 |
+
"grad_norm": 0.27587273716926575,
|
| 59573 |
+
"learning_rate": 8.49e-05,
|
| 59574 |
+
"loss": 0.1134,
|
| 59575 |
+
"step": 8491
|
| 59576 |
+
},
|
| 59577 |
+
{
|
| 59578 |
+
"epoch": 0.0008492,
|
| 59579 |
+
"grad_norm": 0.31848254799842834,
|
| 59580 |
+
"learning_rate": 8.491e-05,
|
| 59581 |
+
"loss": 0.1135,
|
| 59582 |
+
"step": 8492
|
| 59583 |
+
},
|
| 59584 |
+
{
|
| 59585 |
+
"epoch": 0.0008493,
|
| 59586 |
+
"grad_norm": 0.3487236797809601,
|
| 59587 |
+
"learning_rate": 8.492000000000001e-05,
|
| 59588 |
+
"loss": 0.1143,
|
| 59589 |
+
"step": 8493
|
| 59590 |
+
},
|
| 59591 |
+
{
|
| 59592 |
+
"epoch": 0.0008494,
|
| 59593 |
+
"grad_norm": 0.2887483537197113,
|
| 59594 |
+
"learning_rate": 8.493e-05,
|
| 59595 |
+
"loss": 0.1134,
|
| 59596 |
+
"step": 8494
|
| 59597 |
+
},
|
| 59598 |
+
{
|
| 59599 |
+
"epoch": 0.0008495,
|
| 59600 |
+
"grad_norm": 3.056828498840332,
|
| 59601 |
+
"learning_rate": 8.494e-05,
|
| 59602 |
+
"loss": 0.2388,
|
| 59603 |
+
"step": 8495
|
| 59604 |
+
},
|
| 59605 |
+
{
|
| 59606 |
+
"epoch": 0.0008496,
|
| 59607 |
+
"grad_norm": 0.4552193582057953,
|
| 59608 |
+
"learning_rate": 8.495e-05,
|
| 59609 |
+
"loss": 0.1301,
|
| 59610 |
+
"step": 8496
|
| 59611 |
+
},
|
| 59612 |
+
{
|
| 59613 |
+
"epoch": 0.0008497,
|
| 59614 |
+
"grad_norm": 0.2789609134197235,
|
| 59615 |
+
"learning_rate": 8.496e-05,
|
| 59616 |
+
"loss": 0.1079,
|
| 59617 |
+
"step": 8497
|
| 59618 |
+
},
|
| 59619 |
+
{
|
| 59620 |
+
"epoch": 0.0008498,
|
| 59621 |
+
"grad_norm": 0.2965538501739502,
|
| 59622 |
+
"learning_rate": 8.497e-05,
|
| 59623 |
+
"loss": 0.1034,
|
| 59624 |
+
"step": 8498
|
| 59625 |
+
},
|
| 59626 |
+
{
|
| 59627 |
+
"epoch": 0.0008499,
|
| 59628 |
+
"grad_norm": 0.31228092312812805,
|
| 59629 |
+
"learning_rate": 8.498e-05,
|
| 59630 |
+
"loss": 0.1108,
|
| 59631 |
+
"step": 8499
|
| 59632 |
+
},
|
| 59633 |
+
{
|
| 59634 |
+
"epoch": 0.00085,
|
| 59635 |
+
"grad_norm": 0.32444027066230774,
|
| 59636 |
+
"learning_rate": 8.499000000000001e-05,
|
| 59637 |
+
"loss": 0.1105,
|
| 59638 |
+
"step": 8500
|
| 59639 |
+
},
|
| 59640 |
+
{
|
| 59641 |
+
"epoch": 0.00085,
|
| 59642 |
+
"eval_loss": 0.012609053403139114,
|
| 59643 |
+
"eval_runtime": 362.403,
|
| 59644 |
+
"eval_samples_per_second": 27.594,
|
| 59645 |
+
"eval_steps_per_second": 1.725,
|
| 59646 |
+
"step": 8500
|
| 59647 |
}
|
| 59648 |
],
|
| 59649 |
"logging_steps": 1,
|