Training in progress, step 7000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 91951912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:453db0967893a01c23bbf7ad7a571d0a1b58910c0c9acc98e0fe55fb80c46c5e
|
| 3 |
size 91951912
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 183991627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dd9c96de5cfae7f6382dfc3421f7df541be5d2a79ed536d7f3f79ed0663e697
|
| 3 |
size 183991627
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a2492917bec3c7fed4a6e84689f88a4241e7d20b33a15984c2f90c6bc7cb605
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -45612,6 +45612,3514 @@
|
|
| 45612 |
"eval_samples_per_second": 27.399,
|
| 45613 |
"eval_steps_per_second": 1.712,
|
| 45614 |
"step": 6500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45615 |
}
|
| 45616 |
],
|
| 45617 |
"logging_steps": 1,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.0007,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 7000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 45612 |
"eval_samples_per_second": 27.399,
|
| 45613 |
"eval_steps_per_second": 1.712,
|
| 45614 |
"step": 6500
|
| 45615 |
+
},
|
| 45616 |
+
{
|
| 45617 |
+
"epoch": 0.0006501,
|
| 45618 |
+
"grad_norm": 1.03931725025177,
|
| 45619 |
+
"learning_rate": 6.500000000000001e-05,
|
| 45620 |
+
"loss": 0.3943,
|
| 45621 |
+
"step": 6501
|
| 45622 |
+
},
|
| 45623 |
+
{
|
| 45624 |
+
"epoch": 0.0006502,
|
| 45625 |
+
"grad_norm": 1.1277257204055786,
|
| 45626 |
+
"learning_rate": 6.501e-05,
|
| 45627 |
+
"loss": 0.4182,
|
| 45628 |
+
"step": 6502
|
| 45629 |
+
},
|
| 45630 |
+
{
|
| 45631 |
+
"epoch": 0.0006503,
|
| 45632 |
+
"grad_norm": 1.1667401790618896,
|
| 45633 |
+
"learning_rate": 6.502e-05,
|
| 45634 |
+
"loss": 0.4065,
|
| 45635 |
+
"step": 6503
|
| 45636 |
+
},
|
| 45637 |
+
{
|
| 45638 |
+
"epoch": 0.0006504,
|
| 45639 |
+
"grad_norm": 1.1649590730667114,
|
| 45640 |
+
"learning_rate": 6.503e-05,
|
| 45641 |
+
"loss": 0.4451,
|
| 45642 |
+
"step": 6504
|
| 45643 |
+
},
|
| 45644 |
+
{
|
| 45645 |
+
"epoch": 0.0006505,
|
| 45646 |
+
"grad_norm": 1.085089087486267,
|
| 45647 |
+
"learning_rate": 6.504e-05,
|
| 45648 |
+
"loss": 0.4204,
|
| 45649 |
+
"step": 6505
|
| 45650 |
+
},
|
| 45651 |
+
{
|
| 45652 |
+
"epoch": 0.0006506,
|
| 45653 |
+
"grad_norm": 0.9987074136734009,
|
| 45654 |
+
"learning_rate": 6.505e-05,
|
| 45655 |
+
"loss": 0.4111,
|
| 45656 |
+
"step": 6506
|
| 45657 |
+
},
|
| 45658 |
+
{
|
| 45659 |
+
"epoch": 0.0006507,
|
| 45660 |
+
"grad_norm": 1.0891286134719849,
|
| 45661 |
+
"learning_rate": 6.506e-05,
|
| 45662 |
+
"loss": 0.4248,
|
| 45663 |
+
"step": 6507
|
| 45664 |
+
},
|
| 45665 |
+
{
|
| 45666 |
+
"epoch": 0.0006508,
|
| 45667 |
+
"grad_norm": 0.9831911325454712,
|
| 45668 |
+
"learning_rate": 6.507e-05,
|
| 45669 |
+
"loss": 0.3943,
|
| 45670 |
+
"step": 6508
|
| 45671 |
+
},
|
| 45672 |
+
{
|
| 45673 |
+
"epoch": 0.0006509,
|
| 45674 |
+
"grad_norm": 0.9570473432540894,
|
| 45675 |
+
"learning_rate": 6.508e-05,
|
| 45676 |
+
"loss": 0.385,
|
| 45677 |
+
"step": 6509
|
| 45678 |
+
},
|
| 45679 |
+
{
|
| 45680 |
+
"epoch": 0.000651,
|
| 45681 |
+
"grad_norm": 1.354292869567871,
|
| 45682 |
+
"learning_rate": 6.509e-05,
|
| 45683 |
+
"loss": 0.5371,
|
| 45684 |
+
"step": 6510
|
| 45685 |
+
},
|
| 45686 |
+
{
|
| 45687 |
+
"epoch": 0.0006511,
|
| 45688 |
+
"grad_norm": 1.0083353519439697,
|
| 45689 |
+
"learning_rate": 6.51e-05,
|
| 45690 |
+
"loss": 0.4094,
|
| 45691 |
+
"step": 6511
|
| 45692 |
+
},
|
| 45693 |
+
{
|
| 45694 |
+
"epoch": 0.0006512,
|
| 45695 |
+
"grad_norm": 0.9337421655654907,
|
| 45696 |
+
"learning_rate": 6.511e-05,
|
| 45697 |
+
"loss": 0.4092,
|
| 45698 |
+
"step": 6512
|
| 45699 |
+
},
|
| 45700 |
+
{
|
| 45701 |
+
"epoch": 0.0006513,
|
| 45702 |
+
"grad_norm": 0.9101032018661499,
|
| 45703 |
+
"learning_rate": 6.512e-05,
|
| 45704 |
+
"loss": 0.387,
|
| 45705 |
+
"step": 6513
|
| 45706 |
+
},
|
| 45707 |
+
{
|
| 45708 |
+
"epoch": 0.0006514,
|
| 45709 |
+
"grad_norm": 0.994778037071228,
|
| 45710 |
+
"learning_rate": 6.513000000000001e-05,
|
| 45711 |
+
"loss": 0.4221,
|
| 45712 |
+
"step": 6514
|
| 45713 |
+
},
|
| 45714 |
+
{
|
| 45715 |
+
"epoch": 0.0006515,
|
| 45716 |
+
"grad_norm": 0.8868163228034973,
|
| 45717 |
+
"learning_rate": 6.514e-05,
|
| 45718 |
+
"loss": 0.3887,
|
| 45719 |
+
"step": 6515
|
| 45720 |
+
},
|
| 45721 |
+
{
|
| 45722 |
+
"epoch": 0.0006516,
|
| 45723 |
+
"grad_norm": 1.0790033340454102,
|
| 45724 |
+
"learning_rate": 6.515e-05,
|
| 45725 |
+
"loss": 0.4199,
|
| 45726 |
+
"step": 6516
|
| 45727 |
+
},
|
| 45728 |
+
{
|
| 45729 |
+
"epoch": 0.0006517,
|
| 45730 |
+
"grad_norm": 0.9513962268829346,
|
| 45731 |
+
"learning_rate": 6.516e-05,
|
| 45732 |
+
"loss": 0.4036,
|
| 45733 |
+
"step": 6517
|
| 45734 |
+
},
|
| 45735 |
+
{
|
| 45736 |
+
"epoch": 0.0006518,
|
| 45737 |
+
"grad_norm": 0.966842532157898,
|
| 45738 |
+
"learning_rate": 6.517e-05,
|
| 45739 |
+
"loss": 0.4221,
|
| 45740 |
+
"step": 6518
|
| 45741 |
+
},
|
| 45742 |
+
{
|
| 45743 |
+
"epoch": 0.0006519,
|
| 45744 |
+
"grad_norm": 0.8695230484008789,
|
| 45745 |
+
"learning_rate": 6.518e-05,
|
| 45746 |
+
"loss": 0.3608,
|
| 45747 |
+
"step": 6519
|
| 45748 |
+
},
|
| 45749 |
+
{
|
| 45750 |
+
"epoch": 0.000652,
|
| 45751 |
+
"grad_norm": 0.9728794693946838,
|
| 45752 |
+
"learning_rate": 6.519e-05,
|
| 45753 |
+
"loss": 0.4062,
|
| 45754 |
+
"step": 6520
|
| 45755 |
+
},
|
| 45756 |
+
{
|
| 45757 |
+
"epoch": 0.0006521,
|
| 45758 |
+
"grad_norm": 1.1127318143844604,
|
| 45759 |
+
"learning_rate": 6.52e-05,
|
| 45760 |
+
"loss": 0.4353,
|
| 45761 |
+
"step": 6521
|
| 45762 |
+
},
|
| 45763 |
+
{
|
| 45764 |
+
"epoch": 0.0006522,
|
| 45765 |
+
"grad_norm": 0.9356803297996521,
|
| 45766 |
+
"learning_rate": 6.521000000000001e-05,
|
| 45767 |
+
"loss": 0.3857,
|
| 45768 |
+
"step": 6522
|
| 45769 |
+
},
|
| 45770 |
+
{
|
| 45771 |
+
"epoch": 0.0006523,
|
| 45772 |
+
"grad_norm": 0.9150017499923706,
|
| 45773 |
+
"learning_rate": 6.522e-05,
|
| 45774 |
+
"loss": 0.3821,
|
| 45775 |
+
"step": 6523
|
| 45776 |
+
},
|
| 45777 |
+
{
|
| 45778 |
+
"epoch": 0.0006524,
|
| 45779 |
+
"grad_norm": 1.0721808671951294,
|
| 45780 |
+
"learning_rate": 6.523e-05,
|
| 45781 |
+
"loss": 0.4216,
|
| 45782 |
+
"step": 6524
|
| 45783 |
+
},
|
| 45784 |
+
{
|
| 45785 |
+
"epoch": 0.0006525,
|
| 45786 |
+
"grad_norm": 0.9526981711387634,
|
| 45787 |
+
"learning_rate": 6.524e-05,
|
| 45788 |
+
"loss": 0.3779,
|
| 45789 |
+
"step": 6525
|
| 45790 |
+
},
|
| 45791 |
+
{
|
| 45792 |
+
"epoch": 0.0006526,
|
| 45793 |
+
"grad_norm": 1.0103404521942139,
|
| 45794 |
+
"learning_rate": 6.525e-05,
|
| 45795 |
+
"loss": 0.4048,
|
| 45796 |
+
"step": 6526
|
| 45797 |
+
},
|
| 45798 |
+
{
|
| 45799 |
+
"epoch": 0.0006527,
|
| 45800 |
+
"grad_norm": 0.9327437877655029,
|
| 45801 |
+
"learning_rate": 6.526e-05,
|
| 45802 |
+
"loss": 0.395,
|
| 45803 |
+
"step": 6527
|
| 45804 |
+
},
|
| 45805 |
+
{
|
| 45806 |
+
"epoch": 0.0006528,
|
| 45807 |
+
"grad_norm": 0.8834521174430847,
|
| 45808 |
+
"learning_rate": 6.527e-05,
|
| 45809 |
+
"loss": 0.3818,
|
| 45810 |
+
"step": 6528
|
| 45811 |
+
},
|
| 45812 |
+
{
|
| 45813 |
+
"epoch": 0.0006529,
|
| 45814 |
+
"grad_norm": 0.8776892423629761,
|
| 45815 |
+
"learning_rate": 6.528000000000001e-05,
|
| 45816 |
+
"loss": 0.3735,
|
| 45817 |
+
"step": 6529
|
| 45818 |
+
},
|
| 45819 |
+
{
|
| 45820 |
+
"epoch": 0.000653,
|
| 45821 |
+
"grad_norm": 0.9939847588539124,
|
| 45822 |
+
"learning_rate": 6.528999999999999e-05,
|
| 45823 |
+
"loss": 0.4111,
|
| 45824 |
+
"step": 6530
|
| 45825 |
+
},
|
| 45826 |
+
{
|
| 45827 |
+
"epoch": 0.0006531,
|
| 45828 |
+
"grad_norm": 0.998724639415741,
|
| 45829 |
+
"learning_rate": 6.53e-05,
|
| 45830 |
+
"loss": 0.4087,
|
| 45831 |
+
"step": 6531
|
| 45832 |
+
},
|
| 45833 |
+
{
|
| 45834 |
+
"epoch": 0.0006532,
|
| 45835 |
+
"grad_norm": 0.9717676639556885,
|
| 45836 |
+
"learning_rate": 6.531000000000001e-05,
|
| 45837 |
+
"loss": 0.3853,
|
| 45838 |
+
"step": 6532
|
| 45839 |
+
},
|
| 45840 |
+
{
|
| 45841 |
+
"epoch": 0.0006533,
|
| 45842 |
+
"grad_norm": 1.451299786567688,
|
| 45843 |
+
"learning_rate": 6.531999999999999e-05,
|
| 45844 |
+
"loss": 0.5186,
|
| 45845 |
+
"step": 6533
|
| 45846 |
+
},
|
| 45847 |
+
{
|
| 45848 |
+
"epoch": 0.0006534,
|
| 45849 |
+
"grad_norm": 0.95253586769104,
|
| 45850 |
+
"learning_rate": 6.533e-05,
|
| 45851 |
+
"loss": 0.3835,
|
| 45852 |
+
"step": 6534
|
| 45853 |
+
},
|
| 45854 |
+
{
|
| 45855 |
+
"epoch": 0.0006535,
|
| 45856 |
+
"grad_norm": 0.8519267439842224,
|
| 45857 |
+
"learning_rate": 6.534000000000001e-05,
|
| 45858 |
+
"loss": 0.344,
|
| 45859 |
+
"step": 6535
|
| 45860 |
+
},
|
| 45861 |
+
{
|
| 45862 |
+
"epoch": 0.0006536,
|
| 45863 |
+
"grad_norm": 1.0273712873458862,
|
| 45864 |
+
"learning_rate": 6.535e-05,
|
| 45865 |
+
"loss": 0.4075,
|
| 45866 |
+
"step": 6536
|
| 45867 |
+
},
|
| 45868 |
+
{
|
| 45869 |
+
"epoch": 0.0006537,
|
| 45870 |
+
"grad_norm": 1.1981201171875,
|
| 45871 |
+
"learning_rate": 6.536e-05,
|
| 45872 |
+
"loss": 0.4307,
|
| 45873 |
+
"step": 6537
|
| 45874 |
+
},
|
| 45875 |
+
{
|
| 45876 |
+
"epoch": 0.0006538,
|
| 45877 |
+
"grad_norm": 1.0395232439041138,
|
| 45878 |
+
"learning_rate": 6.537e-05,
|
| 45879 |
+
"loss": 0.3965,
|
| 45880 |
+
"step": 6538
|
| 45881 |
+
},
|
| 45882 |
+
{
|
| 45883 |
+
"epoch": 0.0006539,
|
| 45884 |
+
"grad_norm": 1.3438701629638672,
|
| 45885 |
+
"learning_rate": 6.538e-05,
|
| 45886 |
+
"loss": 0.405,
|
| 45887 |
+
"step": 6539
|
| 45888 |
+
},
|
| 45889 |
+
{
|
| 45890 |
+
"epoch": 0.000654,
|
| 45891 |
+
"grad_norm": 0.9981584548950195,
|
| 45892 |
+
"learning_rate": 6.539e-05,
|
| 45893 |
+
"loss": 0.3733,
|
| 45894 |
+
"step": 6540
|
| 45895 |
+
},
|
| 45896 |
+
{
|
| 45897 |
+
"epoch": 0.0006541,
|
| 45898 |
+
"grad_norm": 0.9692195057868958,
|
| 45899 |
+
"learning_rate": 6.54e-05,
|
| 45900 |
+
"loss": 0.3872,
|
| 45901 |
+
"step": 6541
|
| 45902 |
+
},
|
| 45903 |
+
{
|
| 45904 |
+
"epoch": 0.0006542,
|
| 45905 |
+
"grad_norm": 1.4983618259429932,
|
| 45906 |
+
"learning_rate": 6.541e-05,
|
| 45907 |
+
"loss": 0.5432,
|
| 45908 |
+
"step": 6542
|
| 45909 |
+
},
|
| 45910 |
+
{
|
| 45911 |
+
"epoch": 0.0006543,
|
| 45912 |
+
"grad_norm": 1.5753177404403687,
|
| 45913 |
+
"learning_rate": 6.542e-05,
|
| 45914 |
+
"loss": 0.4004,
|
| 45915 |
+
"step": 6543
|
| 45916 |
+
},
|
| 45917 |
+
{
|
| 45918 |
+
"epoch": 0.0006544,
|
| 45919 |
+
"grad_norm": 1.2118226289749146,
|
| 45920 |
+
"learning_rate": 6.543e-05,
|
| 45921 |
+
"loss": 0.3994,
|
| 45922 |
+
"step": 6544
|
| 45923 |
+
},
|
| 45924 |
+
{
|
| 45925 |
+
"epoch": 0.0006545,
|
| 45926 |
+
"grad_norm": 0.9601621627807617,
|
| 45927 |
+
"learning_rate": 6.544e-05,
|
| 45928 |
+
"loss": 0.3821,
|
| 45929 |
+
"step": 6545
|
| 45930 |
+
},
|
| 45931 |
+
{
|
| 45932 |
+
"epoch": 0.0006546,
|
| 45933 |
+
"grad_norm": 0.9599550366401672,
|
| 45934 |
+
"learning_rate": 6.545e-05,
|
| 45935 |
+
"loss": 0.3679,
|
| 45936 |
+
"step": 6546
|
| 45937 |
+
},
|
| 45938 |
+
{
|
| 45939 |
+
"epoch": 0.0006547,
|
| 45940 |
+
"grad_norm": 2.707258939743042,
|
| 45941 |
+
"learning_rate": 6.546e-05,
|
| 45942 |
+
"loss": 0.4639,
|
| 45943 |
+
"step": 6547
|
| 45944 |
+
},
|
| 45945 |
+
{
|
| 45946 |
+
"epoch": 0.0006548,
|
| 45947 |
+
"grad_norm": 2.5643324851989746,
|
| 45948 |
+
"learning_rate": 6.547000000000001e-05,
|
| 45949 |
+
"loss": 0.5105,
|
| 45950 |
+
"step": 6548
|
| 45951 |
+
},
|
| 45952 |
+
{
|
| 45953 |
+
"epoch": 0.0006549,
|
| 45954 |
+
"grad_norm": 1.3906834125518799,
|
| 45955 |
+
"learning_rate": 6.548e-05,
|
| 45956 |
+
"loss": 0.427,
|
| 45957 |
+
"step": 6549
|
| 45958 |
+
},
|
| 45959 |
+
{
|
| 45960 |
+
"epoch": 0.000655,
|
| 45961 |
+
"grad_norm": 1.1361576318740845,
|
| 45962 |
+
"learning_rate": 6.549e-05,
|
| 45963 |
+
"loss": 0.3708,
|
| 45964 |
+
"step": 6550
|
| 45965 |
+
},
|
| 45966 |
+
{
|
| 45967 |
+
"epoch": 0.0006551,
|
| 45968 |
+
"grad_norm": 0.9891266822814941,
|
| 45969 |
+
"learning_rate": 6.55e-05,
|
| 45970 |
+
"loss": 0.3682,
|
| 45971 |
+
"step": 6551
|
| 45972 |
+
},
|
| 45973 |
+
{
|
| 45974 |
+
"epoch": 0.0006552,
|
| 45975 |
+
"grad_norm": 1.2472726106643677,
|
| 45976 |
+
"learning_rate": 6.551e-05,
|
| 45977 |
+
"loss": 0.4463,
|
| 45978 |
+
"step": 6552
|
| 45979 |
+
},
|
| 45980 |
+
{
|
| 45981 |
+
"epoch": 0.0006553,
|
| 45982 |
+
"grad_norm": 1.0641355514526367,
|
| 45983 |
+
"learning_rate": 6.552e-05,
|
| 45984 |
+
"loss": 0.3435,
|
| 45985 |
+
"step": 6553
|
| 45986 |
+
},
|
| 45987 |
+
{
|
| 45988 |
+
"epoch": 0.0006554,
|
| 45989 |
+
"grad_norm": 1.7209123373031616,
|
| 45990 |
+
"learning_rate": 6.553e-05,
|
| 45991 |
+
"loss": 0.499,
|
| 45992 |
+
"step": 6554
|
| 45993 |
+
},
|
| 45994 |
+
{
|
| 45995 |
+
"epoch": 0.0006555,
|
| 45996 |
+
"grad_norm": 0.9860501289367676,
|
| 45997 |
+
"learning_rate": 6.554e-05,
|
| 45998 |
+
"loss": 0.3564,
|
| 45999 |
+
"step": 6555
|
| 46000 |
+
},
|
| 46001 |
+
{
|
| 46002 |
+
"epoch": 0.0006556,
|
| 46003 |
+
"grad_norm": 1.7217719554901123,
|
| 46004 |
+
"learning_rate": 6.555000000000001e-05,
|
| 46005 |
+
"loss": 0.4077,
|
| 46006 |
+
"step": 6556
|
| 46007 |
+
},
|
| 46008 |
+
{
|
| 46009 |
+
"epoch": 0.0006557,
|
| 46010 |
+
"grad_norm": 0.9856640696525574,
|
| 46011 |
+
"learning_rate": 6.556e-05,
|
| 46012 |
+
"loss": 0.3628,
|
| 46013 |
+
"step": 6557
|
| 46014 |
+
},
|
| 46015 |
+
{
|
| 46016 |
+
"epoch": 0.0006558,
|
| 46017 |
+
"grad_norm": 1.0517157316207886,
|
| 46018 |
+
"learning_rate": 6.557e-05,
|
| 46019 |
+
"loss": 0.3804,
|
| 46020 |
+
"step": 6558
|
| 46021 |
+
},
|
| 46022 |
+
{
|
| 46023 |
+
"epoch": 0.0006559,
|
| 46024 |
+
"grad_norm": 1.0080536603927612,
|
| 46025 |
+
"learning_rate": 6.558e-05,
|
| 46026 |
+
"loss": 0.3699,
|
| 46027 |
+
"step": 6559
|
| 46028 |
+
},
|
| 46029 |
+
{
|
| 46030 |
+
"epoch": 0.000656,
|
| 46031 |
+
"grad_norm": 3.1549456119537354,
|
| 46032 |
+
"learning_rate": 6.559e-05,
|
| 46033 |
+
"loss": 0.4944,
|
| 46034 |
+
"step": 6560
|
| 46035 |
+
},
|
| 46036 |
+
{
|
| 46037 |
+
"epoch": 0.0006561,
|
| 46038 |
+
"grad_norm": 1.1440428495407104,
|
| 46039 |
+
"learning_rate": 6.56e-05,
|
| 46040 |
+
"loss": 0.3672,
|
| 46041 |
+
"step": 6561
|
| 46042 |
+
},
|
| 46043 |
+
{
|
| 46044 |
+
"epoch": 0.0006562,
|
| 46045 |
+
"grad_norm": 1.6338598728179932,
|
| 46046 |
+
"learning_rate": 6.561e-05,
|
| 46047 |
+
"loss": 0.4395,
|
| 46048 |
+
"step": 6562
|
| 46049 |
+
},
|
| 46050 |
+
{
|
| 46051 |
+
"epoch": 0.0006563,
|
| 46052 |
+
"grad_norm": 1.1482481956481934,
|
| 46053 |
+
"learning_rate": 6.562000000000001e-05,
|
| 46054 |
+
"loss": 0.3816,
|
| 46055 |
+
"step": 6563
|
| 46056 |
+
},
|
| 46057 |
+
{
|
| 46058 |
+
"epoch": 0.0006564,
|
| 46059 |
+
"grad_norm": 0.991045355796814,
|
| 46060 |
+
"learning_rate": 6.563e-05,
|
| 46061 |
+
"loss": 0.3801,
|
| 46062 |
+
"step": 6564
|
| 46063 |
+
},
|
| 46064 |
+
{
|
| 46065 |
+
"epoch": 0.0006565,
|
| 46066 |
+
"grad_norm": 1.1286438703536987,
|
| 46067 |
+
"learning_rate": 6.564e-05,
|
| 46068 |
+
"loss": 0.3643,
|
| 46069 |
+
"step": 6565
|
| 46070 |
+
},
|
| 46071 |
+
{
|
| 46072 |
+
"epoch": 0.0006566,
|
| 46073 |
+
"grad_norm": 0.9734741449356079,
|
| 46074 |
+
"learning_rate": 6.565000000000001e-05,
|
| 46075 |
+
"loss": 0.3469,
|
| 46076 |
+
"step": 6566
|
| 46077 |
+
},
|
| 46078 |
+
{
|
| 46079 |
+
"epoch": 0.0006567,
|
| 46080 |
+
"grad_norm": 1.013515830039978,
|
| 46081 |
+
"learning_rate": 6.565999999999999e-05,
|
| 46082 |
+
"loss": 0.374,
|
| 46083 |
+
"step": 6567
|
| 46084 |
+
},
|
| 46085 |
+
{
|
| 46086 |
+
"epoch": 0.0006568,
|
| 46087 |
+
"grad_norm": 4.014554023742676,
|
| 46088 |
+
"learning_rate": 6.567e-05,
|
| 46089 |
+
"loss": 0.6902,
|
| 46090 |
+
"step": 6568
|
| 46091 |
+
},
|
| 46092 |
+
{
|
| 46093 |
+
"epoch": 0.0006569,
|
| 46094 |
+
"grad_norm": 1.896406888961792,
|
| 46095 |
+
"learning_rate": 6.568000000000001e-05,
|
| 46096 |
+
"loss": 0.3804,
|
| 46097 |
+
"step": 6569
|
| 46098 |
+
},
|
| 46099 |
+
{
|
| 46100 |
+
"epoch": 0.000657,
|
| 46101 |
+
"grad_norm": 1.2844294309616089,
|
| 46102 |
+
"learning_rate": 6.569e-05,
|
| 46103 |
+
"loss": 0.3657,
|
| 46104 |
+
"step": 6570
|
| 46105 |
+
},
|
| 46106 |
+
{
|
| 46107 |
+
"epoch": 0.0006571,
|
| 46108 |
+
"grad_norm": 1.3451746702194214,
|
| 46109 |
+
"learning_rate": 6.57e-05,
|
| 46110 |
+
"loss": 0.3879,
|
| 46111 |
+
"step": 6571
|
| 46112 |
+
},
|
| 46113 |
+
{
|
| 46114 |
+
"epoch": 0.0006572,
|
| 46115 |
+
"grad_norm": 0.845024824142456,
|
| 46116 |
+
"learning_rate": 6.571e-05,
|
| 46117 |
+
"loss": 0.3506,
|
| 46118 |
+
"step": 6572
|
| 46119 |
+
},
|
| 46120 |
+
{
|
| 46121 |
+
"epoch": 0.0006573,
|
| 46122 |
+
"grad_norm": 1.3707619905471802,
|
| 46123 |
+
"learning_rate": 6.572e-05,
|
| 46124 |
+
"loss": 0.4194,
|
| 46125 |
+
"step": 6573
|
| 46126 |
+
},
|
| 46127 |
+
{
|
| 46128 |
+
"epoch": 0.0006574,
|
| 46129 |
+
"grad_norm": 1.108884334564209,
|
| 46130 |
+
"learning_rate": 6.573e-05,
|
| 46131 |
+
"loss": 0.3682,
|
| 46132 |
+
"step": 6574
|
| 46133 |
+
},
|
| 46134 |
+
{
|
| 46135 |
+
"epoch": 0.0006575,
|
| 46136 |
+
"grad_norm": 1.4851621389389038,
|
| 46137 |
+
"learning_rate": 6.574e-05,
|
| 46138 |
+
"loss": 0.396,
|
| 46139 |
+
"step": 6575
|
| 46140 |
+
},
|
| 46141 |
+
{
|
| 46142 |
+
"epoch": 0.0006576,
|
| 46143 |
+
"grad_norm": 0.9890068769454956,
|
| 46144 |
+
"learning_rate": 6.575e-05,
|
| 46145 |
+
"loss": 0.3594,
|
| 46146 |
+
"step": 6576
|
| 46147 |
+
},
|
| 46148 |
+
{
|
| 46149 |
+
"epoch": 0.0006577,
|
| 46150 |
+
"grad_norm": 0.9809015989303589,
|
| 46151 |
+
"learning_rate": 6.576e-05,
|
| 46152 |
+
"loss": 0.3511,
|
| 46153 |
+
"step": 6577
|
| 46154 |
+
},
|
| 46155 |
+
{
|
| 46156 |
+
"epoch": 0.0006578,
|
| 46157 |
+
"grad_norm": 0.9301626682281494,
|
| 46158 |
+
"learning_rate": 6.577e-05,
|
| 46159 |
+
"loss": 0.3523,
|
| 46160 |
+
"step": 6578
|
| 46161 |
+
},
|
| 46162 |
+
{
|
| 46163 |
+
"epoch": 0.0006579,
|
| 46164 |
+
"grad_norm": 0.8530791997909546,
|
| 46165 |
+
"learning_rate": 6.578e-05,
|
| 46166 |
+
"loss": 0.3623,
|
| 46167 |
+
"step": 6579
|
| 46168 |
+
},
|
| 46169 |
+
{
|
| 46170 |
+
"epoch": 0.000658,
|
| 46171 |
+
"grad_norm": 0.7916417121887207,
|
| 46172 |
+
"learning_rate": 6.579e-05,
|
| 46173 |
+
"loss": 0.3401,
|
| 46174 |
+
"step": 6580
|
| 46175 |
+
},
|
| 46176 |
+
{
|
| 46177 |
+
"epoch": 0.0006581,
|
| 46178 |
+
"grad_norm": 0.9918129444122314,
|
| 46179 |
+
"learning_rate": 6.58e-05,
|
| 46180 |
+
"loss": 0.3806,
|
| 46181 |
+
"step": 6581
|
| 46182 |
+
},
|
| 46183 |
+
{
|
| 46184 |
+
"epoch": 0.0006582,
|
| 46185 |
+
"grad_norm": 0.9188030958175659,
|
| 46186 |
+
"learning_rate": 6.581000000000001e-05,
|
| 46187 |
+
"loss": 0.3701,
|
| 46188 |
+
"step": 6582
|
| 46189 |
+
},
|
| 46190 |
+
{
|
| 46191 |
+
"epoch": 0.0006583,
|
| 46192 |
+
"grad_norm": 0.993864893913269,
|
| 46193 |
+
"learning_rate": 6.582e-05,
|
| 46194 |
+
"loss": 0.4053,
|
| 46195 |
+
"step": 6583
|
| 46196 |
+
},
|
| 46197 |
+
{
|
| 46198 |
+
"epoch": 0.0006584,
|
| 46199 |
+
"grad_norm": 0.8649798631668091,
|
| 46200 |
+
"learning_rate": 6.583e-05,
|
| 46201 |
+
"loss": 0.3525,
|
| 46202 |
+
"step": 6584
|
| 46203 |
+
},
|
| 46204 |
+
{
|
| 46205 |
+
"epoch": 0.0006585,
|
| 46206 |
+
"grad_norm": 0.9735991358757019,
|
| 46207 |
+
"learning_rate": 6.584e-05,
|
| 46208 |
+
"loss": 0.3691,
|
| 46209 |
+
"step": 6585
|
| 46210 |
+
},
|
| 46211 |
+
{
|
| 46212 |
+
"epoch": 0.0006586,
|
| 46213 |
+
"grad_norm": 1.0432755947113037,
|
| 46214 |
+
"learning_rate": 6.585e-05,
|
| 46215 |
+
"loss": 0.3967,
|
| 46216 |
+
"step": 6586
|
| 46217 |
+
},
|
| 46218 |
+
{
|
| 46219 |
+
"epoch": 0.0006587,
|
| 46220 |
+
"grad_norm": 1.1535252332687378,
|
| 46221 |
+
"learning_rate": 6.586e-05,
|
| 46222 |
+
"loss": 0.3975,
|
| 46223 |
+
"step": 6587
|
| 46224 |
+
},
|
| 46225 |
+
{
|
| 46226 |
+
"epoch": 0.0006588,
|
| 46227 |
+
"grad_norm": 0.8514976501464844,
|
| 46228 |
+
"learning_rate": 6.587e-05,
|
| 46229 |
+
"loss": 0.3455,
|
| 46230 |
+
"step": 6588
|
| 46231 |
+
},
|
| 46232 |
+
{
|
| 46233 |
+
"epoch": 0.0006589,
|
| 46234 |
+
"grad_norm": 0.8397539258003235,
|
| 46235 |
+
"learning_rate": 6.588e-05,
|
| 46236 |
+
"loss": 0.3289,
|
| 46237 |
+
"step": 6589
|
| 46238 |
+
},
|
| 46239 |
+
{
|
| 46240 |
+
"epoch": 0.000659,
|
| 46241 |
+
"grad_norm": 0.8472413420677185,
|
| 46242 |
+
"learning_rate": 6.589000000000001e-05,
|
| 46243 |
+
"loss": 0.3477,
|
| 46244 |
+
"step": 6590
|
| 46245 |
+
},
|
| 46246 |
+
{
|
| 46247 |
+
"epoch": 0.0006591,
|
| 46248 |
+
"grad_norm": 0.8127468824386597,
|
| 46249 |
+
"learning_rate": 6.59e-05,
|
| 46250 |
+
"loss": 0.3496,
|
| 46251 |
+
"step": 6591
|
| 46252 |
+
},
|
| 46253 |
+
{
|
| 46254 |
+
"epoch": 0.0006592,
|
| 46255 |
+
"grad_norm": 0.8434761166572571,
|
| 46256 |
+
"learning_rate": 6.591e-05,
|
| 46257 |
+
"loss": 0.3477,
|
| 46258 |
+
"step": 6592
|
| 46259 |
+
},
|
| 46260 |
+
{
|
| 46261 |
+
"epoch": 0.0006593,
|
| 46262 |
+
"grad_norm": 0.9362355470657349,
|
| 46263 |
+
"learning_rate": 6.592e-05,
|
| 46264 |
+
"loss": 0.3794,
|
| 46265 |
+
"step": 6593
|
| 46266 |
+
},
|
| 46267 |
+
{
|
| 46268 |
+
"epoch": 0.0006594,
|
| 46269 |
+
"grad_norm": 1.5990657806396484,
|
| 46270 |
+
"learning_rate": 6.593e-05,
|
| 46271 |
+
"loss": 0.5615,
|
| 46272 |
+
"step": 6594
|
| 46273 |
+
},
|
| 46274 |
+
{
|
| 46275 |
+
"epoch": 0.0006595,
|
| 46276 |
+
"grad_norm": 0.7806490063667297,
|
| 46277 |
+
"learning_rate": 6.594e-05,
|
| 46278 |
+
"loss": 0.3267,
|
| 46279 |
+
"step": 6595
|
| 46280 |
+
},
|
| 46281 |
+
{
|
| 46282 |
+
"epoch": 0.0006596,
|
| 46283 |
+
"grad_norm": 0.9472602009773254,
|
| 46284 |
+
"learning_rate": 6.595e-05,
|
| 46285 |
+
"loss": 0.3694,
|
| 46286 |
+
"step": 6596
|
| 46287 |
+
},
|
| 46288 |
+
{
|
| 46289 |
+
"epoch": 0.0006597,
|
| 46290 |
+
"grad_norm": 1.820241928100586,
|
| 46291 |
+
"learning_rate": 6.596000000000001e-05,
|
| 46292 |
+
"loss": 0.3623,
|
| 46293 |
+
"step": 6597
|
| 46294 |
+
},
|
| 46295 |
+
{
|
| 46296 |
+
"epoch": 0.0006598,
|
| 46297 |
+
"grad_norm": 1.3111149072647095,
|
| 46298 |
+
"learning_rate": 6.597e-05,
|
| 46299 |
+
"loss": 0.4211,
|
| 46300 |
+
"step": 6598
|
| 46301 |
+
},
|
| 46302 |
+
{
|
| 46303 |
+
"epoch": 0.0006599,
|
| 46304 |
+
"grad_norm": 1.7332957983016968,
|
| 46305 |
+
"learning_rate": 6.598e-05,
|
| 46306 |
+
"loss": 0.5574,
|
| 46307 |
+
"step": 6599
|
| 46308 |
+
},
|
| 46309 |
+
{
|
| 46310 |
+
"epoch": 0.00066,
|
| 46311 |
+
"grad_norm": 1.0712653398513794,
|
| 46312 |
+
"learning_rate": 6.599000000000001e-05,
|
| 46313 |
+
"loss": 0.3293,
|
| 46314 |
+
"step": 6600
|
| 46315 |
+
},
|
| 46316 |
+
{
|
| 46317 |
+
"epoch": 0.0006601,
|
| 46318 |
+
"grad_norm": 6.920145034790039,
|
| 46319 |
+
"learning_rate": 6.599999999999999e-05,
|
| 46320 |
+
"loss": 0.6079,
|
| 46321 |
+
"step": 6601
|
| 46322 |
+
},
|
| 46323 |
+
{
|
| 46324 |
+
"epoch": 0.0006602,
|
| 46325 |
+
"grad_norm": 1.0699074268341064,
|
| 46326 |
+
"learning_rate": 6.601e-05,
|
| 46327 |
+
"loss": 0.3572,
|
| 46328 |
+
"step": 6602
|
| 46329 |
+
},
|
| 46330 |
+
{
|
| 46331 |
+
"epoch": 0.0006603,
|
| 46332 |
+
"grad_norm": 0.9052705764770508,
|
| 46333 |
+
"learning_rate": 6.602000000000001e-05,
|
| 46334 |
+
"loss": 0.3411,
|
| 46335 |
+
"step": 6603
|
| 46336 |
+
},
|
| 46337 |
+
{
|
| 46338 |
+
"epoch": 0.0006604,
|
| 46339 |
+
"grad_norm": 0.9185029864311218,
|
| 46340 |
+
"learning_rate": 6.603e-05,
|
| 46341 |
+
"loss": 0.3469,
|
| 46342 |
+
"step": 6604
|
| 46343 |
+
},
|
| 46344 |
+
{
|
| 46345 |
+
"epoch": 0.0006605,
|
| 46346 |
+
"grad_norm": 0.8422958254814148,
|
| 46347 |
+
"learning_rate": 6.604e-05,
|
| 46348 |
+
"loss": 0.3257,
|
| 46349 |
+
"step": 6605
|
| 46350 |
+
},
|
| 46351 |
+
{
|
| 46352 |
+
"epoch": 0.0006606,
|
| 46353 |
+
"grad_norm": 0.9281983971595764,
|
| 46354 |
+
"learning_rate": 6.605e-05,
|
| 46355 |
+
"loss": 0.3499,
|
| 46356 |
+
"step": 6606
|
| 46357 |
+
},
|
| 46358 |
+
{
|
| 46359 |
+
"epoch": 0.0006607,
|
| 46360 |
+
"grad_norm": 0.8922631144523621,
|
| 46361 |
+
"learning_rate": 6.606e-05,
|
| 46362 |
+
"loss": 0.3479,
|
| 46363 |
+
"step": 6607
|
| 46364 |
+
},
|
| 46365 |
+
{
|
| 46366 |
+
"epoch": 0.0006608,
|
| 46367 |
+
"grad_norm": 0.8519024848937988,
|
| 46368 |
+
"learning_rate": 6.607e-05,
|
| 46369 |
+
"loss": 0.3303,
|
| 46370 |
+
"step": 6608
|
| 46371 |
+
},
|
| 46372 |
+
{
|
| 46373 |
+
"epoch": 0.0006609,
|
| 46374 |
+
"grad_norm": 1.4792611598968506,
|
| 46375 |
+
"learning_rate": 6.608e-05,
|
| 46376 |
+
"loss": 0.408,
|
| 46377 |
+
"step": 6609
|
| 46378 |
+
},
|
| 46379 |
+
{
|
| 46380 |
+
"epoch": 0.000661,
|
| 46381 |
+
"grad_norm": 0.9307885766029358,
|
| 46382 |
+
"learning_rate": 6.609e-05,
|
| 46383 |
+
"loss": 0.3396,
|
| 46384 |
+
"step": 6610
|
| 46385 |
+
},
|
| 46386 |
+
{
|
| 46387 |
+
"epoch": 0.0006611,
|
| 46388 |
+
"grad_norm": 0.9870061278343201,
|
| 46389 |
+
"learning_rate": 6.61e-05,
|
| 46390 |
+
"loss": 0.3652,
|
| 46391 |
+
"step": 6611
|
| 46392 |
+
},
|
| 46393 |
+
{
|
| 46394 |
+
"epoch": 0.0006612,
|
| 46395 |
+
"grad_norm": 0.9354746341705322,
|
| 46396 |
+
"learning_rate": 6.611e-05,
|
| 46397 |
+
"loss": 0.3447,
|
| 46398 |
+
"step": 6612
|
| 46399 |
+
},
|
| 46400 |
+
{
|
| 46401 |
+
"epoch": 0.0006613,
|
| 46402 |
+
"grad_norm": 4.36238956451416,
|
| 46403 |
+
"learning_rate": 6.612e-05,
|
| 46404 |
+
"loss": 0.6912,
|
| 46405 |
+
"step": 6613
|
| 46406 |
+
},
|
| 46407 |
+
{
|
| 46408 |
+
"epoch": 0.0006614,
|
| 46409 |
+
"grad_norm": 1.068058729171753,
|
| 46410 |
+
"learning_rate": 6.613e-05,
|
| 46411 |
+
"loss": 0.356,
|
| 46412 |
+
"step": 6614
|
| 46413 |
+
},
|
| 46414 |
+
{
|
| 46415 |
+
"epoch": 0.0006615,
|
| 46416 |
+
"grad_norm": 0.9313966035842896,
|
| 46417 |
+
"learning_rate": 6.614e-05,
|
| 46418 |
+
"loss": 0.3164,
|
| 46419 |
+
"step": 6615
|
| 46420 |
+
},
|
| 46421 |
+
{
|
| 46422 |
+
"epoch": 0.0006616,
|
| 46423 |
+
"grad_norm": 0.8230025172233582,
|
| 46424 |
+
"learning_rate": 6.615e-05,
|
| 46425 |
+
"loss": 0.3357,
|
| 46426 |
+
"step": 6616
|
| 46427 |
+
},
|
| 46428 |
+
{
|
| 46429 |
+
"epoch": 0.0006617,
|
| 46430 |
+
"grad_norm": 0.7870452404022217,
|
| 46431 |
+
"learning_rate": 6.616e-05,
|
| 46432 |
+
"loss": 0.3269,
|
| 46433 |
+
"step": 6617
|
| 46434 |
+
},
|
| 46435 |
+
{
|
| 46436 |
+
"epoch": 0.0006618,
|
| 46437 |
+
"grad_norm": 0.9555226564407349,
|
| 46438 |
+
"learning_rate": 6.617e-05,
|
| 46439 |
+
"loss": 0.3372,
|
| 46440 |
+
"step": 6618
|
| 46441 |
+
},
|
| 46442 |
+
{
|
| 46443 |
+
"epoch": 0.0006619,
|
| 46444 |
+
"grad_norm": 0.7982156872749329,
|
| 46445 |
+
"learning_rate": 6.618e-05,
|
| 46446 |
+
"loss": 0.3091,
|
| 46447 |
+
"step": 6619
|
| 46448 |
+
},
|
| 46449 |
+
{
|
| 46450 |
+
"epoch": 0.000662,
|
| 46451 |
+
"grad_norm": 0.8481671810150146,
|
| 46452 |
+
"learning_rate": 6.619e-05,
|
| 46453 |
+
"loss": 0.3271,
|
| 46454 |
+
"step": 6620
|
| 46455 |
+
},
|
| 46456 |
+
{
|
| 46457 |
+
"epoch": 0.0006621,
|
| 46458 |
+
"grad_norm": 1.6612424850463867,
|
| 46459 |
+
"learning_rate": 6.62e-05,
|
| 46460 |
+
"loss": 0.4949,
|
| 46461 |
+
"step": 6621
|
| 46462 |
+
},
|
| 46463 |
+
{
|
| 46464 |
+
"epoch": 0.0006622,
|
| 46465 |
+
"grad_norm": 0.8787587881088257,
|
| 46466 |
+
"learning_rate": 6.621e-05,
|
| 46467 |
+
"loss": 0.3311,
|
| 46468 |
+
"step": 6622
|
| 46469 |
+
},
|
| 46470 |
+
{
|
| 46471 |
+
"epoch": 0.0006623,
|
| 46472 |
+
"grad_norm": 0.8238551020622253,
|
| 46473 |
+
"learning_rate": 6.622e-05,
|
| 46474 |
+
"loss": 0.3291,
|
| 46475 |
+
"step": 6623
|
| 46476 |
+
},
|
| 46477 |
+
{
|
| 46478 |
+
"epoch": 0.0006624,
|
| 46479 |
+
"grad_norm": 0.8153005838394165,
|
| 46480 |
+
"learning_rate": 6.623000000000001e-05,
|
| 46481 |
+
"loss": 0.3267,
|
| 46482 |
+
"step": 6624
|
| 46483 |
+
},
|
| 46484 |
+
{
|
| 46485 |
+
"epoch": 0.0006625,
|
| 46486 |
+
"grad_norm": 1.2538508176803589,
|
| 46487 |
+
"learning_rate": 6.624e-05,
|
| 46488 |
+
"loss": 0.3623,
|
| 46489 |
+
"step": 6625
|
| 46490 |
+
},
|
| 46491 |
+
{
|
| 46492 |
+
"epoch": 0.0006626,
|
| 46493 |
+
"grad_norm": 0.8147600293159485,
|
| 46494 |
+
"learning_rate": 6.625e-05,
|
| 46495 |
+
"loss": 0.3313,
|
| 46496 |
+
"step": 6626
|
| 46497 |
+
},
|
| 46498 |
+
{
|
| 46499 |
+
"epoch": 0.0006627,
|
| 46500 |
+
"grad_norm": 0.9060648679733276,
|
| 46501 |
+
"learning_rate": 6.626e-05,
|
| 46502 |
+
"loss": 0.353,
|
| 46503 |
+
"step": 6627
|
| 46504 |
+
},
|
| 46505 |
+
{
|
| 46506 |
+
"epoch": 0.0006628,
|
| 46507 |
+
"grad_norm": 0.81404048204422,
|
| 46508 |
+
"learning_rate": 6.627e-05,
|
| 46509 |
+
"loss": 0.3438,
|
| 46510 |
+
"step": 6628
|
| 46511 |
+
},
|
| 46512 |
+
{
|
| 46513 |
+
"epoch": 0.0006629,
|
| 46514 |
+
"grad_norm": 0.8007271885871887,
|
| 46515 |
+
"learning_rate": 6.628e-05,
|
| 46516 |
+
"loss": 0.333,
|
| 46517 |
+
"step": 6629
|
| 46518 |
+
},
|
| 46519 |
+
{
|
| 46520 |
+
"epoch": 0.000663,
|
| 46521 |
+
"grad_norm": 0.7897888422012329,
|
| 46522 |
+
"learning_rate": 6.629e-05,
|
| 46523 |
+
"loss": 0.3291,
|
| 46524 |
+
"step": 6630
|
| 46525 |
+
},
|
| 46526 |
+
{
|
| 46527 |
+
"epoch": 0.0006631,
|
| 46528 |
+
"grad_norm": 0.7997754812240601,
|
| 46529 |
+
"learning_rate": 6.630000000000001e-05,
|
| 46530 |
+
"loss": 0.3167,
|
| 46531 |
+
"step": 6631
|
| 46532 |
+
},
|
| 46533 |
+
{
|
| 46534 |
+
"epoch": 0.0006632,
|
| 46535 |
+
"grad_norm": 0.7762719988822937,
|
| 46536 |
+
"learning_rate": 6.631e-05,
|
| 46537 |
+
"loss": 0.3237,
|
| 46538 |
+
"step": 6632
|
| 46539 |
+
},
|
| 46540 |
+
{
|
| 46541 |
+
"epoch": 0.0006633,
|
| 46542 |
+
"grad_norm": 0.7300854325294495,
|
| 46543 |
+
"learning_rate": 6.632e-05,
|
| 46544 |
+
"loss": 0.3445,
|
| 46545 |
+
"step": 6633
|
| 46546 |
+
},
|
| 46547 |
+
{
|
| 46548 |
+
"epoch": 0.0006634,
|
| 46549 |
+
"grad_norm": 3.1596171855926514,
|
| 46550 |
+
"learning_rate": 6.633000000000001e-05,
|
| 46551 |
+
"loss": 0.6033,
|
| 46552 |
+
"step": 6634
|
| 46553 |
+
},
|
| 46554 |
+
{
|
| 46555 |
+
"epoch": 0.0006635,
|
| 46556 |
+
"grad_norm": 0.7889407873153687,
|
| 46557 |
+
"learning_rate": 6.633999999999999e-05,
|
| 46558 |
+
"loss": 0.3152,
|
| 46559 |
+
"step": 6635
|
| 46560 |
+
},
|
| 46561 |
+
{
|
| 46562 |
+
"epoch": 0.0006636,
|
| 46563 |
+
"grad_norm": 0.7632907629013062,
|
| 46564 |
+
"learning_rate": 6.635e-05,
|
| 46565 |
+
"loss": 0.3101,
|
| 46566 |
+
"step": 6636
|
| 46567 |
+
},
|
| 46568 |
+
{
|
| 46569 |
+
"epoch": 0.0006637,
|
| 46570 |
+
"grad_norm": 0.8322415947914124,
|
| 46571 |
+
"learning_rate": 6.636000000000001e-05,
|
| 46572 |
+
"loss": 0.324,
|
| 46573 |
+
"step": 6637
|
| 46574 |
+
},
|
| 46575 |
+
{
|
| 46576 |
+
"epoch": 0.0006638,
|
| 46577 |
+
"grad_norm": 0.6845356822013855,
|
| 46578 |
+
"learning_rate": 6.636999999999999e-05,
|
| 46579 |
+
"loss": 0.3118,
|
| 46580 |
+
"step": 6638
|
| 46581 |
+
},
|
| 46582 |
+
{
|
| 46583 |
+
"epoch": 0.0006639,
|
| 46584 |
+
"grad_norm": 1.0866153240203857,
|
| 46585 |
+
"learning_rate": 6.638e-05,
|
| 46586 |
+
"loss": 0.3462,
|
| 46587 |
+
"step": 6639
|
| 46588 |
+
},
|
| 46589 |
+
{
|
| 46590 |
+
"epoch": 0.000664,
|
| 46591 |
+
"grad_norm": 0.6863608956336975,
|
| 46592 |
+
"learning_rate": 6.639e-05,
|
| 46593 |
+
"loss": 0.3103,
|
| 46594 |
+
"step": 6640
|
| 46595 |
+
},
|
| 46596 |
+
{
|
| 46597 |
+
"epoch": 0.0006641,
|
| 46598 |
+
"grad_norm": 0.7848332524299622,
|
| 46599 |
+
"learning_rate": 6.64e-05,
|
| 46600 |
+
"loss": 0.3274,
|
| 46601 |
+
"step": 6641
|
| 46602 |
+
},
|
| 46603 |
+
{
|
| 46604 |
+
"epoch": 0.0006642,
|
| 46605 |
+
"grad_norm": 1.3172481060028076,
|
| 46606 |
+
"learning_rate": 6.641e-05,
|
| 46607 |
+
"loss": 0.396,
|
| 46608 |
+
"step": 6642
|
| 46609 |
+
},
|
| 46610 |
+
{
|
| 46611 |
+
"epoch": 0.0006643,
|
| 46612 |
+
"grad_norm": 0.7006875276565552,
|
| 46613 |
+
"learning_rate": 6.642e-05,
|
| 46614 |
+
"loss": 0.3181,
|
| 46615 |
+
"step": 6643
|
| 46616 |
+
},
|
| 46617 |
+
{
|
| 46618 |
+
"epoch": 0.0006644,
|
| 46619 |
+
"grad_norm": 0.8330028653144836,
|
| 46620 |
+
"learning_rate": 6.643e-05,
|
| 46621 |
+
"loss": 0.3379,
|
| 46622 |
+
"step": 6644
|
| 46623 |
+
},
|
| 46624 |
+
{
|
| 46625 |
+
"epoch": 0.0006645,
|
| 46626 |
+
"grad_norm": 0.6954527497291565,
|
| 46627 |
+
"learning_rate": 6.644e-05,
|
| 46628 |
+
"loss": 0.302,
|
| 46629 |
+
"step": 6645
|
| 46630 |
+
},
|
| 46631 |
+
{
|
| 46632 |
+
"epoch": 0.0006646,
|
| 46633 |
+
"grad_norm": 0.8154837489128113,
|
| 46634 |
+
"learning_rate": 6.645e-05,
|
| 46635 |
+
"loss": 0.3411,
|
| 46636 |
+
"step": 6646
|
| 46637 |
+
},
|
| 46638 |
+
{
|
| 46639 |
+
"epoch": 0.0006647,
|
| 46640 |
+
"grad_norm": 0.6617774367332458,
|
| 46641 |
+
"learning_rate": 6.646e-05,
|
| 46642 |
+
"loss": 0.3037,
|
| 46643 |
+
"step": 6647
|
| 46644 |
+
},
|
| 46645 |
+
{
|
| 46646 |
+
"epoch": 0.0006648,
|
| 46647 |
+
"grad_norm": 0.7139044404029846,
|
| 46648 |
+
"learning_rate": 6.647e-05,
|
| 46649 |
+
"loss": 0.3293,
|
| 46650 |
+
"step": 6648
|
| 46651 |
+
},
|
| 46652 |
+
{
|
| 46653 |
+
"epoch": 0.0006649,
|
| 46654 |
+
"grad_norm": 0.693368673324585,
|
| 46655 |
+
"learning_rate": 6.648e-05,
|
| 46656 |
+
"loss": 0.3127,
|
| 46657 |
+
"step": 6649
|
| 46658 |
+
},
|
| 46659 |
+
{
|
| 46660 |
+
"epoch": 0.000665,
|
| 46661 |
+
"grad_norm": 0.7504817843437195,
|
| 46662 |
+
"learning_rate": 6.649e-05,
|
| 46663 |
+
"loss": 0.3447,
|
| 46664 |
+
"step": 6650
|
| 46665 |
+
},
|
| 46666 |
+
{
|
| 46667 |
+
"epoch": 0.0006651,
|
| 46668 |
+
"grad_norm": 0.7309079766273499,
|
| 46669 |
+
"learning_rate": 6.65e-05,
|
| 46670 |
+
"loss": 0.3193,
|
| 46671 |
+
"step": 6651
|
| 46672 |
+
},
|
| 46673 |
+
{
|
| 46674 |
+
"epoch": 0.0006652,
|
| 46675 |
+
"grad_norm": 1.2137302160263062,
|
| 46676 |
+
"learning_rate": 6.651e-05,
|
| 46677 |
+
"loss": 0.4045,
|
| 46678 |
+
"step": 6652
|
| 46679 |
+
},
|
| 46680 |
+
{
|
| 46681 |
+
"epoch": 0.0006653,
|
| 46682 |
+
"grad_norm": 0.7493794560432434,
|
| 46683 |
+
"learning_rate": 6.652000000000001e-05,
|
| 46684 |
+
"loss": 0.322,
|
| 46685 |
+
"step": 6653
|
| 46686 |
+
},
|
| 46687 |
+
{
|
| 46688 |
+
"epoch": 0.0006654,
|
| 46689 |
+
"grad_norm": 0.8475434184074402,
|
| 46690 |
+
"learning_rate": 6.653e-05,
|
| 46691 |
+
"loss": 0.3335,
|
| 46692 |
+
"step": 6654
|
| 46693 |
+
},
|
| 46694 |
+
{
|
| 46695 |
+
"epoch": 0.0006655,
|
| 46696 |
+
"grad_norm": 0.7326182723045349,
|
| 46697 |
+
"learning_rate": 6.654e-05,
|
| 46698 |
+
"loss": 0.314,
|
| 46699 |
+
"step": 6655
|
| 46700 |
+
},
|
| 46701 |
+
{
|
| 46702 |
+
"epoch": 0.0006656,
|
| 46703 |
+
"grad_norm": 1.0516753196716309,
|
| 46704 |
+
"learning_rate": 6.655e-05,
|
| 46705 |
+
"loss": 0.3462,
|
| 46706 |
+
"step": 6656
|
| 46707 |
+
},
|
| 46708 |
+
{
|
| 46709 |
+
"epoch": 0.0006657,
|
| 46710 |
+
"grad_norm": 0.7796412706375122,
|
| 46711 |
+
"learning_rate": 6.656e-05,
|
| 46712 |
+
"loss": 0.3206,
|
| 46713 |
+
"step": 6657
|
| 46714 |
+
},
|
| 46715 |
+
{
|
| 46716 |
+
"epoch": 0.0006658,
|
| 46717 |
+
"grad_norm": 0.7046060562133789,
|
| 46718 |
+
"learning_rate": 6.657000000000001e-05,
|
| 46719 |
+
"loss": 0.304,
|
| 46720 |
+
"step": 6658
|
| 46721 |
+
},
|
| 46722 |
+
{
|
| 46723 |
+
"epoch": 0.0006659,
|
| 46724 |
+
"grad_norm": 0.7619063258171082,
|
| 46725 |
+
"learning_rate": 6.658e-05,
|
| 46726 |
+
"loss": 0.3269,
|
| 46727 |
+
"step": 6659
|
| 46728 |
+
},
|
| 46729 |
+
{
|
| 46730 |
+
"epoch": 0.000666,
|
| 46731 |
+
"grad_norm": 0.7655344009399414,
|
| 46732 |
+
"learning_rate": 6.659e-05,
|
| 46733 |
+
"loss": 0.3435,
|
| 46734 |
+
"step": 6660
|
| 46735 |
+
},
|
| 46736 |
+
{
|
| 46737 |
+
"epoch": 0.0006661,
|
| 46738 |
+
"grad_norm": 0.7178213000297546,
|
| 46739 |
+
"learning_rate": 6.66e-05,
|
| 46740 |
+
"loss": 0.3218,
|
| 46741 |
+
"step": 6661
|
| 46742 |
+
},
|
| 46743 |
+
{
|
| 46744 |
+
"epoch": 0.0006662,
|
| 46745 |
+
"grad_norm": 0.80521160364151,
|
| 46746 |
+
"learning_rate": 6.661e-05,
|
| 46747 |
+
"loss": 0.3391,
|
| 46748 |
+
"step": 6662
|
| 46749 |
+
},
|
| 46750 |
+
{
|
| 46751 |
+
"epoch": 0.0006663,
|
| 46752 |
+
"grad_norm": 0.7512543201446533,
|
| 46753 |
+
"learning_rate": 6.662e-05,
|
| 46754 |
+
"loss": 0.3042,
|
| 46755 |
+
"step": 6663
|
| 46756 |
+
},
|
| 46757 |
+
{
|
| 46758 |
+
"epoch": 0.0006664,
|
| 46759 |
+
"grad_norm": 0.7814407348632812,
|
| 46760 |
+
"learning_rate": 6.663e-05,
|
| 46761 |
+
"loss": 0.332,
|
| 46762 |
+
"step": 6664
|
| 46763 |
+
},
|
| 46764 |
+
{
|
| 46765 |
+
"epoch": 0.0006665,
|
| 46766 |
+
"grad_norm": 0.7427365183830261,
|
| 46767 |
+
"learning_rate": 6.664000000000001e-05,
|
| 46768 |
+
"loss": 0.323,
|
| 46769 |
+
"step": 6665
|
| 46770 |
+
},
|
| 46771 |
+
{
|
| 46772 |
+
"epoch": 0.0006666,
|
| 46773 |
+
"grad_norm": 0.6811539530754089,
|
| 46774 |
+
"learning_rate": 6.665e-05,
|
| 46775 |
+
"loss": 0.3052,
|
| 46776 |
+
"step": 6666
|
| 46777 |
+
},
|
| 46778 |
+
{
|
| 46779 |
+
"epoch": 0.0006667,
|
| 46780 |
+
"grad_norm": 0.7411268353462219,
|
| 46781 |
+
"learning_rate": 6.666e-05,
|
| 46782 |
+
"loss": 0.3005,
|
| 46783 |
+
"step": 6667
|
| 46784 |
+
},
|
| 46785 |
+
{
|
| 46786 |
+
"epoch": 0.0006668,
|
| 46787 |
+
"grad_norm": 0.7068269848823547,
|
| 46788 |
+
"learning_rate": 6.667000000000001e-05,
|
| 46789 |
+
"loss": 0.3284,
|
| 46790 |
+
"step": 6668
|
| 46791 |
+
},
|
| 46792 |
+
{
|
| 46793 |
+
"epoch": 0.0006669,
|
| 46794 |
+
"grad_norm": 0.7144364714622498,
|
| 46795 |
+
"learning_rate": 6.667999999999999e-05,
|
| 46796 |
+
"loss": 0.3391,
|
| 46797 |
+
"step": 6669
|
| 46798 |
+
},
|
| 46799 |
+
{
|
| 46800 |
+
"epoch": 0.000667,
|
| 46801 |
+
"grad_norm": 0.6556010246276855,
|
| 46802 |
+
"learning_rate": 6.669e-05,
|
| 46803 |
+
"loss": 0.3145,
|
| 46804 |
+
"step": 6670
|
| 46805 |
+
},
|
| 46806 |
+
{
|
| 46807 |
+
"epoch": 0.0006671,
|
| 46808 |
+
"grad_norm": 1.214873194694519,
|
| 46809 |
+
"learning_rate": 6.670000000000001e-05,
|
| 46810 |
+
"loss": 0.4189,
|
| 46811 |
+
"step": 6671
|
| 46812 |
+
},
|
| 46813 |
+
{
|
| 46814 |
+
"epoch": 0.0006672,
|
| 46815 |
+
"grad_norm": 0.7603350281715393,
|
| 46816 |
+
"learning_rate": 6.670999999999999e-05,
|
| 46817 |
+
"loss": 0.3342,
|
| 46818 |
+
"step": 6672
|
| 46819 |
+
},
|
| 46820 |
+
{
|
| 46821 |
+
"epoch": 0.0006673,
|
| 46822 |
+
"grad_norm": 0.7102590799331665,
|
| 46823 |
+
"learning_rate": 6.672e-05,
|
| 46824 |
+
"loss": 0.3152,
|
| 46825 |
+
"step": 6673
|
| 46826 |
+
},
|
| 46827 |
+
{
|
| 46828 |
+
"epoch": 0.0006674,
|
| 46829 |
+
"grad_norm": 0.7383694648742676,
|
| 46830 |
+
"learning_rate": 6.673e-05,
|
| 46831 |
+
"loss": 0.3435,
|
| 46832 |
+
"step": 6674
|
| 46833 |
+
},
|
| 46834 |
+
{
|
| 46835 |
+
"epoch": 0.0006675,
|
| 46836 |
+
"grad_norm": 0.6528864502906799,
|
| 46837 |
+
"learning_rate": 6.674e-05,
|
| 46838 |
+
"loss": 0.3225,
|
| 46839 |
+
"step": 6675
|
| 46840 |
+
},
|
| 46841 |
+
{
|
| 46842 |
+
"epoch": 0.0006676,
|
| 46843 |
+
"grad_norm": 0.8522922992706299,
|
| 46844 |
+
"learning_rate": 6.675e-05,
|
| 46845 |
+
"loss": 0.3503,
|
| 46846 |
+
"step": 6676
|
| 46847 |
+
},
|
| 46848 |
+
{
|
| 46849 |
+
"epoch": 0.0006677,
|
| 46850 |
+
"grad_norm": 0.7166839241981506,
|
| 46851 |
+
"learning_rate": 6.676e-05,
|
| 46852 |
+
"loss": 0.3264,
|
| 46853 |
+
"step": 6677
|
| 46854 |
+
},
|
| 46855 |
+
{
|
| 46856 |
+
"epoch": 0.0006678,
|
| 46857 |
+
"grad_norm": 0.7173413634300232,
|
| 46858 |
+
"learning_rate": 6.677e-05,
|
| 46859 |
+
"loss": 0.3152,
|
| 46860 |
+
"step": 6678
|
| 46861 |
+
},
|
| 46862 |
+
{
|
| 46863 |
+
"epoch": 0.0006679,
|
| 46864 |
+
"grad_norm": 0.7953842878341675,
|
| 46865 |
+
"learning_rate": 6.678e-05,
|
| 46866 |
+
"loss": 0.3286,
|
| 46867 |
+
"step": 6679
|
| 46868 |
+
},
|
| 46869 |
+
{
|
| 46870 |
+
"epoch": 0.000668,
|
| 46871 |
+
"grad_norm": 0.7262922525405884,
|
| 46872 |
+
"learning_rate": 6.679e-05,
|
| 46873 |
+
"loss": 0.3206,
|
| 46874 |
+
"step": 6680
|
| 46875 |
+
},
|
| 46876 |
+
{
|
| 46877 |
+
"epoch": 0.0006681,
|
| 46878 |
+
"grad_norm": 0.6505809426307678,
|
| 46879 |
+
"learning_rate": 6.68e-05,
|
| 46880 |
+
"loss": 0.3037,
|
| 46881 |
+
"step": 6681
|
| 46882 |
+
},
|
| 46883 |
+
{
|
| 46884 |
+
"epoch": 0.0006682,
|
| 46885 |
+
"grad_norm": 0.8676546812057495,
|
| 46886 |
+
"learning_rate": 6.681e-05,
|
| 46887 |
+
"loss": 0.3711,
|
| 46888 |
+
"step": 6682
|
| 46889 |
+
},
|
| 46890 |
+
{
|
| 46891 |
+
"epoch": 0.0006683,
|
| 46892 |
+
"grad_norm": 0.638432502746582,
|
| 46893 |
+
"learning_rate": 6.682e-05,
|
| 46894 |
+
"loss": 0.2893,
|
| 46895 |
+
"step": 6683
|
| 46896 |
+
},
|
| 46897 |
+
{
|
| 46898 |
+
"epoch": 0.0006684,
|
| 46899 |
+
"grad_norm": 0.8237349390983582,
|
| 46900 |
+
"learning_rate": 6.683e-05,
|
| 46901 |
+
"loss": 0.3335,
|
| 46902 |
+
"step": 6684
|
| 46903 |
+
},
|
| 46904 |
+
{
|
| 46905 |
+
"epoch": 0.0006685,
|
| 46906 |
+
"grad_norm": 0.727479100227356,
|
| 46907 |
+
"learning_rate": 6.684e-05,
|
| 46908 |
+
"loss": 0.3264,
|
| 46909 |
+
"step": 6685
|
| 46910 |
+
},
|
| 46911 |
+
{
|
| 46912 |
+
"epoch": 0.0006686,
|
| 46913 |
+
"grad_norm": 0.6328103542327881,
|
| 46914 |
+
"learning_rate": 6.685e-05,
|
| 46915 |
+
"loss": 0.3086,
|
| 46916 |
+
"step": 6686
|
| 46917 |
+
},
|
| 46918 |
+
{
|
| 46919 |
+
"epoch": 0.0006687,
|
| 46920 |
+
"grad_norm": 0.7690942883491516,
|
| 46921 |
+
"learning_rate": 6.686000000000001e-05,
|
| 46922 |
+
"loss": 0.3218,
|
| 46923 |
+
"step": 6687
|
| 46924 |
+
},
|
| 46925 |
+
{
|
| 46926 |
+
"epoch": 0.0006688,
|
| 46927 |
+
"grad_norm": 0.69964200258255,
|
| 46928 |
+
"learning_rate": 6.687e-05,
|
| 46929 |
+
"loss": 0.3259,
|
| 46930 |
+
"step": 6688
|
| 46931 |
+
},
|
| 46932 |
+
{
|
| 46933 |
+
"epoch": 0.0006689,
|
| 46934 |
+
"grad_norm": 0.6972114443778992,
|
| 46935 |
+
"learning_rate": 6.688e-05,
|
| 46936 |
+
"loss": 0.3328,
|
| 46937 |
+
"step": 6689
|
| 46938 |
+
},
|
| 46939 |
+
{
|
| 46940 |
+
"epoch": 0.000669,
|
| 46941 |
+
"grad_norm": 0.697297215461731,
|
| 46942 |
+
"learning_rate": 6.689e-05,
|
| 46943 |
+
"loss": 0.3267,
|
| 46944 |
+
"step": 6690
|
| 46945 |
+
},
|
| 46946 |
+
{
|
| 46947 |
+
"epoch": 0.0006691,
|
| 46948 |
+
"grad_norm": 1.2948123216629028,
|
| 46949 |
+
"learning_rate": 6.69e-05,
|
| 46950 |
+
"loss": 0.4321,
|
| 46951 |
+
"step": 6691
|
| 46952 |
+
},
|
| 46953 |
+
{
|
| 46954 |
+
"epoch": 0.0006692,
|
| 46955 |
+
"grad_norm": 0.7972717881202698,
|
| 46956 |
+
"learning_rate": 6.691000000000001e-05,
|
| 46957 |
+
"loss": 0.3184,
|
| 46958 |
+
"step": 6692
|
| 46959 |
+
},
|
| 46960 |
+
{
|
| 46961 |
+
"epoch": 0.0006693,
|
| 46962 |
+
"grad_norm": 0.777863085269928,
|
| 46963 |
+
"learning_rate": 6.692e-05,
|
| 46964 |
+
"loss": 0.3042,
|
| 46965 |
+
"step": 6693
|
| 46966 |
+
},
|
| 46967 |
+
{
|
| 46968 |
+
"epoch": 0.0006694,
|
| 46969 |
+
"grad_norm": 0.952314555644989,
|
| 46970 |
+
"learning_rate": 6.693e-05,
|
| 46971 |
+
"loss": 0.3782,
|
| 46972 |
+
"step": 6694
|
| 46973 |
+
},
|
| 46974 |
+
{
|
| 46975 |
+
"epoch": 0.0006695,
|
| 46976 |
+
"grad_norm": 0.667868435382843,
|
| 46977 |
+
"learning_rate": 6.694e-05,
|
| 46978 |
+
"loss": 0.2976,
|
| 46979 |
+
"step": 6695
|
| 46980 |
+
},
|
| 46981 |
+
{
|
| 46982 |
+
"epoch": 0.0006696,
|
| 46983 |
+
"grad_norm": 0.7156415581703186,
|
| 46984 |
+
"learning_rate": 6.695e-05,
|
| 46985 |
+
"loss": 0.3081,
|
| 46986 |
+
"step": 6696
|
| 46987 |
+
},
|
| 46988 |
+
{
|
| 46989 |
+
"epoch": 0.0006697,
|
| 46990 |
+
"grad_norm": 0.6847723126411438,
|
| 46991 |
+
"learning_rate": 6.696e-05,
|
| 46992 |
+
"loss": 0.3027,
|
| 46993 |
+
"step": 6697
|
| 46994 |
+
},
|
| 46995 |
+
{
|
| 46996 |
+
"epoch": 0.0006698,
|
| 46997 |
+
"grad_norm": 0.6780564785003662,
|
| 46998 |
+
"learning_rate": 6.697e-05,
|
| 46999 |
+
"loss": 0.3074,
|
| 47000 |
+
"step": 6698
|
| 47001 |
+
},
|
| 47002 |
+
{
|
| 47003 |
+
"epoch": 0.0006699,
|
| 47004 |
+
"grad_norm": 0.6628689765930176,
|
| 47005 |
+
"learning_rate": 6.698000000000001e-05,
|
| 47006 |
+
"loss": 0.3027,
|
| 47007 |
+
"step": 6699
|
| 47008 |
+
},
|
| 47009 |
+
{
|
| 47010 |
+
"epoch": 0.00067,
|
| 47011 |
+
"grad_norm": 0.7941015958786011,
|
| 47012 |
+
"learning_rate": 6.699e-05,
|
| 47013 |
+
"loss": 0.3264,
|
| 47014 |
+
"step": 6700
|
| 47015 |
+
},
|
| 47016 |
+
{
|
| 47017 |
+
"epoch": 0.0006701,
|
| 47018 |
+
"grad_norm": 0.8983973860740662,
|
| 47019 |
+
"learning_rate": 6.7e-05,
|
| 47020 |
+
"loss": 0.3368,
|
| 47021 |
+
"step": 6701
|
| 47022 |
+
},
|
| 47023 |
+
{
|
| 47024 |
+
"epoch": 0.0006702,
|
| 47025 |
+
"grad_norm": 0.8160014748573303,
|
| 47026 |
+
"learning_rate": 6.701000000000001e-05,
|
| 47027 |
+
"loss": 0.3267,
|
| 47028 |
+
"step": 6702
|
| 47029 |
+
},
|
| 47030 |
+
{
|
| 47031 |
+
"epoch": 0.0006703,
|
| 47032 |
+
"grad_norm": 1.5155754089355469,
|
| 47033 |
+
"learning_rate": 6.701999999999999e-05,
|
| 47034 |
+
"loss": 0.4702,
|
| 47035 |
+
"step": 6703
|
| 47036 |
+
},
|
| 47037 |
+
{
|
| 47038 |
+
"epoch": 0.0006704,
|
| 47039 |
+
"grad_norm": 0.8674948811531067,
|
| 47040 |
+
"learning_rate": 6.703e-05,
|
| 47041 |
+
"loss": 0.3071,
|
| 47042 |
+
"step": 6704
|
| 47043 |
+
},
|
| 47044 |
+
{
|
| 47045 |
+
"epoch": 0.0006705,
|
| 47046 |
+
"grad_norm": 1.395816445350647,
|
| 47047 |
+
"learning_rate": 6.704000000000001e-05,
|
| 47048 |
+
"loss": 0.3999,
|
| 47049 |
+
"step": 6705
|
| 47050 |
+
},
|
| 47051 |
+
{
|
| 47052 |
+
"epoch": 0.0006706,
|
| 47053 |
+
"grad_norm": 0.8511848449707031,
|
| 47054 |
+
"learning_rate": 6.704999999999999e-05,
|
| 47055 |
+
"loss": 0.3259,
|
| 47056 |
+
"step": 6706
|
| 47057 |
+
},
|
| 47058 |
+
{
|
| 47059 |
+
"epoch": 0.0006707,
|
| 47060 |
+
"grad_norm": 0.7739931344985962,
|
| 47061 |
+
"learning_rate": 6.706e-05,
|
| 47062 |
+
"loss": 0.3135,
|
| 47063 |
+
"step": 6707
|
| 47064 |
+
},
|
| 47065 |
+
{
|
| 47066 |
+
"epoch": 0.0006708,
|
| 47067 |
+
"grad_norm": 0.7451652884483337,
|
| 47068 |
+
"learning_rate": 6.707e-05,
|
| 47069 |
+
"loss": 0.3245,
|
| 47070 |
+
"step": 6708
|
| 47071 |
+
},
|
| 47072 |
+
{
|
| 47073 |
+
"epoch": 0.0006709,
|
| 47074 |
+
"grad_norm": 0.7722559571266174,
|
| 47075 |
+
"learning_rate": 6.708e-05,
|
| 47076 |
+
"loss": 0.3467,
|
| 47077 |
+
"step": 6709
|
| 47078 |
+
},
|
| 47079 |
+
{
|
| 47080 |
+
"epoch": 0.000671,
|
| 47081 |
+
"grad_norm": 0.8112444877624512,
|
| 47082 |
+
"learning_rate": 6.709e-05,
|
| 47083 |
+
"loss": 0.3267,
|
| 47084 |
+
"step": 6710
|
| 47085 |
+
},
|
| 47086 |
+
{
|
| 47087 |
+
"epoch": 0.0006711,
|
| 47088 |
+
"grad_norm": 1.377299427986145,
|
| 47089 |
+
"learning_rate": 6.71e-05,
|
| 47090 |
+
"loss": 0.4106,
|
| 47091 |
+
"step": 6711
|
| 47092 |
+
},
|
| 47093 |
+
{
|
| 47094 |
+
"epoch": 0.0006712,
|
| 47095 |
+
"grad_norm": 0.758207380771637,
|
| 47096 |
+
"learning_rate": 6.711e-05,
|
| 47097 |
+
"loss": 0.2981,
|
| 47098 |
+
"step": 6712
|
| 47099 |
+
},
|
| 47100 |
+
{
|
| 47101 |
+
"epoch": 0.0006713,
|
| 47102 |
+
"grad_norm": 0.9617581963539124,
|
| 47103 |
+
"learning_rate": 6.712e-05,
|
| 47104 |
+
"loss": 0.3169,
|
| 47105 |
+
"step": 6713
|
| 47106 |
+
},
|
| 47107 |
+
{
|
| 47108 |
+
"epoch": 0.0006714,
|
| 47109 |
+
"grad_norm": 0.7949542999267578,
|
| 47110 |
+
"learning_rate": 6.713e-05,
|
| 47111 |
+
"loss": 0.3223,
|
| 47112 |
+
"step": 6714
|
| 47113 |
+
},
|
| 47114 |
+
{
|
| 47115 |
+
"epoch": 0.0006715,
|
| 47116 |
+
"grad_norm": 0.7971898913383484,
|
| 47117 |
+
"learning_rate": 6.714e-05,
|
| 47118 |
+
"loss": 0.3206,
|
| 47119 |
+
"step": 6715
|
| 47120 |
+
},
|
| 47121 |
+
{
|
| 47122 |
+
"epoch": 0.0006716,
|
| 47123 |
+
"grad_norm": 0.8708210587501526,
|
| 47124 |
+
"learning_rate": 6.715e-05,
|
| 47125 |
+
"loss": 0.343,
|
| 47126 |
+
"step": 6716
|
| 47127 |
+
},
|
| 47128 |
+
{
|
| 47129 |
+
"epoch": 0.0006717,
|
| 47130 |
+
"grad_norm": 0.8736813068389893,
|
| 47131 |
+
"learning_rate": 6.716e-05,
|
| 47132 |
+
"loss": 0.3489,
|
| 47133 |
+
"step": 6717
|
| 47134 |
+
},
|
| 47135 |
+
{
|
| 47136 |
+
"epoch": 0.0006718,
|
| 47137 |
+
"grad_norm": 1.4544368982315063,
|
| 47138 |
+
"learning_rate": 6.717e-05,
|
| 47139 |
+
"loss": 0.3719,
|
| 47140 |
+
"step": 6718
|
| 47141 |
+
},
|
| 47142 |
+
{
|
| 47143 |
+
"epoch": 0.0006719,
|
| 47144 |
+
"grad_norm": 0.7504727244377136,
|
| 47145 |
+
"learning_rate": 6.718e-05,
|
| 47146 |
+
"loss": 0.2983,
|
| 47147 |
+
"step": 6719
|
| 47148 |
+
},
|
| 47149 |
+
{
|
| 47150 |
+
"epoch": 0.000672,
|
| 47151 |
+
"grad_norm": 0.7012472152709961,
|
| 47152 |
+
"learning_rate": 6.719e-05,
|
| 47153 |
+
"loss": 0.2863,
|
| 47154 |
+
"step": 6720
|
| 47155 |
+
},
|
| 47156 |
+
{
|
| 47157 |
+
"epoch": 0.0006721,
|
| 47158 |
+
"grad_norm": 0.7165892720222473,
|
| 47159 |
+
"learning_rate": 6.720000000000001e-05,
|
| 47160 |
+
"loss": 0.2915,
|
| 47161 |
+
"step": 6721
|
| 47162 |
+
},
|
| 47163 |
+
{
|
| 47164 |
+
"epoch": 0.0006722,
|
| 47165 |
+
"grad_norm": 0.687541663646698,
|
| 47166 |
+
"learning_rate": 6.721e-05,
|
| 47167 |
+
"loss": 0.2988,
|
| 47168 |
+
"step": 6722
|
| 47169 |
+
},
|
| 47170 |
+
{
|
| 47171 |
+
"epoch": 0.0006723,
|
| 47172 |
+
"grad_norm": 1.2955049276351929,
|
| 47173 |
+
"learning_rate": 6.722e-05,
|
| 47174 |
+
"loss": 0.3955,
|
| 47175 |
+
"step": 6723
|
| 47176 |
+
},
|
| 47177 |
+
{
|
| 47178 |
+
"epoch": 0.0006724,
|
| 47179 |
+
"grad_norm": 0.7190659046173096,
|
| 47180 |
+
"learning_rate": 6.723e-05,
|
| 47181 |
+
"loss": 0.2971,
|
| 47182 |
+
"step": 6724
|
| 47183 |
+
},
|
| 47184 |
+
{
|
| 47185 |
+
"epoch": 0.0006725,
|
| 47186 |
+
"grad_norm": 0.9322375059127808,
|
| 47187 |
+
"learning_rate": 6.724e-05,
|
| 47188 |
+
"loss": 0.3459,
|
| 47189 |
+
"step": 6725
|
| 47190 |
+
},
|
| 47191 |
+
{
|
| 47192 |
+
"epoch": 0.0006726,
|
| 47193 |
+
"grad_norm": 1.2733296155929565,
|
| 47194 |
+
"learning_rate": 6.725000000000001e-05,
|
| 47195 |
+
"loss": 0.4817,
|
| 47196 |
+
"step": 6726
|
| 47197 |
+
},
|
| 47198 |
+
{
|
| 47199 |
+
"epoch": 0.0006727,
|
| 47200 |
+
"grad_norm": 0.8756181597709656,
|
| 47201 |
+
"learning_rate": 6.726e-05,
|
| 47202 |
+
"loss": 0.3357,
|
| 47203 |
+
"step": 6727
|
| 47204 |
+
},
|
| 47205 |
+
{
|
| 47206 |
+
"epoch": 0.0006728,
|
| 47207 |
+
"grad_norm": 0.8180313110351562,
|
| 47208 |
+
"learning_rate": 6.727e-05,
|
| 47209 |
+
"loss": 0.3359,
|
| 47210 |
+
"step": 6728
|
| 47211 |
+
},
|
| 47212 |
+
{
|
| 47213 |
+
"epoch": 0.0006729,
|
| 47214 |
+
"grad_norm": 0.7750518918037415,
|
| 47215 |
+
"learning_rate": 6.728e-05,
|
| 47216 |
+
"loss": 0.3181,
|
| 47217 |
+
"step": 6729
|
| 47218 |
+
},
|
| 47219 |
+
{
|
| 47220 |
+
"epoch": 0.000673,
|
| 47221 |
+
"grad_norm": 0.7350186705589294,
|
| 47222 |
+
"learning_rate": 6.729e-05,
|
| 47223 |
+
"loss": 0.2993,
|
| 47224 |
+
"step": 6730
|
| 47225 |
+
},
|
| 47226 |
+
{
|
| 47227 |
+
"epoch": 0.0006731,
|
| 47228 |
+
"grad_norm": 1.417085886001587,
|
| 47229 |
+
"learning_rate": 6.73e-05,
|
| 47230 |
+
"loss": 0.4121,
|
| 47231 |
+
"step": 6731
|
| 47232 |
+
},
|
| 47233 |
+
{
|
| 47234 |
+
"epoch": 0.0006732,
|
| 47235 |
+
"grad_norm": 1.0426276922225952,
|
| 47236 |
+
"learning_rate": 6.731e-05,
|
| 47237 |
+
"loss": 0.394,
|
| 47238 |
+
"step": 6732
|
| 47239 |
+
},
|
| 47240 |
+
{
|
| 47241 |
+
"epoch": 0.0006733,
|
| 47242 |
+
"grad_norm": 0.8477088212966919,
|
| 47243 |
+
"learning_rate": 6.732000000000001e-05,
|
| 47244 |
+
"loss": 0.3076,
|
| 47245 |
+
"step": 6733
|
| 47246 |
+
},
|
| 47247 |
+
{
|
| 47248 |
+
"epoch": 0.0006734,
|
| 47249 |
+
"grad_norm": 0.811088502407074,
|
| 47250 |
+
"learning_rate": 6.733e-05,
|
| 47251 |
+
"loss": 0.3157,
|
| 47252 |
+
"step": 6734
|
| 47253 |
+
},
|
| 47254 |
+
{
|
| 47255 |
+
"epoch": 0.0006735,
|
| 47256 |
+
"grad_norm": 0.9746713638305664,
|
| 47257 |
+
"learning_rate": 6.734e-05,
|
| 47258 |
+
"loss": 0.312,
|
| 47259 |
+
"step": 6735
|
| 47260 |
+
},
|
| 47261 |
+
{
|
| 47262 |
+
"epoch": 0.0006736,
|
| 47263 |
+
"grad_norm": 0.8316006064414978,
|
| 47264 |
+
"learning_rate": 6.735000000000001e-05,
|
| 47265 |
+
"loss": 0.3254,
|
| 47266 |
+
"step": 6736
|
| 47267 |
+
},
|
| 47268 |
+
{
|
| 47269 |
+
"epoch": 0.0006737,
|
| 47270 |
+
"grad_norm": 0.9084553122520447,
|
| 47271 |
+
"learning_rate": 6.735999999999999e-05,
|
| 47272 |
+
"loss": 0.3184,
|
| 47273 |
+
"step": 6737
|
| 47274 |
+
},
|
| 47275 |
+
{
|
| 47276 |
+
"epoch": 0.0006738,
|
| 47277 |
+
"grad_norm": 0.936915934085846,
|
| 47278 |
+
"learning_rate": 6.737e-05,
|
| 47279 |
+
"loss": 0.3342,
|
| 47280 |
+
"step": 6738
|
| 47281 |
+
},
|
| 47282 |
+
{
|
| 47283 |
+
"epoch": 0.0006739,
|
| 47284 |
+
"grad_norm": 0.9024308323860168,
|
| 47285 |
+
"learning_rate": 6.738000000000001e-05,
|
| 47286 |
+
"loss": 0.3513,
|
| 47287 |
+
"step": 6739
|
| 47288 |
+
},
|
| 47289 |
+
{
|
| 47290 |
+
"epoch": 0.000674,
|
| 47291 |
+
"grad_norm": 5.580689430236816,
|
| 47292 |
+
"learning_rate": 6.738999999999999e-05,
|
| 47293 |
+
"loss": 0.7666,
|
| 47294 |
+
"step": 6740
|
| 47295 |
+
},
|
| 47296 |
+
{
|
| 47297 |
+
"epoch": 0.0006741,
|
| 47298 |
+
"grad_norm": 1.0180385112762451,
|
| 47299 |
+
"learning_rate": 6.74e-05,
|
| 47300 |
+
"loss": 0.3221,
|
| 47301 |
+
"step": 6741
|
| 47302 |
+
},
|
| 47303 |
+
{
|
| 47304 |
+
"epoch": 0.0006742,
|
| 47305 |
+
"grad_norm": 1.036532998085022,
|
| 47306 |
+
"learning_rate": 6.741000000000001e-05,
|
| 47307 |
+
"loss": 0.3491,
|
| 47308 |
+
"step": 6742
|
| 47309 |
+
},
|
| 47310 |
+
{
|
| 47311 |
+
"epoch": 0.0006743,
|
| 47312 |
+
"grad_norm": 0.769421398639679,
|
| 47313 |
+
"learning_rate": 6.742e-05,
|
| 47314 |
+
"loss": 0.3059,
|
| 47315 |
+
"step": 6743
|
| 47316 |
+
},
|
| 47317 |
+
{
|
| 47318 |
+
"epoch": 0.0006744,
|
| 47319 |
+
"grad_norm": 0.8509790897369385,
|
| 47320 |
+
"learning_rate": 6.743e-05,
|
| 47321 |
+
"loss": 0.3162,
|
| 47322 |
+
"step": 6744
|
| 47323 |
+
},
|
| 47324 |
+
{
|
| 47325 |
+
"epoch": 0.0006745,
|
| 47326 |
+
"grad_norm": 1.186266303062439,
|
| 47327 |
+
"learning_rate": 6.744e-05,
|
| 47328 |
+
"loss": 0.4888,
|
| 47329 |
+
"step": 6745
|
| 47330 |
+
},
|
| 47331 |
+
{
|
| 47332 |
+
"epoch": 0.0006746,
|
| 47333 |
+
"grad_norm": 0.9085695743560791,
|
| 47334 |
+
"learning_rate": 6.745e-05,
|
| 47335 |
+
"loss": 0.322,
|
| 47336 |
+
"step": 6746
|
| 47337 |
+
},
|
| 47338 |
+
{
|
| 47339 |
+
"epoch": 0.0006747,
|
| 47340 |
+
"grad_norm": 0.8790482878684998,
|
| 47341 |
+
"learning_rate": 6.746e-05,
|
| 47342 |
+
"loss": 0.3257,
|
| 47343 |
+
"step": 6747
|
| 47344 |
+
},
|
| 47345 |
+
{
|
| 47346 |
+
"epoch": 0.0006748,
|
| 47347 |
+
"grad_norm": 0.8217053413391113,
|
| 47348 |
+
"learning_rate": 6.747e-05,
|
| 47349 |
+
"loss": 0.2786,
|
| 47350 |
+
"step": 6748
|
| 47351 |
+
},
|
| 47352 |
+
{
|
| 47353 |
+
"epoch": 0.0006749,
|
| 47354 |
+
"grad_norm": 1.2313982248306274,
|
| 47355 |
+
"learning_rate": 6.748e-05,
|
| 47356 |
+
"loss": 0.3308,
|
| 47357 |
+
"step": 6749
|
| 47358 |
+
},
|
| 47359 |
+
{
|
| 47360 |
+
"epoch": 0.000675,
|
| 47361 |
+
"grad_norm": 1.2116960287094116,
|
| 47362 |
+
"learning_rate": 6.749e-05,
|
| 47363 |
+
"loss": 0.3868,
|
| 47364 |
+
"step": 6750
|
| 47365 |
+
},
|
| 47366 |
+
{
|
| 47367 |
+
"epoch": 0.0006751,
|
| 47368 |
+
"grad_norm": 0.8842405080795288,
|
| 47369 |
+
"learning_rate": 6.75e-05,
|
| 47370 |
+
"loss": 0.3301,
|
| 47371 |
+
"step": 6751
|
| 47372 |
+
},
|
| 47373 |
+
{
|
| 47374 |
+
"epoch": 0.0006752,
|
| 47375 |
+
"grad_norm": 0.70157790184021,
|
| 47376 |
+
"learning_rate": 6.751e-05,
|
| 47377 |
+
"loss": 0.2893,
|
| 47378 |
+
"step": 6752
|
| 47379 |
+
},
|
| 47380 |
+
{
|
| 47381 |
+
"epoch": 0.0006753,
|
| 47382 |
+
"grad_norm": 0.7880963087081909,
|
| 47383 |
+
"learning_rate": 6.752e-05,
|
| 47384 |
+
"loss": 0.3149,
|
| 47385 |
+
"step": 6753
|
| 47386 |
+
},
|
| 47387 |
+
{
|
| 47388 |
+
"epoch": 0.0006754,
|
| 47389 |
+
"grad_norm": 1.089812994003296,
|
| 47390 |
+
"learning_rate": 6.753e-05,
|
| 47391 |
+
"loss": 0.405,
|
| 47392 |
+
"step": 6754
|
| 47393 |
+
},
|
| 47394 |
+
{
|
| 47395 |
+
"epoch": 0.0006755,
|
| 47396 |
+
"grad_norm": 0.7792335152626038,
|
| 47397 |
+
"learning_rate": 6.754000000000001e-05,
|
| 47398 |
+
"loss": 0.3137,
|
| 47399 |
+
"step": 6755
|
| 47400 |
+
},
|
| 47401 |
+
{
|
| 47402 |
+
"epoch": 0.0006756,
|
| 47403 |
+
"grad_norm": 0.7259790301322937,
|
| 47404 |
+
"learning_rate": 6.755e-05,
|
| 47405 |
+
"loss": 0.3015,
|
| 47406 |
+
"step": 6756
|
| 47407 |
+
},
|
| 47408 |
+
{
|
| 47409 |
+
"epoch": 0.0006757,
|
| 47410 |
+
"grad_norm": 0.7795138359069824,
|
| 47411 |
+
"learning_rate": 6.756e-05,
|
| 47412 |
+
"loss": 0.313,
|
| 47413 |
+
"step": 6757
|
| 47414 |
+
},
|
| 47415 |
+
{
|
| 47416 |
+
"epoch": 0.0006758,
|
| 47417 |
+
"grad_norm": 0.7948536276817322,
|
| 47418 |
+
"learning_rate": 6.757e-05,
|
| 47419 |
+
"loss": 0.3145,
|
| 47420 |
+
"step": 6758
|
| 47421 |
+
},
|
| 47422 |
+
{
|
| 47423 |
+
"epoch": 0.0006759,
|
| 47424 |
+
"grad_norm": 0.7575865983963013,
|
| 47425 |
+
"learning_rate": 6.758e-05,
|
| 47426 |
+
"loss": 0.3254,
|
| 47427 |
+
"step": 6759
|
| 47428 |
+
},
|
| 47429 |
+
{
|
| 47430 |
+
"epoch": 0.000676,
|
| 47431 |
+
"grad_norm": 0.9956995248794556,
|
| 47432 |
+
"learning_rate": 6.759000000000001e-05,
|
| 47433 |
+
"loss": 0.3379,
|
| 47434 |
+
"step": 6760
|
| 47435 |
+
},
|
| 47436 |
+
{
|
| 47437 |
+
"epoch": 0.0006761,
|
| 47438 |
+
"grad_norm": 0.7819147706031799,
|
| 47439 |
+
"learning_rate": 6.76e-05,
|
| 47440 |
+
"loss": 0.3311,
|
| 47441 |
+
"step": 6761
|
| 47442 |
+
},
|
| 47443 |
+
{
|
| 47444 |
+
"epoch": 0.0006762,
|
| 47445 |
+
"grad_norm": 0.9284989237785339,
|
| 47446 |
+
"learning_rate": 6.761e-05,
|
| 47447 |
+
"loss": 0.3269,
|
| 47448 |
+
"step": 6762
|
| 47449 |
+
},
|
| 47450 |
+
{
|
| 47451 |
+
"epoch": 0.0006763,
|
| 47452 |
+
"grad_norm": 0.7394283413887024,
|
| 47453 |
+
"learning_rate": 6.762e-05,
|
| 47454 |
+
"loss": 0.2828,
|
| 47455 |
+
"step": 6763
|
| 47456 |
+
},
|
| 47457 |
+
{
|
| 47458 |
+
"epoch": 0.0006764,
|
| 47459 |
+
"grad_norm": 0.9893227219581604,
|
| 47460 |
+
"learning_rate": 6.763e-05,
|
| 47461 |
+
"loss": 0.3328,
|
| 47462 |
+
"step": 6764
|
| 47463 |
+
},
|
| 47464 |
+
{
|
| 47465 |
+
"epoch": 0.0006765,
|
| 47466 |
+
"grad_norm": 0.6851495504379272,
|
| 47467 |
+
"learning_rate": 6.764e-05,
|
| 47468 |
+
"loss": 0.2831,
|
| 47469 |
+
"step": 6765
|
| 47470 |
+
},
|
| 47471 |
+
{
|
| 47472 |
+
"epoch": 0.0006766,
|
| 47473 |
+
"grad_norm": 0.7171580791473389,
|
| 47474 |
+
"learning_rate": 6.765e-05,
|
| 47475 |
+
"loss": 0.281,
|
| 47476 |
+
"step": 6766
|
| 47477 |
+
},
|
| 47478 |
+
{
|
| 47479 |
+
"epoch": 0.0006767,
|
| 47480 |
+
"grad_norm": 0.7466902136802673,
|
| 47481 |
+
"learning_rate": 6.766000000000001e-05,
|
| 47482 |
+
"loss": 0.2981,
|
| 47483 |
+
"step": 6767
|
| 47484 |
+
},
|
| 47485 |
+
{
|
| 47486 |
+
"epoch": 0.0006768,
|
| 47487 |
+
"grad_norm": 0.7984172701835632,
|
| 47488 |
+
"learning_rate": 6.767e-05,
|
| 47489 |
+
"loss": 0.2993,
|
| 47490 |
+
"step": 6768
|
| 47491 |
+
},
|
| 47492 |
+
{
|
| 47493 |
+
"epoch": 0.0006769,
|
| 47494 |
+
"grad_norm": 0.7800801992416382,
|
| 47495 |
+
"learning_rate": 6.768e-05,
|
| 47496 |
+
"loss": 0.3191,
|
| 47497 |
+
"step": 6769
|
| 47498 |
+
},
|
| 47499 |
+
{
|
| 47500 |
+
"epoch": 0.000677,
|
| 47501 |
+
"grad_norm": 0.7060449123382568,
|
| 47502 |
+
"learning_rate": 6.769000000000001e-05,
|
| 47503 |
+
"loss": 0.2939,
|
| 47504 |
+
"step": 6770
|
| 47505 |
+
},
|
| 47506 |
+
{
|
| 47507 |
+
"epoch": 0.0006771,
|
| 47508 |
+
"grad_norm": 0.8864577412605286,
|
| 47509 |
+
"learning_rate": 6.769999999999999e-05,
|
| 47510 |
+
"loss": 0.3203,
|
| 47511 |
+
"step": 6771
|
| 47512 |
+
},
|
| 47513 |
+
{
|
| 47514 |
+
"epoch": 0.0006772,
|
| 47515 |
+
"grad_norm": 0.702970027923584,
|
| 47516 |
+
"learning_rate": 6.771e-05,
|
| 47517 |
+
"loss": 0.2996,
|
| 47518 |
+
"step": 6772
|
| 47519 |
+
},
|
| 47520 |
+
{
|
| 47521 |
+
"epoch": 0.0006773,
|
| 47522 |
+
"grad_norm": 0.8598361015319824,
|
| 47523 |
+
"learning_rate": 6.772000000000001e-05,
|
| 47524 |
+
"loss": 0.3125,
|
| 47525 |
+
"step": 6773
|
| 47526 |
+
},
|
| 47527 |
+
{
|
| 47528 |
+
"epoch": 0.0006774,
|
| 47529 |
+
"grad_norm": 0.7288208603858948,
|
| 47530 |
+
"learning_rate": 6.772999999999999e-05,
|
| 47531 |
+
"loss": 0.2969,
|
| 47532 |
+
"step": 6774
|
| 47533 |
+
},
|
| 47534 |
+
{
|
| 47535 |
+
"epoch": 0.0006775,
|
| 47536 |
+
"grad_norm": 0.8956363201141357,
|
| 47537 |
+
"learning_rate": 6.774e-05,
|
| 47538 |
+
"loss": 0.3125,
|
| 47539 |
+
"step": 6775
|
| 47540 |
+
},
|
| 47541 |
+
{
|
| 47542 |
+
"epoch": 0.0006776,
|
| 47543 |
+
"grad_norm": 0.631296694278717,
|
| 47544 |
+
"learning_rate": 6.775000000000001e-05,
|
| 47545 |
+
"loss": 0.29,
|
| 47546 |
+
"step": 6776
|
| 47547 |
+
},
|
| 47548 |
+
{
|
| 47549 |
+
"epoch": 0.0006777,
|
| 47550 |
+
"grad_norm": 1.2960058450698853,
|
| 47551 |
+
"learning_rate": 6.776e-05,
|
| 47552 |
+
"loss": 0.478,
|
| 47553 |
+
"step": 6777
|
| 47554 |
+
},
|
| 47555 |
+
{
|
| 47556 |
+
"epoch": 0.0006778,
|
| 47557 |
+
"grad_norm": 1.2035611867904663,
|
| 47558 |
+
"learning_rate": 6.777e-05,
|
| 47559 |
+
"loss": 0.3376,
|
| 47560 |
+
"step": 6778
|
| 47561 |
+
},
|
| 47562 |
+
{
|
| 47563 |
+
"epoch": 0.0006779,
|
| 47564 |
+
"grad_norm": 0.7155436277389526,
|
| 47565 |
+
"learning_rate": 6.778e-05,
|
| 47566 |
+
"loss": 0.2738,
|
| 47567 |
+
"step": 6779
|
| 47568 |
+
},
|
| 47569 |
+
{
|
| 47570 |
+
"epoch": 0.000678,
|
| 47571 |
+
"grad_norm": 0.7673712372779846,
|
| 47572 |
+
"learning_rate": 6.779e-05,
|
| 47573 |
+
"loss": 0.3064,
|
| 47574 |
+
"step": 6780
|
| 47575 |
+
},
|
| 47576 |
+
{
|
| 47577 |
+
"epoch": 0.0006781,
|
| 47578 |
+
"grad_norm": 0.8256517052650452,
|
| 47579 |
+
"learning_rate": 6.78e-05,
|
| 47580 |
+
"loss": 0.3408,
|
| 47581 |
+
"step": 6781
|
| 47582 |
+
},
|
| 47583 |
+
{
|
| 47584 |
+
"epoch": 0.0006782,
|
| 47585 |
+
"grad_norm": 0.7277462482452393,
|
| 47586 |
+
"learning_rate": 6.781e-05,
|
| 47587 |
+
"loss": 0.2986,
|
| 47588 |
+
"step": 6782
|
| 47589 |
+
},
|
| 47590 |
+
{
|
| 47591 |
+
"epoch": 0.0006783,
|
| 47592 |
+
"grad_norm": 0.8773739337921143,
|
| 47593 |
+
"learning_rate": 6.782e-05,
|
| 47594 |
+
"loss": 0.3452,
|
| 47595 |
+
"step": 6783
|
| 47596 |
+
},
|
| 47597 |
+
{
|
| 47598 |
+
"epoch": 0.0006784,
|
| 47599 |
+
"grad_norm": 0.7977916598320007,
|
| 47600 |
+
"learning_rate": 6.783e-05,
|
| 47601 |
+
"loss": 0.3228,
|
| 47602 |
+
"step": 6784
|
| 47603 |
+
},
|
| 47604 |
+
{
|
| 47605 |
+
"epoch": 0.0006785,
|
| 47606 |
+
"grad_norm": 0.6325885653495789,
|
| 47607 |
+
"learning_rate": 6.784e-05,
|
| 47608 |
+
"loss": 0.2753,
|
| 47609 |
+
"step": 6785
|
| 47610 |
+
},
|
| 47611 |
+
{
|
| 47612 |
+
"epoch": 0.0006786,
|
| 47613 |
+
"grad_norm": 0.9474706053733826,
|
| 47614 |
+
"learning_rate": 6.785e-05,
|
| 47615 |
+
"loss": 0.342,
|
| 47616 |
+
"step": 6786
|
| 47617 |
+
},
|
| 47618 |
+
{
|
| 47619 |
+
"epoch": 0.0006787,
|
| 47620 |
+
"grad_norm": 0.7229621410369873,
|
| 47621 |
+
"learning_rate": 6.786e-05,
|
| 47622 |
+
"loss": 0.2976,
|
| 47623 |
+
"step": 6787
|
| 47624 |
+
},
|
| 47625 |
+
{
|
| 47626 |
+
"epoch": 0.0006788,
|
| 47627 |
+
"grad_norm": 0.6040972471237183,
|
| 47628 |
+
"learning_rate": 6.787e-05,
|
| 47629 |
+
"loss": 0.2732,
|
| 47630 |
+
"step": 6788
|
| 47631 |
+
},
|
| 47632 |
+
{
|
| 47633 |
+
"epoch": 0.0006789,
|
| 47634 |
+
"grad_norm": 1.5880738496780396,
|
| 47635 |
+
"learning_rate": 6.788000000000001e-05,
|
| 47636 |
+
"loss": 0.5254,
|
| 47637 |
+
"step": 6789
|
| 47638 |
+
},
|
| 47639 |
+
{
|
| 47640 |
+
"epoch": 0.000679,
|
| 47641 |
+
"grad_norm": 0.766530454158783,
|
| 47642 |
+
"learning_rate": 6.789e-05,
|
| 47643 |
+
"loss": 0.3114,
|
| 47644 |
+
"step": 6790
|
| 47645 |
+
},
|
| 47646 |
+
{
|
| 47647 |
+
"epoch": 0.0006791,
|
| 47648 |
+
"grad_norm": 0.7376402616500854,
|
| 47649 |
+
"learning_rate": 6.79e-05,
|
| 47650 |
+
"loss": 0.3174,
|
| 47651 |
+
"step": 6791
|
| 47652 |
+
},
|
| 47653 |
+
{
|
| 47654 |
+
"epoch": 0.0006792,
|
| 47655 |
+
"grad_norm": 3.2554433345794678,
|
| 47656 |
+
"learning_rate": 6.791e-05,
|
| 47657 |
+
"loss": 0.4823,
|
| 47658 |
+
"step": 6792
|
| 47659 |
+
},
|
| 47660 |
+
{
|
| 47661 |
+
"epoch": 0.0006793,
|
| 47662 |
+
"grad_norm": 0.840853750705719,
|
| 47663 |
+
"learning_rate": 6.792e-05,
|
| 47664 |
+
"loss": 0.3218,
|
| 47665 |
+
"step": 6793
|
| 47666 |
+
},
|
| 47667 |
+
{
|
| 47668 |
+
"epoch": 0.0006794,
|
| 47669 |
+
"grad_norm": 0.8738373517990112,
|
| 47670 |
+
"learning_rate": 6.793000000000001e-05,
|
| 47671 |
+
"loss": 0.3186,
|
| 47672 |
+
"step": 6794
|
| 47673 |
+
},
|
| 47674 |
+
{
|
| 47675 |
+
"epoch": 0.0006795,
|
| 47676 |
+
"grad_norm": 0.7738059163093567,
|
| 47677 |
+
"learning_rate": 6.794e-05,
|
| 47678 |
+
"loss": 0.3142,
|
| 47679 |
+
"step": 6795
|
| 47680 |
+
},
|
| 47681 |
+
{
|
| 47682 |
+
"epoch": 0.0006796,
|
| 47683 |
+
"grad_norm": 0.7867991924285889,
|
| 47684 |
+
"learning_rate": 6.795e-05,
|
| 47685 |
+
"loss": 0.2955,
|
| 47686 |
+
"step": 6796
|
| 47687 |
+
},
|
| 47688 |
+
{
|
| 47689 |
+
"epoch": 0.0006797,
|
| 47690 |
+
"grad_norm": 0.7323338389396667,
|
| 47691 |
+
"learning_rate": 6.796e-05,
|
| 47692 |
+
"loss": 0.2834,
|
| 47693 |
+
"step": 6797
|
| 47694 |
+
},
|
| 47695 |
+
{
|
| 47696 |
+
"epoch": 0.0006798,
|
| 47697 |
+
"grad_norm": 0.7408937215805054,
|
| 47698 |
+
"learning_rate": 6.797e-05,
|
| 47699 |
+
"loss": 0.2966,
|
| 47700 |
+
"step": 6798
|
| 47701 |
+
},
|
| 47702 |
+
{
|
| 47703 |
+
"epoch": 0.0006799,
|
| 47704 |
+
"grad_norm": 1.057076096534729,
|
| 47705 |
+
"learning_rate": 6.798e-05,
|
| 47706 |
+
"loss": 0.3621,
|
| 47707 |
+
"step": 6799
|
| 47708 |
+
},
|
| 47709 |
+
{
|
| 47710 |
+
"epoch": 0.00068,
|
| 47711 |
+
"grad_norm": 0.7257677316665649,
|
| 47712 |
+
"learning_rate": 6.799e-05,
|
| 47713 |
+
"loss": 0.2898,
|
| 47714 |
+
"step": 6800
|
| 47715 |
+
},
|
| 47716 |
+
{
|
| 47717 |
+
"epoch": 0.0006801,
|
| 47718 |
+
"grad_norm": 0.6983649730682373,
|
| 47719 |
+
"learning_rate": 6.800000000000001e-05,
|
| 47720 |
+
"loss": 0.2886,
|
| 47721 |
+
"step": 6801
|
| 47722 |
+
},
|
| 47723 |
+
{
|
| 47724 |
+
"epoch": 0.0006802,
|
| 47725 |
+
"grad_norm": 0.7865191102027893,
|
| 47726 |
+
"learning_rate": 6.801e-05,
|
| 47727 |
+
"loss": 0.3005,
|
| 47728 |
+
"step": 6802
|
| 47729 |
+
},
|
| 47730 |
+
{
|
| 47731 |
+
"epoch": 0.0006803,
|
| 47732 |
+
"grad_norm": 1.1119191646575928,
|
| 47733 |
+
"learning_rate": 6.802e-05,
|
| 47734 |
+
"loss": 0.3594,
|
| 47735 |
+
"step": 6803
|
| 47736 |
+
},
|
| 47737 |
+
{
|
| 47738 |
+
"epoch": 0.0006804,
|
| 47739 |
+
"grad_norm": 0.7985936403274536,
|
| 47740 |
+
"learning_rate": 6.803000000000001e-05,
|
| 47741 |
+
"loss": 0.3114,
|
| 47742 |
+
"step": 6804
|
| 47743 |
+
},
|
| 47744 |
+
{
|
| 47745 |
+
"epoch": 0.0006805,
|
| 47746 |
+
"grad_norm": 0.6647433042526245,
|
| 47747 |
+
"learning_rate": 6.803999999999999e-05,
|
| 47748 |
+
"loss": 0.2828,
|
| 47749 |
+
"step": 6805
|
| 47750 |
+
},
|
| 47751 |
+
{
|
| 47752 |
+
"epoch": 0.0006806,
|
| 47753 |
+
"grad_norm": 1.1630476713180542,
|
| 47754 |
+
"learning_rate": 6.805e-05,
|
| 47755 |
+
"loss": 0.387,
|
| 47756 |
+
"step": 6806
|
| 47757 |
+
},
|
| 47758 |
+
{
|
| 47759 |
+
"epoch": 0.0006807,
|
| 47760 |
+
"grad_norm": 1.109668493270874,
|
| 47761 |
+
"learning_rate": 6.806000000000001e-05,
|
| 47762 |
+
"loss": 0.3226,
|
| 47763 |
+
"step": 6807
|
| 47764 |
+
},
|
| 47765 |
+
{
|
| 47766 |
+
"epoch": 0.0006808,
|
| 47767 |
+
"grad_norm": 0.8443289399147034,
|
| 47768 |
+
"learning_rate": 6.806999999999999e-05,
|
| 47769 |
+
"loss": 0.3088,
|
| 47770 |
+
"step": 6808
|
| 47771 |
+
},
|
| 47772 |
+
{
|
| 47773 |
+
"epoch": 0.0006809,
|
| 47774 |
+
"grad_norm": 1.652001976966858,
|
| 47775 |
+
"learning_rate": 6.808e-05,
|
| 47776 |
+
"loss": 0.3855,
|
| 47777 |
+
"step": 6809
|
| 47778 |
+
},
|
| 47779 |
+
{
|
| 47780 |
+
"epoch": 0.000681,
|
| 47781 |
+
"grad_norm": 0.8046223521232605,
|
| 47782 |
+
"learning_rate": 6.809000000000001e-05,
|
| 47783 |
+
"loss": 0.3191,
|
| 47784 |
+
"step": 6810
|
| 47785 |
+
},
|
| 47786 |
+
{
|
| 47787 |
+
"epoch": 0.0006811,
|
| 47788 |
+
"grad_norm": 2.053948163986206,
|
| 47789 |
+
"learning_rate": 6.81e-05,
|
| 47790 |
+
"loss": 0.4033,
|
| 47791 |
+
"step": 6811
|
| 47792 |
+
},
|
| 47793 |
+
{
|
| 47794 |
+
"epoch": 0.0006812,
|
| 47795 |
+
"grad_norm": 1.0023415088653564,
|
| 47796 |
+
"learning_rate": 6.811e-05,
|
| 47797 |
+
"loss": 0.3169,
|
| 47798 |
+
"step": 6812
|
| 47799 |
+
},
|
| 47800 |
+
{
|
| 47801 |
+
"epoch": 0.0006813,
|
| 47802 |
+
"grad_norm": 0.8542107939720154,
|
| 47803 |
+
"learning_rate": 6.812e-05,
|
| 47804 |
+
"loss": 0.2849,
|
| 47805 |
+
"step": 6813
|
| 47806 |
+
},
|
| 47807 |
+
{
|
| 47808 |
+
"epoch": 0.0006814,
|
| 47809 |
+
"grad_norm": 0.771658182144165,
|
| 47810 |
+
"learning_rate": 6.813e-05,
|
| 47811 |
+
"loss": 0.2908,
|
| 47812 |
+
"step": 6814
|
| 47813 |
+
},
|
| 47814 |
+
{
|
| 47815 |
+
"epoch": 0.0006815,
|
| 47816 |
+
"grad_norm": 0.89285808801651,
|
| 47817 |
+
"learning_rate": 6.814e-05,
|
| 47818 |
+
"loss": 0.3062,
|
| 47819 |
+
"step": 6815
|
| 47820 |
+
},
|
| 47821 |
+
{
|
| 47822 |
+
"epoch": 0.0006816,
|
| 47823 |
+
"grad_norm": 0.8619227409362793,
|
| 47824 |
+
"learning_rate": 6.815e-05,
|
| 47825 |
+
"loss": 0.3032,
|
| 47826 |
+
"step": 6816
|
| 47827 |
+
},
|
| 47828 |
+
{
|
| 47829 |
+
"epoch": 0.0006817,
|
| 47830 |
+
"grad_norm": 1.047168493270874,
|
| 47831 |
+
"learning_rate": 6.816e-05,
|
| 47832 |
+
"loss": 0.3362,
|
| 47833 |
+
"step": 6817
|
| 47834 |
+
},
|
| 47835 |
+
{
|
| 47836 |
+
"epoch": 0.0006818,
|
| 47837 |
+
"grad_norm": 0.9726020693778992,
|
| 47838 |
+
"learning_rate": 6.817e-05,
|
| 47839 |
+
"loss": 0.2985,
|
| 47840 |
+
"step": 6818
|
| 47841 |
+
},
|
| 47842 |
+
{
|
| 47843 |
+
"epoch": 0.0006819,
|
| 47844 |
+
"grad_norm": 0.7409684658050537,
|
| 47845 |
+
"learning_rate": 6.818e-05,
|
| 47846 |
+
"loss": 0.2693,
|
| 47847 |
+
"step": 6819
|
| 47848 |
+
},
|
| 47849 |
+
{
|
| 47850 |
+
"epoch": 0.000682,
|
| 47851 |
+
"grad_norm": 0.7690621614456177,
|
| 47852 |
+
"learning_rate": 6.819e-05,
|
| 47853 |
+
"loss": 0.2837,
|
| 47854 |
+
"step": 6820
|
| 47855 |
+
},
|
| 47856 |
+
{
|
| 47857 |
+
"epoch": 0.0006821,
|
| 47858 |
+
"grad_norm": 0.8043884634971619,
|
| 47859 |
+
"learning_rate": 6.82e-05,
|
| 47860 |
+
"loss": 0.3254,
|
| 47861 |
+
"step": 6821
|
| 47862 |
+
},
|
| 47863 |
+
{
|
| 47864 |
+
"epoch": 0.0006822,
|
| 47865 |
+
"grad_norm": 0.7533547282218933,
|
| 47866 |
+
"learning_rate": 6.821e-05,
|
| 47867 |
+
"loss": 0.2983,
|
| 47868 |
+
"step": 6822
|
| 47869 |
+
},
|
| 47870 |
+
{
|
| 47871 |
+
"epoch": 0.0006823,
|
| 47872 |
+
"grad_norm": 1.1310558319091797,
|
| 47873 |
+
"learning_rate": 6.822000000000001e-05,
|
| 47874 |
+
"loss": 0.363,
|
| 47875 |
+
"step": 6823
|
| 47876 |
+
},
|
| 47877 |
+
{
|
| 47878 |
+
"epoch": 0.0006824,
|
| 47879 |
+
"grad_norm": 1.1872040033340454,
|
| 47880 |
+
"learning_rate": 6.823e-05,
|
| 47881 |
+
"loss": 0.3219,
|
| 47882 |
+
"step": 6824
|
| 47883 |
+
},
|
| 47884 |
+
{
|
| 47885 |
+
"epoch": 0.0006825,
|
| 47886 |
+
"grad_norm": 0.9793376922607422,
|
| 47887 |
+
"learning_rate": 6.824e-05,
|
| 47888 |
+
"loss": 0.3489,
|
| 47889 |
+
"step": 6825
|
| 47890 |
+
},
|
| 47891 |
+
{
|
| 47892 |
+
"epoch": 0.0006826,
|
| 47893 |
+
"grad_norm": 0.7625957727432251,
|
| 47894 |
+
"learning_rate": 6.825e-05,
|
| 47895 |
+
"loss": 0.2771,
|
| 47896 |
+
"step": 6826
|
| 47897 |
+
},
|
| 47898 |
+
{
|
| 47899 |
+
"epoch": 0.0006827,
|
| 47900 |
+
"grad_norm": 0.7681121230125427,
|
| 47901 |
+
"learning_rate": 6.826e-05,
|
| 47902 |
+
"loss": 0.3005,
|
| 47903 |
+
"step": 6827
|
| 47904 |
+
},
|
| 47905 |
+
{
|
| 47906 |
+
"epoch": 0.0006828,
|
| 47907 |
+
"grad_norm": 0.831994354724884,
|
| 47908 |
+
"learning_rate": 6.827000000000001e-05,
|
| 47909 |
+
"loss": 0.3113,
|
| 47910 |
+
"step": 6828
|
| 47911 |
+
},
|
| 47912 |
+
{
|
| 47913 |
+
"epoch": 0.0006829,
|
| 47914 |
+
"grad_norm": 0.6504027247428894,
|
| 47915 |
+
"learning_rate": 6.828e-05,
|
| 47916 |
+
"loss": 0.2791,
|
| 47917 |
+
"step": 6829
|
| 47918 |
+
},
|
| 47919 |
+
{
|
| 47920 |
+
"epoch": 0.000683,
|
| 47921 |
+
"grad_norm": 0.855286717414856,
|
| 47922 |
+
"learning_rate": 6.829e-05,
|
| 47923 |
+
"loss": 0.3169,
|
| 47924 |
+
"step": 6830
|
| 47925 |
+
},
|
| 47926 |
+
{
|
| 47927 |
+
"epoch": 0.0006831,
|
| 47928 |
+
"grad_norm": 0.7506752014160156,
|
| 47929 |
+
"learning_rate": 6.83e-05,
|
| 47930 |
+
"loss": 0.2863,
|
| 47931 |
+
"step": 6831
|
| 47932 |
+
},
|
| 47933 |
+
{
|
| 47934 |
+
"epoch": 0.0006832,
|
| 47935 |
+
"grad_norm": 0.6989216804504395,
|
| 47936 |
+
"learning_rate": 6.831e-05,
|
| 47937 |
+
"loss": 0.2861,
|
| 47938 |
+
"step": 6832
|
| 47939 |
+
},
|
| 47940 |
+
{
|
| 47941 |
+
"epoch": 0.0006833,
|
| 47942 |
+
"grad_norm": 0.7412368655204773,
|
| 47943 |
+
"learning_rate": 6.832e-05,
|
| 47944 |
+
"loss": 0.3159,
|
| 47945 |
+
"step": 6833
|
| 47946 |
+
},
|
| 47947 |
+
{
|
| 47948 |
+
"epoch": 0.0006834,
|
| 47949 |
+
"grad_norm": 0.677550733089447,
|
| 47950 |
+
"learning_rate": 6.833e-05,
|
| 47951 |
+
"loss": 0.2936,
|
| 47952 |
+
"step": 6834
|
| 47953 |
+
},
|
| 47954 |
+
{
|
| 47955 |
+
"epoch": 0.0006835,
|
| 47956 |
+
"grad_norm": 1.302125334739685,
|
| 47957 |
+
"learning_rate": 6.834e-05,
|
| 47958 |
+
"loss": 0.3418,
|
| 47959 |
+
"step": 6835
|
| 47960 |
+
},
|
| 47961 |
+
{
|
| 47962 |
+
"epoch": 0.0006836,
|
| 47963 |
+
"grad_norm": 0.7905217409133911,
|
| 47964 |
+
"learning_rate": 6.835e-05,
|
| 47965 |
+
"loss": 0.3115,
|
| 47966 |
+
"step": 6836
|
| 47967 |
+
},
|
| 47968 |
+
{
|
| 47969 |
+
"epoch": 0.0006837,
|
| 47970 |
+
"grad_norm": 0.7866873145103455,
|
| 47971 |
+
"learning_rate": 6.836e-05,
|
| 47972 |
+
"loss": 0.2991,
|
| 47973 |
+
"step": 6837
|
| 47974 |
+
},
|
| 47975 |
+
{
|
| 47976 |
+
"epoch": 0.0006838,
|
| 47977 |
+
"grad_norm": 0.7025447487831116,
|
| 47978 |
+
"learning_rate": 6.837000000000001e-05,
|
| 47979 |
+
"loss": 0.2747,
|
| 47980 |
+
"step": 6838
|
| 47981 |
+
},
|
| 47982 |
+
{
|
| 47983 |
+
"epoch": 0.0006839,
|
| 47984 |
+
"grad_norm": 0.6906653642654419,
|
| 47985 |
+
"learning_rate": 6.837999999999999e-05,
|
| 47986 |
+
"loss": 0.2881,
|
| 47987 |
+
"step": 6839
|
| 47988 |
+
},
|
| 47989 |
+
{
|
| 47990 |
+
"epoch": 0.000684,
|
| 47991 |
+
"grad_norm": 0.6507428288459778,
|
| 47992 |
+
"learning_rate": 6.839e-05,
|
| 47993 |
+
"loss": 0.2745,
|
| 47994 |
+
"step": 6840
|
| 47995 |
+
},
|
| 47996 |
+
{
|
| 47997 |
+
"epoch": 0.0006841,
|
| 47998 |
+
"grad_norm": 0.7092325687408447,
|
| 47999 |
+
"learning_rate": 6.840000000000001e-05,
|
| 48000 |
+
"loss": 0.2927,
|
| 48001 |
+
"step": 6841
|
| 48002 |
+
},
|
| 48003 |
+
{
|
| 48004 |
+
"epoch": 0.0006842,
|
| 48005 |
+
"grad_norm": 0.79344642162323,
|
| 48006 |
+
"learning_rate": 6.840999999999999e-05,
|
| 48007 |
+
"loss": 0.3191,
|
| 48008 |
+
"step": 6842
|
| 48009 |
+
},
|
| 48010 |
+
{
|
| 48011 |
+
"epoch": 0.0006843,
|
| 48012 |
+
"grad_norm": 0.6421338319778442,
|
| 48013 |
+
"learning_rate": 6.842e-05,
|
| 48014 |
+
"loss": 0.2803,
|
| 48015 |
+
"step": 6843
|
| 48016 |
+
},
|
| 48017 |
+
{
|
| 48018 |
+
"epoch": 0.0006844,
|
| 48019 |
+
"grad_norm": 0.6311710476875305,
|
| 48020 |
+
"learning_rate": 6.843000000000001e-05,
|
| 48021 |
+
"loss": 0.2732,
|
| 48022 |
+
"step": 6844
|
| 48023 |
+
},
|
| 48024 |
+
{
|
| 48025 |
+
"epoch": 0.0006845,
|
| 48026 |
+
"grad_norm": 1.3130443096160889,
|
| 48027 |
+
"learning_rate": 6.844e-05,
|
| 48028 |
+
"loss": 0.3396,
|
| 48029 |
+
"step": 6845
|
| 48030 |
+
},
|
| 48031 |
+
{
|
| 48032 |
+
"epoch": 0.0006846,
|
| 48033 |
+
"grad_norm": 0.6774488687515259,
|
| 48034 |
+
"learning_rate": 6.845e-05,
|
| 48035 |
+
"loss": 0.2732,
|
| 48036 |
+
"step": 6846
|
| 48037 |
+
},
|
| 48038 |
+
{
|
| 48039 |
+
"epoch": 0.0006847,
|
| 48040 |
+
"grad_norm": 0.7677571177482605,
|
| 48041 |
+
"learning_rate": 6.846e-05,
|
| 48042 |
+
"loss": 0.291,
|
| 48043 |
+
"step": 6847
|
| 48044 |
+
},
|
| 48045 |
+
{
|
| 48046 |
+
"epoch": 0.0006848,
|
| 48047 |
+
"grad_norm": 0.6695631742477417,
|
| 48048 |
+
"learning_rate": 6.847e-05,
|
| 48049 |
+
"loss": 0.282,
|
| 48050 |
+
"step": 6848
|
| 48051 |
+
},
|
| 48052 |
+
{
|
| 48053 |
+
"epoch": 0.0006849,
|
| 48054 |
+
"grad_norm": 0.7841461896896362,
|
| 48055 |
+
"learning_rate": 6.848e-05,
|
| 48056 |
+
"loss": 0.3225,
|
| 48057 |
+
"step": 6849
|
| 48058 |
+
},
|
| 48059 |
+
{
|
| 48060 |
+
"epoch": 0.000685,
|
| 48061 |
+
"grad_norm": 0.7521286010742188,
|
| 48062 |
+
"learning_rate": 6.849e-05,
|
| 48063 |
+
"loss": 0.2922,
|
| 48064 |
+
"step": 6850
|
| 48065 |
+
},
|
| 48066 |
+
{
|
| 48067 |
+
"epoch": 0.0006851,
|
| 48068 |
+
"grad_norm": 0.6829274892807007,
|
| 48069 |
+
"learning_rate": 6.85e-05,
|
| 48070 |
+
"loss": 0.297,
|
| 48071 |
+
"step": 6851
|
| 48072 |
+
},
|
| 48073 |
+
{
|
| 48074 |
+
"epoch": 0.0006852,
|
| 48075 |
+
"grad_norm": 0.5868691802024841,
|
| 48076 |
+
"learning_rate": 6.851e-05,
|
| 48077 |
+
"loss": 0.264,
|
| 48078 |
+
"step": 6852
|
| 48079 |
+
},
|
| 48080 |
+
{
|
| 48081 |
+
"epoch": 0.0006853,
|
| 48082 |
+
"grad_norm": 0.6112059354782104,
|
| 48083 |
+
"learning_rate": 6.852e-05,
|
| 48084 |
+
"loss": 0.269,
|
| 48085 |
+
"step": 6853
|
| 48086 |
+
},
|
| 48087 |
+
{
|
| 48088 |
+
"epoch": 0.0006854,
|
| 48089 |
+
"grad_norm": 0.6771926283836365,
|
| 48090 |
+
"learning_rate": 6.853e-05,
|
| 48091 |
+
"loss": 0.2915,
|
| 48092 |
+
"step": 6854
|
| 48093 |
+
},
|
| 48094 |
+
{
|
| 48095 |
+
"epoch": 0.0006855,
|
| 48096 |
+
"grad_norm": 0.6877410411834717,
|
| 48097 |
+
"learning_rate": 6.854e-05,
|
| 48098 |
+
"loss": 0.2654,
|
| 48099 |
+
"step": 6855
|
| 48100 |
+
},
|
| 48101 |
+
{
|
| 48102 |
+
"epoch": 0.0006856,
|
| 48103 |
+
"grad_norm": 0.7091522216796875,
|
| 48104 |
+
"learning_rate": 6.855e-05,
|
| 48105 |
+
"loss": 0.2864,
|
| 48106 |
+
"step": 6856
|
| 48107 |
+
},
|
| 48108 |
+
{
|
| 48109 |
+
"epoch": 0.0006857,
|
| 48110 |
+
"grad_norm": 0.6090933084487915,
|
| 48111 |
+
"learning_rate": 6.856e-05,
|
| 48112 |
+
"loss": 0.2701,
|
| 48113 |
+
"step": 6857
|
| 48114 |
+
},
|
| 48115 |
+
{
|
| 48116 |
+
"epoch": 0.0006858,
|
| 48117 |
+
"grad_norm": 0.6597985625267029,
|
| 48118 |
+
"learning_rate": 6.857e-05,
|
| 48119 |
+
"loss": 0.2952,
|
| 48120 |
+
"step": 6858
|
| 48121 |
+
},
|
| 48122 |
+
{
|
| 48123 |
+
"epoch": 0.0006859,
|
| 48124 |
+
"grad_norm": 5.0231475830078125,
|
| 48125 |
+
"learning_rate": 6.858e-05,
|
| 48126 |
+
"loss": 0.3691,
|
| 48127 |
+
"step": 6859
|
| 48128 |
+
},
|
| 48129 |
+
{
|
| 48130 |
+
"epoch": 0.000686,
|
| 48131 |
+
"grad_norm": 0.829133152961731,
|
| 48132 |
+
"learning_rate": 6.859e-05,
|
| 48133 |
+
"loss": 0.2822,
|
| 48134 |
+
"step": 6860
|
| 48135 |
+
},
|
| 48136 |
+
{
|
| 48137 |
+
"epoch": 0.0006861,
|
| 48138 |
+
"grad_norm": 1.6139463186264038,
|
| 48139 |
+
"learning_rate": 6.86e-05,
|
| 48140 |
+
"loss": 0.3093,
|
| 48141 |
+
"step": 6861
|
| 48142 |
+
},
|
| 48143 |
+
{
|
| 48144 |
+
"epoch": 0.0006862,
|
| 48145 |
+
"grad_norm": 1.1528732776641846,
|
| 48146 |
+
"learning_rate": 6.861000000000001e-05,
|
| 48147 |
+
"loss": 0.3365,
|
| 48148 |
+
"step": 6862
|
| 48149 |
+
},
|
| 48150 |
+
{
|
| 48151 |
+
"epoch": 0.0006863,
|
| 48152 |
+
"grad_norm": 0.673590898513794,
|
| 48153 |
+
"learning_rate": 6.862e-05,
|
| 48154 |
+
"loss": 0.2788,
|
| 48155 |
+
"step": 6863
|
| 48156 |
+
},
|
| 48157 |
+
{
|
| 48158 |
+
"epoch": 0.0006864,
|
| 48159 |
+
"grad_norm": 0.8255468010902405,
|
| 48160 |
+
"learning_rate": 6.863e-05,
|
| 48161 |
+
"loss": 0.3047,
|
| 48162 |
+
"step": 6864
|
| 48163 |
+
},
|
| 48164 |
+
{
|
| 48165 |
+
"epoch": 0.0006865,
|
| 48166 |
+
"grad_norm": 0.980257511138916,
|
| 48167 |
+
"learning_rate": 6.864000000000001e-05,
|
| 48168 |
+
"loss": 0.3389,
|
| 48169 |
+
"step": 6865
|
| 48170 |
+
},
|
| 48171 |
+
{
|
| 48172 |
+
"epoch": 0.0006866,
|
| 48173 |
+
"grad_norm": 0.9978033900260925,
|
| 48174 |
+
"learning_rate": 6.865e-05,
|
| 48175 |
+
"loss": 0.2896,
|
| 48176 |
+
"step": 6866
|
| 48177 |
+
},
|
| 48178 |
+
{
|
| 48179 |
+
"epoch": 0.0006867,
|
| 48180 |
+
"grad_norm": 0.6645432114601135,
|
| 48181 |
+
"learning_rate": 6.866e-05,
|
| 48182 |
+
"loss": 0.27,
|
| 48183 |
+
"step": 6867
|
| 48184 |
+
},
|
| 48185 |
+
{
|
| 48186 |
+
"epoch": 0.0006868,
|
| 48187 |
+
"grad_norm": 0.7752996683120728,
|
| 48188 |
+
"learning_rate": 6.867e-05,
|
| 48189 |
+
"loss": 0.2797,
|
| 48190 |
+
"step": 6868
|
| 48191 |
+
},
|
| 48192 |
+
{
|
| 48193 |
+
"epoch": 0.0006869,
|
| 48194 |
+
"grad_norm": 0.846230149269104,
|
| 48195 |
+
"learning_rate": 6.868e-05,
|
| 48196 |
+
"loss": 0.3071,
|
| 48197 |
+
"step": 6869
|
| 48198 |
+
},
|
| 48199 |
+
{
|
| 48200 |
+
"epoch": 0.000687,
|
| 48201 |
+
"grad_norm": 0.715424656867981,
|
| 48202 |
+
"learning_rate": 6.869e-05,
|
| 48203 |
+
"loss": 0.2756,
|
| 48204 |
+
"step": 6870
|
| 48205 |
+
},
|
| 48206 |
+
{
|
| 48207 |
+
"epoch": 0.0006871,
|
| 48208 |
+
"grad_norm": 0.7180454134941101,
|
| 48209 |
+
"learning_rate": 6.87e-05,
|
| 48210 |
+
"loss": 0.2932,
|
| 48211 |
+
"step": 6871
|
| 48212 |
+
},
|
| 48213 |
+
{
|
| 48214 |
+
"epoch": 0.0006872,
|
| 48215 |
+
"grad_norm": 0.734980583190918,
|
| 48216 |
+
"learning_rate": 6.871000000000001e-05,
|
| 48217 |
+
"loss": 0.2996,
|
| 48218 |
+
"step": 6872
|
| 48219 |
+
},
|
| 48220 |
+
{
|
| 48221 |
+
"epoch": 0.0006873,
|
| 48222 |
+
"grad_norm": 0.7681039571762085,
|
| 48223 |
+
"learning_rate": 6.871999999999999e-05,
|
| 48224 |
+
"loss": 0.2942,
|
| 48225 |
+
"step": 6873
|
| 48226 |
+
},
|
| 48227 |
+
{
|
| 48228 |
+
"epoch": 0.0006874,
|
| 48229 |
+
"grad_norm": 0.7316890954971313,
|
| 48230 |
+
"learning_rate": 6.873e-05,
|
| 48231 |
+
"loss": 0.2721,
|
| 48232 |
+
"step": 6874
|
| 48233 |
+
},
|
| 48234 |
+
{
|
| 48235 |
+
"epoch": 0.0006875,
|
| 48236 |
+
"grad_norm": 0.6318367123603821,
|
| 48237 |
+
"learning_rate": 6.874000000000001e-05,
|
| 48238 |
+
"loss": 0.2681,
|
| 48239 |
+
"step": 6875
|
| 48240 |
+
},
|
| 48241 |
+
{
|
| 48242 |
+
"epoch": 0.0006876,
|
| 48243 |
+
"grad_norm": 0.6656007766723633,
|
| 48244 |
+
"learning_rate": 6.874999999999999e-05,
|
| 48245 |
+
"loss": 0.2736,
|
| 48246 |
+
"step": 6876
|
| 48247 |
+
},
|
| 48248 |
+
{
|
| 48249 |
+
"epoch": 0.0006877,
|
| 48250 |
+
"grad_norm": 0.6839348673820496,
|
| 48251 |
+
"learning_rate": 6.876e-05,
|
| 48252 |
+
"loss": 0.2949,
|
| 48253 |
+
"step": 6877
|
| 48254 |
+
},
|
| 48255 |
+
{
|
| 48256 |
+
"epoch": 0.0006878,
|
| 48257 |
+
"grad_norm": 1.1404519081115723,
|
| 48258 |
+
"learning_rate": 6.877000000000001e-05,
|
| 48259 |
+
"loss": 0.3265,
|
| 48260 |
+
"step": 6878
|
| 48261 |
+
},
|
| 48262 |
+
{
|
| 48263 |
+
"epoch": 0.0006879,
|
| 48264 |
+
"grad_norm": 0.6324551701545715,
|
| 48265 |
+
"learning_rate": 6.878e-05,
|
| 48266 |
+
"loss": 0.2639,
|
| 48267 |
+
"step": 6879
|
| 48268 |
+
},
|
| 48269 |
+
{
|
| 48270 |
+
"epoch": 0.000688,
|
| 48271 |
+
"grad_norm": 0.6577877998352051,
|
| 48272 |
+
"learning_rate": 6.879e-05,
|
| 48273 |
+
"loss": 0.2781,
|
| 48274 |
+
"step": 6880
|
| 48275 |
+
},
|
| 48276 |
+
{
|
| 48277 |
+
"epoch": 0.0006881,
|
| 48278 |
+
"grad_norm": 0.6746148467063904,
|
| 48279 |
+
"learning_rate": 6.88e-05,
|
| 48280 |
+
"loss": 0.2788,
|
| 48281 |
+
"step": 6881
|
| 48282 |
+
},
|
| 48283 |
+
{
|
| 48284 |
+
"epoch": 0.0006882,
|
| 48285 |
+
"grad_norm": 0.6653241515159607,
|
| 48286 |
+
"learning_rate": 6.881e-05,
|
| 48287 |
+
"loss": 0.2889,
|
| 48288 |
+
"step": 6882
|
| 48289 |
+
},
|
| 48290 |
+
{
|
| 48291 |
+
"epoch": 0.0006883,
|
| 48292 |
+
"grad_norm": 0.6338821649551392,
|
| 48293 |
+
"learning_rate": 6.882e-05,
|
| 48294 |
+
"loss": 0.2808,
|
| 48295 |
+
"step": 6883
|
| 48296 |
+
},
|
| 48297 |
+
{
|
| 48298 |
+
"epoch": 0.0006884,
|
| 48299 |
+
"grad_norm": 0.6709138751029968,
|
| 48300 |
+
"learning_rate": 6.883e-05,
|
| 48301 |
+
"loss": 0.2832,
|
| 48302 |
+
"step": 6884
|
| 48303 |
+
},
|
| 48304 |
+
{
|
| 48305 |
+
"epoch": 0.0006885,
|
| 48306 |
+
"grad_norm": 0.5759314298629761,
|
| 48307 |
+
"learning_rate": 6.884e-05,
|
| 48308 |
+
"loss": 0.2576,
|
| 48309 |
+
"step": 6885
|
| 48310 |
+
},
|
| 48311 |
+
{
|
| 48312 |
+
"epoch": 0.0006886,
|
| 48313 |
+
"grad_norm": 0.7626339793205261,
|
| 48314 |
+
"learning_rate": 6.885e-05,
|
| 48315 |
+
"loss": 0.2808,
|
| 48316 |
+
"step": 6886
|
| 48317 |
+
},
|
| 48318 |
+
{
|
| 48319 |
+
"epoch": 0.0006887,
|
| 48320 |
+
"grad_norm": 1.1623761653900146,
|
| 48321 |
+
"learning_rate": 6.886e-05,
|
| 48322 |
+
"loss": 0.4182,
|
| 48323 |
+
"step": 6887
|
| 48324 |
+
},
|
| 48325 |
+
{
|
| 48326 |
+
"epoch": 0.0006888,
|
| 48327 |
+
"grad_norm": 0.8023167252540588,
|
| 48328 |
+
"learning_rate": 6.887e-05,
|
| 48329 |
+
"loss": 0.301,
|
| 48330 |
+
"step": 6888
|
| 48331 |
+
},
|
| 48332 |
+
{
|
| 48333 |
+
"epoch": 0.0006889,
|
| 48334 |
+
"grad_norm": 1.7670682668685913,
|
| 48335 |
+
"learning_rate": 6.888e-05,
|
| 48336 |
+
"loss": 0.413,
|
| 48337 |
+
"step": 6889
|
| 48338 |
+
},
|
| 48339 |
+
{
|
| 48340 |
+
"epoch": 0.000689,
|
| 48341 |
+
"grad_norm": 0.7617492079734802,
|
| 48342 |
+
"learning_rate": 6.889e-05,
|
| 48343 |
+
"loss": 0.2847,
|
| 48344 |
+
"step": 6890
|
| 48345 |
+
},
|
| 48346 |
+
{
|
| 48347 |
+
"epoch": 0.0006891,
|
| 48348 |
+
"grad_norm": 6.765068054199219,
|
| 48349 |
+
"learning_rate": 6.89e-05,
|
| 48350 |
+
"loss": 0.5615,
|
| 48351 |
+
"step": 6891
|
| 48352 |
+
},
|
| 48353 |
+
{
|
| 48354 |
+
"epoch": 0.0006892,
|
| 48355 |
+
"grad_norm": 0.8808358311653137,
|
| 48356 |
+
"learning_rate": 6.891e-05,
|
| 48357 |
+
"loss": 0.2999,
|
| 48358 |
+
"step": 6892
|
| 48359 |
+
},
|
| 48360 |
+
{
|
| 48361 |
+
"epoch": 0.0006893,
|
| 48362 |
+
"grad_norm": 0.7774884104728699,
|
| 48363 |
+
"learning_rate": 6.892e-05,
|
| 48364 |
+
"loss": 0.2833,
|
| 48365 |
+
"step": 6893
|
| 48366 |
+
},
|
| 48367 |
+
{
|
| 48368 |
+
"epoch": 0.0006894,
|
| 48369 |
+
"grad_norm": 0.7922816276550293,
|
| 48370 |
+
"learning_rate": 6.893e-05,
|
| 48371 |
+
"loss": 0.2834,
|
| 48372 |
+
"step": 6894
|
| 48373 |
+
},
|
| 48374 |
+
{
|
| 48375 |
+
"epoch": 0.0006895,
|
| 48376 |
+
"grad_norm": 0.7029367685317993,
|
| 48377 |
+
"learning_rate": 6.894e-05,
|
| 48378 |
+
"loss": 0.2765,
|
| 48379 |
+
"step": 6895
|
| 48380 |
+
},
|
| 48381 |
+
{
|
| 48382 |
+
"epoch": 0.0006896,
|
| 48383 |
+
"grad_norm": 1.1723849773406982,
|
| 48384 |
+
"learning_rate": 6.895000000000001e-05,
|
| 48385 |
+
"loss": 0.3149,
|
| 48386 |
+
"step": 6896
|
| 48387 |
+
},
|
| 48388 |
+
{
|
| 48389 |
+
"epoch": 0.0006897,
|
| 48390 |
+
"grad_norm": 0.7313202619552612,
|
| 48391 |
+
"learning_rate": 6.896e-05,
|
| 48392 |
+
"loss": 0.2677,
|
| 48393 |
+
"step": 6897
|
| 48394 |
+
},
|
| 48395 |
+
{
|
| 48396 |
+
"epoch": 0.0006898,
|
| 48397 |
+
"grad_norm": 0.7777508497238159,
|
| 48398 |
+
"learning_rate": 6.897e-05,
|
| 48399 |
+
"loss": 0.2744,
|
| 48400 |
+
"step": 6898
|
| 48401 |
+
},
|
| 48402 |
+
{
|
| 48403 |
+
"epoch": 0.0006899,
|
| 48404 |
+
"grad_norm": 1.5544298887252808,
|
| 48405 |
+
"learning_rate": 6.898000000000001e-05,
|
| 48406 |
+
"loss": 0.3359,
|
| 48407 |
+
"step": 6899
|
| 48408 |
+
},
|
| 48409 |
+
{
|
| 48410 |
+
"epoch": 0.00069,
|
| 48411 |
+
"grad_norm": 2.774675130844116,
|
| 48412 |
+
"learning_rate": 6.899e-05,
|
| 48413 |
+
"loss": 0.5413,
|
| 48414 |
+
"step": 6900
|
| 48415 |
+
},
|
| 48416 |
+
{
|
| 48417 |
+
"epoch": 0.0006901,
|
| 48418 |
+
"grad_norm": 0.7986059784889221,
|
| 48419 |
+
"learning_rate": 6.9e-05,
|
| 48420 |
+
"loss": 0.2656,
|
| 48421 |
+
"step": 6901
|
| 48422 |
+
},
|
| 48423 |
+
{
|
| 48424 |
+
"epoch": 0.0006902,
|
| 48425 |
+
"grad_norm": 1.1680564880371094,
|
| 48426 |
+
"learning_rate": 6.901e-05,
|
| 48427 |
+
"loss": 0.3145,
|
| 48428 |
+
"step": 6902
|
| 48429 |
+
},
|
| 48430 |
+
{
|
| 48431 |
+
"epoch": 0.0006903,
|
| 48432 |
+
"grad_norm": 0.8539535999298096,
|
| 48433 |
+
"learning_rate": 6.902e-05,
|
| 48434 |
+
"loss": 0.3198,
|
| 48435 |
+
"step": 6903
|
| 48436 |
+
},
|
| 48437 |
+
{
|
| 48438 |
+
"epoch": 0.0006904,
|
| 48439 |
+
"grad_norm": 0.6857061982154846,
|
| 48440 |
+
"learning_rate": 6.903e-05,
|
| 48441 |
+
"loss": 0.2793,
|
| 48442 |
+
"step": 6904
|
| 48443 |
+
},
|
| 48444 |
+
{
|
| 48445 |
+
"epoch": 0.0006905,
|
| 48446 |
+
"grad_norm": 0.7869257926940918,
|
| 48447 |
+
"learning_rate": 6.904e-05,
|
| 48448 |
+
"loss": 0.2793,
|
| 48449 |
+
"step": 6905
|
| 48450 |
+
},
|
| 48451 |
+
{
|
| 48452 |
+
"epoch": 0.0006906,
|
| 48453 |
+
"grad_norm": 0.9233639240264893,
|
| 48454 |
+
"learning_rate": 6.905000000000001e-05,
|
| 48455 |
+
"loss": 0.2875,
|
| 48456 |
+
"step": 6906
|
| 48457 |
+
},
|
| 48458 |
+
{
|
| 48459 |
+
"epoch": 0.0006907,
|
| 48460 |
+
"grad_norm": 0.7806829214096069,
|
| 48461 |
+
"learning_rate": 6.905999999999999e-05,
|
| 48462 |
+
"loss": 0.3007,
|
| 48463 |
+
"step": 6907
|
| 48464 |
+
},
|
| 48465 |
+
{
|
| 48466 |
+
"epoch": 0.0006908,
|
| 48467 |
+
"grad_norm": 0.7770219445228577,
|
| 48468 |
+
"learning_rate": 6.907e-05,
|
| 48469 |
+
"loss": 0.3054,
|
| 48470 |
+
"step": 6908
|
| 48471 |
+
},
|
| 48472 |
+
{
|
| 48473 |
+
"epoch": 0.0006909,
|
| 48474 |
+
"grad_norm": 0.6133571267127991,
|
| 48475 |
+
"learning_rate": 6.908000000000001e-05,
|
| 48476 |
+
"loss": 0.2567,
|
| 48477 |
+
"step": 6909
|
| 48478 |
+
},
|
| 48479 |
+
{
|
| 48480 |
+
"epoch": 0.000691,
|
| 48481 |
+
"grad_norm": 1.9333255290985107,
|
| 48482 |
+
"learning_rate": 6.908999999999999e-05,
|
| 48483 |
+
"loss": 0.4106,
|
| 48484 |
+
"step": 6910
|
| 48485 |
+
},
|
| 48486 |
+
{
|
| 48487 |
+
"epoch": 0.0006911,
|
| 48488 |
+
"grad_norm": 0.9833579659461975,
|
| 48489 |
+
"learning_rate": 6.91e-05,
|
| 48490 |
+
"loss": 0.2974,
|
| 48491 |
+
"step": 6911
|
| 48492 |
+
},
|
| 48493 |
+
{
|
| 48494 |
+
"epoch": 0.0006912,
|
| 48495 |
+
"grad_norm": 1.0075924396514893,
|
| 48496 |
+
"learning_rate": 6.911000000000001e-05,
|
| 48497 |
+
"loss": 0.2941,
|
| 48498 |
+
"step": 6912
|
| 48499 |
+
},
|
| 48500 |
+
{
|
| 48501 |
+
"epoch": 0.0006913,
|
| 48502 |
+
"grad_norm": 2.9899237155914307,
|
| 48503 |
+
"learning_rate": 6.912e-05,
|
| 48504 |
+
"loss": 0.4446,
|
| 48505 |
+
"step": 6913
|
| 48506 |
+
},
|
| 48507 |
+
{
|
| 48508 |
+
"epoch": 0.0006914,
|
| 48509 |
+
"grad_norm": 0.8225877285003662,
|
| 48510 |
+
"learning_rate": 6.913e-05,
|
| 48511 |
+
"loss": 0.2852,
|
| 48512 |
+
"step": 6914
|
| 48513 |
+
},
|
| 48514 |
+
{
|
| 48515 |
+
"epoch": 0.0006915,
|
| 48516 |
+
"grad_norm": 0.7364519834518433,
|
| 48517 |
+
"learning_rate": 6.914e-05,
|
| 48518 |
+
"loss": 0.2645,
|
| 48519 |
+
"step": 6915
|
| 48520 |
+
},
|
| 48521 |
+
{
|
| 48522 |
+
"epoch": 0.0006916,
|
| 48523 |
+
"grad_norm": 0.6848117709159851,
|
| 48524 |
+
"learning_rate": 6.915e-05,
|
| 48525 |
+
"loss": 0.2605,
|
| 48526 |
+
"step": 6916
|
| 48527 |
+
},
|
| 48528 |
+
{
|
| 48529 |
+
"epoch": 0.0006917,
|
| 48530 |
+
"grad_norm": 0.8027401566505432,
|
| 48531 |
+
"learning_rate": 6.916e-05,
|
| 48532 |
+
"loss": 0.2672,
|
| 48533 |
+
"step": 6917
|
| 48534 |
+
},
|
| 48535 |
+
{
|
| 48536 |
+
"epoch": 0.0006918,
|
| 48537 |
+
"grad_norm": 0.6527199149131775,
|
| 48538 |
+
"learning_rate": 6.917e-05,
|
| 48539 |
+
"loss": 0.2654,
|
| 48540 |
+
"step": 6918
|
| 48541 |
+
},
|
| 48542 |
+
{
|
| 48543 |
+
"epoch": 0.0006919,
|
| 48544 |
+
"grad_norm": 0.6591749787330627,
|
| 48545 |
+
"learning_rate": 6.918e-05,
|
| 48546 |
+
"loss": 0.2612,
|
| 48547 |
+
"step": 6919
|
| 48548 |
+
},
|
| 48549 |
+
{
|
| 48550 |
+
"epoch": 0.000692,
|
| 48551 |
+
"grad_norm": 1.452710747718811,
|
| 48552 |
+
"learning_rate": 6.919e-05,
|
| 48553 |
+
"loss": 0.4023,
|
| 48554 |
+
"step": 6920
|
| 48555 |
+
},
|
| 48556 |
+
{
|
| 48557 |
+
"epoch": 0.0006921,
|
| 48558 |
+
"grad_norm": 0.6021394729614258,
|
| 48559 |
+
"learning_rate": 6.92e-05,
|
| 48560 |
+
"loss": 0.2585,
|
| 48561 |
+
"step": 6921
|
| 48562 |
+
},
|
| 48563 |
+
{
|
| 48564 |
+
"epoch": 0.0006922,
|
| 48565 |
+
"grad_norm": 4.742277145385742,
|
| 48566 |
+
"learning_rate": 6.921e-05,
|
| 48567 |
+
"loss": 0.3076,
|
| 48568 |
+
"step": 6922
|
| 48569 |
+
},
|
| 48570 |
+
{
|
| 48571 |
+
"epoch": 0.0006923,
|
| 48572 |
+
"grad_norm": 0.7667524814605713,
|
| 48573 |
+
"learning_rate": 6.922e-05,
|
| 48574 |
+
"loss": 0.2605,
|
| 48575 |
+
"step": 6923
|
| 48576 |
+
},
|
| 48577 |
+
{
|
| 48578 |
+
"epoch": 0.0006924,
|
| 48579 |
+
"grad_norm": 0.7648462653160095,
|
| 48580 |
+
"learning_rate": 6.923e-05,
|
| 48581 |
+
"loss": 0.2688,
|
| 48582 |
+
"step": 6924
|
| 48583 |
+
},
|
| 48584 |
+
{
|
| 48585 |
+
"epoch": 0.0006925,
|
| 48586 |
+
"grad_norm": 0.7535057663917542,
|
| 48587 |
+
"learning_rate": 6.924e-05,
|
| 48588 |
+
"loss": 0.2906,
|
| 48589 |
+
"step": 6925
|
| 48590 |
+
},
|
| 48591 |
+
{
|
| 48592 |
+
"epoch": 0.0006926,
|
| 48593 |
+
"grad_norm": 0.6999765634536743,
|
| 48594 |
+
"learning_rate": 6.925e-05,
|
| 48595 |
+
"loss": 0.2947,
|
| 48596 |
+
"step": 6926
|
| 48597 |
+
},
|
| 48598 |
+
{
|
| 48599 |
+
"epoch": 0.0006927,
|
| 48600 |
+
"grad_norm": 0.8671202659606934,
|
| 48601 |
+
"learning_rate": 6.926e-05,
|
| 48602 |
+
"loss": 0.3026,
|
| 48603 |
+
"step": 6927
|
| 48604 |
+
},
|
| 48605 |
+
{
|
| 48606 |
+
"epoch": 0.0006928,
|
| 48607 |
+
"grad_norm": 0.7182843685150146,
|
| 48608 |
+
"learning_rate": 6.927e-05,
|
| 48609 |
+
"loss": 0.2639,
|
| 48610 |
+
"step": 6928
|
| 48611 |
+
},
|
| 48612 |
+
{
|
| 48613 |
+
"epoch": 0.0006929,
|
| 48614 |
+
"grad_norm": 0.7621828317642212,
|
| 48615 |
+
"learning_rate": 6.928e-05,
|
| 48616 |
+
"loss": 0.29,
|
| 48617 |
+
"step": 6929
|
| 48618 |
+
},
|
| 48619 |
+
{
|
| 48620 |
+
"epoch": 0.000693,
|
| 48621 |
+
"grad_norm": 0.7141491174697876,
|
| 48622 |
+
"learning_rate": 6.929000000000001e-05,
|
| 48623 |
+
"loss": 0.2869,
|
| 48624 |
+
"step": 6930
|
| 48625 |
+
},
|
| 48626 |
+
{
|
| 48627 |
+
"epoch": 0.0006931,
|
| 48628 |
+
"grad_norm": 0.8775362372398376,
|
| 48629 |
+
"learning_rate": 6.93e-05,
|
| 48630 |
+
"loss": 0.2881,
|
| 48631 |
+
"step": 6931
|
| 48632 |
+
},
|
| 48633 |
+
{
|
| 48634 |
+
"epoch": 0.0006932,
|
| 48635 |
+
"grad_norm": 1.493428349494934,
|
| 48636 |
+
"learning_rate": 6.931e-05,
|
| 48637 |
+
"loss": 0.4263,
|
| 48638 |
+
"step": 6932
|
| 48639 |
+
},
|
| 48640 |
+
{
|
| 48641 |
+
"epoch": 0.0006933,
|
| 48642 |
+
"grad_norm": 0.732142984867096,
|
| 48643 |
+
"learning_rate": 6.932000000000001e-05,
|
| 48644 |
+
"loss": 0.2621,
|
| 48645 |
+
"step": 6933
|
| 48646 |
+
},
|
| 48647 |
+
{
|
| 48648 |
+
"epoch": 0.0006934,
|
| 48649 |
+
"grad_norm": 0.7555822730064392,
|
| 48650 |
+
"learning_rate": 6.933e-05,
|
| 48651 |
+
"loss": 0.2598,
|
| 48652 |
+
"step": 6934
|
| 48653 |
+
},
|
| 48654 |
+
{
|
| 48655 |
+
"epoch": 0.0006935,
|
| 48656 |
+
"grad_norm": 0.8155015110969543,
|
| 48657 |
+
"learning_rate": 6.934e-05,
|
| 48658 |
+
"loss": 0.2904,
|
| 48659 |
+
"step": 6935
|
| 48660 |
+
},
|
| 48661 |
+
{
|
| 48662 |
+
"epoch": 0.0006936,
|
| 48663 |
+
"grad_norm": 0.5576425194740295,
|
| 48664 |
+
"learning_rate": 6.935e-05,
|
| 48665 |
+
"loss": 0.2446,
|
| 48666 |
+
"step": 6936
|
| 48667 |
+
},
|
| 48668 |
+
{
|
| 48669 |
+
"epoch": 0.0006937,
|
| 48670 |
+
"grad_norm": 0.7607638239860535,
|
| 48671 |
+
"learning_rate": 6.936e-05,
|
| 48672 |
+
"loss": 0.2734,
|
| 48673 |
+
"step": 6937
|
| 48674 |
+
},
|
| 48675 |
+
{
|
| 48676 |
+
"epoch": 0.0006938,
|
| 48677 |
+
"grad_norm": 1.1647169589996338,
|
| 48678 |
+
"learning_rate": 6.937e-05,
|
| 48679 |
+
"loss": 0.4021,
|
| 48680 |
+
"step": 6938
|
| 48681 |
+
},
|
| 48682 |
+
{
|
| 48683 |
+
"epoch": 0.0006939,
|
| 48684 |
+
"grad_norm": 1.0491317510604858,
|
| 48685 |
+
"learning_rate": 6.938e-05,
|
| 48686 |
+
"loss": 0.3015,
|
| 48687 |
+
"step": 6939
|
| 48688 |
+
},
|
| 48689 |
+
{
|
| 48690 |
+
"epoch": 0.000694,
|
| 48691 |
+
"grad_norm": 0.8073709607124329,
|
| 48692 |
+
"learning_rate": 6.939000000000001e-05,
|
| 48693 |
+
"loss": 0.2809,
|
| 48694 |
+
"step": 6940
|
| 48695 |
+
},
|
| 48696 |
+
{
|
| 48697 |
+
"epoch": 0.0006941,
|
| 48698 |
+
"grad_norm": 0.72732013463974,
|
| 48699 |
+
"learning_rate": 6.939999999999999e-05,
|
| 48700 |
+
"loss": 0.2743,
|
| 48701 |
+
"step": 6941
|
| 48702 |
+
},
|
| 48703 |
+
{
|
| 48704 |
+
"epoch": 0.0006942,
|
| 48705 |
+
"grad_norm": 1.0625195503234863,
|
| 48706 |
+
"learning_rate": 6.941e-05,
|
| 48707 |
+
"loss": 0.4106,
|
| 48708 |
+
"step": 6942
|
| 48709 |
+
},
|
| 48710 |
+
{
|
| 48711 |
+
"epoch": 0.0006943,
|
| 48712 |
+
"grad_norm": 0.9433525800704956,
|
| 48713 |
+
"learning_rate": 6.942000000000001e-05,
|
| 48714 |
+
"loss": 0.3411,
|
| 48715 |
+
"step": 6943
|
| 48716 |
+
},
|
| 48717 |
+
{
|
| 48718 |
+
"epoch": 0.0006944,
|
| 48719 |
+
"grad_norm": 0.7006153464317322,
|
| 48720 |
+
"learning_rate": 6.942999999999999e-05,
|
| 48721 |
+
"loss": 0.2584,
|
| 48722 |
+
"step": 6944
|
| 48723 |
+
},
|
| 48724 |
+
{
|
| 48725 |
+
"epoch": 0.0006945,
|
| 48726 |
+
"grad_norm": 1.3899009227752686,
|
| 48727 |
+
"learning_rate": 6.944e-05,
|
| 48728 |
+
"loss": 0.3727,
|
| 48729 |
+
"step": 6945
|
| 48730 |
+
},
|
| 48731 |
+
{
|
| 48732 |
+
"epoch": 0.0006946,
|
| 48733 |
+
"grad_norm": 0.9198282957077026,
|
| 48734 |
+
"learning_rate": 6.945000000000001e-05,
|
| 48735 |
+
"loss": 0.3386,
|
| 48736 |
+
"step": 6946
|
| 48737 |
+
},
|
| 48738 |
+
{
|
| 48739 |
+
"epoch": 0.0006947,
|
| 48740 |
+
"grad_norm": 0.7298462390899658,
|
| 48741 |
+
"learning_rate": 6.946e-05,
|
| 48742 |
+
"loss": 0.2622,
|
| 48743 |
+
"step": 6947
|
| 48744 |
+
},
|
| 48745 |
+
{
|
| 48746 |
+
"epoch": 0.0006948,
|
| 48747 |
+
"grad_norm": 0.7429442405700684,
|
| 48748 |
+
"learning_rate": 6.947e-05,
|
| 48749 |
+
"loss": 0.261,
|
| 48750 |
+
"step": 6948
|
| 48751 |
+
},
|
| 48752 |
+
{
|
| 48753 |
+
"epoch": 0.0006949,
|
| 48754 |
+
"grad_norm": 0.7668522596359253,
|
| 48755 |
+
"learning_rate": 6.948e-05,
|
| 48756 |
+
"loss": 0.2881,
|
| 48757 |
+
"step": 6949
|
| 48758 |
+
},
|
| 48759 |
+
{
|
| 48760 |
+
"epoch": 0.000695,
|
| 48761 |
+
"grad_norm": 0.6823588013648987,
|
| 48762 |
+
"learning_rate": 6.949e-05,
|
| 48763 |
+
"loss": 0.278,
|
| 48764 |
+
"step": 6950
|
| 48765 |
+
},
|
| 48766 |
+
{
|
| 48767 |
+
"epoch": 0.0006951,
|
| 48768 |
+
"grad_norm": 0.6069337725639343,
|
| 48769 |
+
"learning_rate": 6.95e-05,
|
| 48770 |
+
"loss": 0.2505,
|
| 48771 |
+
"step": 6951
|
| 48772 |
+
},
|
| 48773 |
+
{
|
| 48774 |
+
"epoch": 0.0006952,
|
| 48775 |
+
"grad_norm": 1.1467939615249634,
|
| 48776 |
+
"learning_rate": 6.951e-05,
|
| 48777 |
+
"loss": 0.3793,
|
| 48778 |
+
"step": 6952
|
| 48779 |
+
},
|
| 48780 |
+
{
|
| 48781 |
+
"epoch": 0.0006953,
|
| 48782 |
+
"grad_norm": 0.8934077620506287,
|
| 48783 |
+
"learning_rate": 6.952e-05,
|
| 48784 |
+
"loss": 0.3065,
|
| 48785 |
+
"step": 6953
|
| 48786 |
+
},
|
| 48787 |
+
{
|
| 48788 |
+
"epoch": 0.0006954,
|
| 48789 |
+
"grad_norm": 0.7593517303466797,
|
| 48790 |
+
"learning_rate": 6.953000000000001e-05,
|
| 48791 |
+
"loss": 0.2786,
|
| 48792 |
+
"step": 6954
|
| 48793 |
+
},
|
| 48794 |
+
{
|
| 48795 |
+
"epoch": 0.0006955,
|
| 48796 |
+
"grad_norm": 0.587807297706604,
|
| 48797 |
+
"learning_rate": 6.954e-05,
|
| 48798 |
+
"loss": 0.2476,
|
| 48799 |
+
"step": 6955
|
| 48800 |
+
},
|
| 48801 |
+
{
|
| 48802 |
+
"epoch": 0.0006956,
|
| 48803 |
+
"grad_norm": 0.7230364680290222,
|
| 48804 |
+
"learning_rate": 6.955e-05,
|
| 48805 |
+
"loss": 0.2728,
|
| 48806 |
+
"step": 6956
|
| 48807 |
+
},
|
| 48808 |
+
{
|
| 48809 |
+
"epoch": 0.0006957,
|
| 48810 |
+
"grad_norm": 0.9407157301902771,
|
| 48811 |
+
"learning_rate": 6.956e-05,
|
| 48812 |
+
"loss": 0.3571,
|
| 48813 |
+
"step": 6957
|
| 48814 |
+
},
|
| 48815 |
+
{
|
| 48816 |
+
"epoch": 0.0006958,
|
| 48817 |
+
"grad_norm": 1.3212006092071533,
|
| 48818 |
+
"learning_rate": 6.957e-05,
|
| 48819 |
+
"loss": 0.312,
|
| 48820 |
+
"step": 6958
|
| 48821 |
+
},
|
| 48822 |
+
{
|
| 48823 |
+
"epoch": 0.0006959,
|
| 48824 |
+
"grad_norm": 0.8143905401229858,
|
| 48825 |
+
"learning_rate": 6.958e-05,
|
| 48826 |
+
"loss": 0.3319,
|
| 48827 |
+
"step": 6959
|
| 48828 |
+
},
|
| 48829 |
+
{
|
| 48830 |
+
"epoch": 0.000696,
|
| 48831 |
+
"grad_norm": 0.7418932318687439,
|
| 48832 |
+
"learning_rate": 6.959e-05,
|
| 48833 |
+
"loss": 0.2756,
|
| 48834 |
+
"step": 6960
|
| 48835 |
+
},
|
| 48836 |
+
{
|
| 48837 |
+
"epoch": 0.0006961,
|
| 48838 |
+
"grad_norm": 0.9562113285064697,
|
| 48839 |
+
"learning_rate": 6.96e-05,
|
| 48840 |
+
"loss": 0.2852,
|
| 48841 |
+
"step": 6961
|
| 48842 |
+
},
|
| 48843 |
+
{
|
| 48844 |
+
"epoch": 0.0006962,
|
| 48845 |
+
"grad_norm": 0.7164644598960876,
|
| 48846 |
+
"learning_rate": 6.961e-05,
|
| 48847 |
+
"loss": 0.2596,
|
| 48848 |
+
"step": 6962
|
| 48849 |
+
},
|
| 48850 |
+
{
|
| 48851 |
+
"epoch": 0.0006963,
|
| 48852 |
+
"grad_norm": 2.320756196975708,
|
| 48853 |
+
"learning_rate": 6.962e-05,
|
| 48854 |
+
"loss": 0.3641,
|
| 48855 |
+
"step": 6963
|
| 48856 |
+
},
|
| 48857 |
+
{
|
| 48858 |
+
"epoch": 0.0006964,
|
| 48859 |
+
"grad_norm": 0.7264569401741028,
|
| 48860 |
+
"learning_rate": 6.963000000000001e-05,
|
| 48861 |
+
"loss": 0.2603,
|
| 48862 |
+
"step": 6964
|
| 48863 |
+
},
|
| 48864 |
+
{
|
| 48865 |
+
"epoch": 0.0006965,
|
| 48866 |
+
"grad_norm": 0.8696227073669434,
|
| 48867 |
+
"learning_rate": 6.964e-05,
|
| 48868 |
+
"loss": 0.2762,
|
| 48869 |
+
"step": 6965
|
| 48870 |
+
},
|
| 48871 |
+
{
|
| 48872 |
+
"epoch": 0.0006966,
|
| 48873 |
+
"grad_norm": 0.7736457586288452,
|
| 48874 |
+
"learning_rate": 6.965e-05,
|
| 48875 |
+
"loss": 0.2499,
|
| 48876 |
+
"step": 6966
|
| 48877 |
+
},
|
| 48878 |
+
{
|
| 48879 |
+
"epoch": 0.0006967,
|
| 48880 |
+
"grad_norm": 0.7460713386535645,
|
| 48881 |
+
"learning_rate": 6.966000000000001e-05,
|
| 48882 |
+
"loss": 0.2756,
|
| 48883 |
+
"step": 6967
|
| 48884 |
+
},
|
| 48885 |
+
{
|
| 48886 |
+
"epoch": 0.0006968,
|
| 48887 |
+
"grad_norm": 0.7883889079093933,
|
| 48888 |
+
"learning_rate": 6.967e-05,
|
| 48889 |
+
"loss": 0.3052,
|
| 48890 |
+
"step": 6968
|
| 48891 |
+
},
|
| 48892 |
+
{
|
| 48893 |
+
"epoch": 0.0006969,
|
| 48894 |
+
"grad_norm": 0.7249166369438171,
|
| 48895 |
+
"learning_rate": 6.968e-05,
|
| 48896 |
+
"loss": 0.2706,
|
| 48897 |
+
"step": 6969
|
| 48898 |
+
},
|
| 48899 |
+
{
|
| 48900 |
+
"epoch": 0.000697,
|
| 48901 |
+
"grad_norm": 0.8029236793518066,
|
| 48902 |
+
"learning_rate": 6.969e-05,
|
| 48903 |
+
"loss": 0.2808,
|
| 48904 |
+
"step": 6970
|
| 48905 |
+
},
|
| 48906 |
+
{
|
| 48907 |
+
"epoch": 0.0006971,
|
| 48908 |
+
"grad_norm": 0.7636973857879639,
|
| 48909 |
+
"learning_rate": 6.97e-05,
|
| 48910 |
+
"loss": 0.2754,
|
| 48911 |
+
"step": 6971
|
| 48912 |
+
},
|
| 48913 |
+
{
|
| 48914 |
+
"epoch": 0.0006972,
|
| 48915 |
+
"grad_norm": 0.6970018148422241,
|
| 48916 |
+
"learning_rate": 6.971e-05,
|
| 48917 |
+
"loss": 0.2612,
|
| 48918 |
+
"step": 6972
|
| 48919 |
+
},
|
| 48920 |
+
{
|
| 48921 |
+
"epoch": 0.0006973,
|
| 48922 |
+
"grad_norm": 0.6350924968719482,
|
| 48923 |
+
"learning_rate": 6.972e-05,
|
| 48924 |
+
"loss": 0.2522,
|
| 48925 |
+
"step": 6973
|
| 48926 |
+
},
|
| 48927 |
+
{
|
| 48928 |
+
"epoch": 0.0006974,
|
| 48929 |
+
"grad_norm": 0.6835649609565735,
|
| 48930 |
+
"learning_rate": 6.973000000000001e-05,
|
| 48931 |
+
"loss": 0.2667,
|
| 48932 |
+
"step": 6974
|
| 48933 |
+
},
|
| 48934 |
+
{
|
| 48935 |
+
"epoch": 0.0006975,
|
| 48936 |
+
"grad_norm": 1.531586766242981,
|
| 48937 |
+
"learning_rate": 6.973999999999999e-05,
|
| 48938 |
+
"loss": 0.3433,
|
| 48939 |
+
"step": 6975
|
| 48940 |
+
},
|
| 48941 |
+
{
|
| 48942 |
+
"epoch": 0.0006976,
|
| 48943 |
+
"grad_norm": 0.784424364566803,
|
| 48944 |
+
"learning_rate": 6.975e-05,
|
| 48945 |
+
"loss": 0.2732,
|
| 48946 |
+
"step": 6976
|
| 48947 |
+
},
|
| 48948 |
+
{
|
| 48949 |
+
"epoch": 0.0006977,
|
| 48950 |
+
"grad_norm": 0.6527853012084961,
|
| 48951 |
+
"learning_rate": 6.976000000000001e-05,
|
| 48952 |
+
"loss": 0.2603,
|
| 48953 |
+
"step": 6977
|
| 48954 |
+
},
|
| 48955 |
+
{
|
| 48956 |
+
"epoch": 0.0006978,
|
| 48957 |
+
"grad_norm": 0.6221573948860168,
|
| 48958 |
+
"learning_rate": 6.976999999999999e-05,
|
| 48959 |
+
"loss": 0.2522,
|
| 48960 |
+
"step": 6978
|
| 48961 |
+
},
|
| 48962 |
+
{
|
| 48963 |
+
"epoch": 0.0006979,
|
| 48964 |
+
"grad_norm": 0.7770185470581055,
|
| 48965 |
+
"learning_rate": 6.978e-05,
|
| 48966 |
+
"loss": 0.2664,
|
| 48967 |
+
"step": 6979
|
| 48968 |
+
},
|
| 48969 |
+
{
|
| 48970 |
+
"epoch": 0.000698,
|
| 48971 |
+
"grad_norm": 0.5562544465065002,
|
| 48972 |
+
"learning_rate": 6.979000000000001e-05,
|
| 48973 |
+
"loss": 0.2478,
|
| 48974 |
+
"step": 6980
|
| 48975 |
+
},
|
| 48976 |
+
{
|
| 48977 |
+
"epoch": 0.0006981,
|
| 48978 |
+
"grad_norm": 0.6760058999061584,
|
| 48979 |
+
"learning_rate": 6.98e-05,
|
| 48980 |
+
"loss": 0.2714,
|
| 48981 |
+
"step": 6981
|
| 48982 |
+
},
|
| 48983 |
+
{
|
| 48984 |
+
"epoch": 0.0006982,
|
| 48985 |
+
"grad_norm": 0.675886869430542,
|
| 48986 |
+
"learning_rate": 6.981e-05,
|
| 48987 |
+
"loss": 0.2721,
|
| 48988 |
+
"step": 6982
|
| 48989 |
+
},
|
| 48990 |
+
{
|
| 48991 |
+
"epoch": 0.0006983,
|
| 48992 |
+
"grad_norm": 1.0800305604934692,
|
| 48993 |
+
"learning_rate": 6.982e-05,
|
| 48994 |
+
"loss": 0.3134,
|
| 48995 |
+
"step": 6983
|
| 48996 |
+
},
|
| 48997 |
+
{
|
| 48998 |
+
"epoch": 0.0006984,
|
| 48999 |
+
"grad_norm": 0.6709005832672119,
|
| 49000 |
+
"learning_rate": 6.983e-05,
|
| 49001 |
+
"loss": 0.2594,
|
| 49002 |
+
"step": 6984
|
| 49003 |
+
},
|
| 49004 |
+
{
|
| 49005 |
+
"epoch": 0.0006985,
|
| 49006 |
+
"grad_norm": 1.8012157678604126,
|
| 49007 |
+
"learning_rate": 6.984e-05,
|
| 49008 |
+
"loss": 0.3384,
|
| 49009 |
+
"step": 6985
|
| 49010 |
+
},
|
| 49011 |
+
{
|
| 49012 |
+
"epoch": 0.0006986,
|
| 49013 |
+
"grad_norm": 0.7090801000595093,
|
| 49014 |
+
"learning_rate": 6.985e-05,
|
| 49015 |
+
"loss": 0.2628,
|
| 49016 |
+
"step": 6986
|
| 49017 |
+
},
|
| 49018 |
+
{
|
| 49019 |
+
"epoch": 0.0006987,
|
| 49020 |
+
"grad_norm": 0.6993853449821472,
|
| 49021 |
+
"learning_rate": 6.986e-05,
|
| 49022 |
+
"loss": 0.2568,
|
| 49023 |
+
"step": 6987
|
| 49024 |
+
},
|
| 49025 |
+
{
|
| 49026 |
+
"epoch": 0.0006988,
|
| 49027 |
+
"grad_norm": 0.6504535675048828,
|
| 49028 |
+
"learning_rate": 6.987000000000001e-05,
|
| 49029 |
+
"loss": 0.2583,
|
| 49030 |
+
"step": 6988
|
| 49031 |
+
},
|
| 49032 |
+
{
|
| 49033 |
+
"epoch": 0.0006989,
|
| 49034 |
+
"grad_norm": 0.553688645362854,
|
| 49035 |
+
"learning_rate": 6.988e-05,
|
| 49036 |
+
"loss": 0.2521,
|
| 49037 |
+
"step": 6989
|
| 49038 |
+
},
|
| 49039 |
+
{
|
| 49040 |
+
"epoch": 0.000699,
|
| 49041 |
+
"grad_norm": 0.6234272122383118,
|
| 49042 |
+
"learning_rate": 6.989e-05,
|
| 49043 |
+
"loss": 0.2565,
|
| 49044 |
+
"step": 6990
|
| 49045 |
+
},
|
| 49046 |
+
{
|
| 49047 |
+
"epoch": 0.0006991,
|
| 49048 |
+
"grad_norm": 0.7927871942520142,
|
| 49049 |
+
"learning_rate": 6.99e-05,
|
| 49050 |
+
"loss": 0.3008,
|
| 49051 |
+
"step": 6991
|
| 49052 |
+
},
|
| 49053 |
+
{
|
| 49054 |
+
"epoch": 0.0006992,
|
| 49055 |
+
"grad_norm": 0.5891395211219788,
|
| 49056 |
+
"learning_rate": 6.991e-05,
|
| 49057 |
+
"loss": 0.2466,
|
| 49058 |
+
"step": 6992
|
| 49059 |
+
},
|
| 49060 |
+
{
|
| 49061 |
+
"epoch": 0.0006993,
|
| 49062 |
+
"grad_norm": 0.6059378981590271,
|
| 49063 |
+
"learning_rate": 6.992e-05,
|
| 49064 |
+
"loss": 0.2577,
|
| 49065 |
+
"step": 6993
|
| 49066 |
+
},
|
| 49067 |
+
{
|
| 49068 |
+
"epoch": 0.0006994,
|
| 49069 |
+
"grad_norm": 1.2688783407211304,
|
| 49070 |
+
"learning_rate": 6.993e-05,
|
| 49071 |
+
"loss": 0.3342,
|
| 49072 |
+
"step": 6994
|
| 49073 |
+
},
|
| 49074 |
+
{
|
| 49075 |
+
"epoch": 0.0006995,
|
| 49076 |
+
"grad_norm": 0.7680343389511108,
|
| 49077 |
+
"learning_rate": 6.994e-05,
|
| 49078 |
+
"loss": 0.2557,
|
| 49079 |
+
"step": 6995
|
| 49080 |
+
},
|
| 49081 |
+
{
|
| 49082 |
+
"epoch": 0.0006996,
|
| 49083 |
+
"grad_norm": 0.6820274591445923,
|
| 49084 |
+
"learning_rate": 6.995e-05,
|
| 49085 |
+
"loss": 0.2612,
|
| 49086 |
+
"step": 6996
|
| 49087 |
+
},
|
| 49088 |
+
{
|
| 49089 |
+
"epoch": 0.0006997,
|
| 49090 |
+
"grad_norm": 0.7284886837005615,
|
| 49091 |
+
"learning_rate": 6.996e-05,
|
| 49092 |
+
"loss": 0.2581,
|
| 49093 |
+
"step": 6997
|
| 49094 |
+
},
|
| 49095 |
+
{
|
| 49096 |
+
"epoch": 0.0006998,
|
| 49097 |
+
"grad_norm": 1.973966360092163,
|
| 49098 |
+
"learning_rate": 6.997000000000001e-05,
|
| 49099 |
+
"loss": 0.4316,
|
| 49100 |
+
"step": 6998
|
| 49101 |
+
},
|
| 49102 |
+
{
|
| 49103 |
+
"epoch": 0.0006999,
|
| 49104 |
+
"grad_norm": 0.5775460004806519,
|
| 49105 |
+
"learning_rate": 6.998e-05,
|
| 49106 |
+
"loss": 0.2427,
|
| 49107 |
+
"step": 6999
|
| 49108 |
+
},
|
| 49109 |
+
{
|
| 49110 |
+
"epoch": 0.0007,
|
| 49111 |
+
"grad_norm": 4.808237075805664,
|
| 49112 |
+
"learning_rate": 6.999e-05,
|
| 49113 |
+
"loss": 0.4492,
|
| 49114 |
+
"step": 7000
|
| 49115 |
+
},
|
| 49116 |
+
{
|
| 49117 |
+
"epoch": 0.0007,
|
| 49118 |
+
"eval_loss": 0.030403021723031998,
|
| 49119 |
+
"eval_runtime": 362.4086,
|
| 49120 |
+
"eval_samples_per_second": 27.593,
|
| 49121 |
+
"eval_steps_per_second": 1.725,
|
| 49122 |
+
"step": 7000
|
| 49123 |
}
|
| 49124 |
],
|
| 49125 |
"logging_steps": 1,
|