Training in progress, step 2000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 91951912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7eb08bde3240a557936e40b71a499732f2805a97d9209352dc28a73ce5fa51b8
|
| 3 |
size 91951912
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fc08e980f730d47d34ea8984768530714bd752baab6c68664b3aecfe4ba9b8c1
|
| 3 |
+
size 183991627
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
|
| 3 |
+
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6329c14a44d63f981bc4f97f028c1c4346dcd55df991563b7480ee7653ebd50f
|
| 3 |
+
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -10532,6 +10532,3514 @@
|
|
| 10532 |
"eval_samples_per_second": 27.46,
|
| 10533 |
"eval_steps_per_second": 1.716,
|
| 10534 |
"step": 1500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10535 |
}
|
| 10536 |
],
|
| 10537 |
"logging_steps": 1,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.0002,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 2000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 10532 |
"eval_samples_per_second": 27.46,
|
| 10533 |
"eval_steps_per_second": 1.716,
|
| 10534 |
"step": 1500
|
| 10535 |
+
},
|
| 10536 |
+
{
|
| 10537 |
+
"epoch": 0.0001501,
|
| 10538 |
+
"grad_norm": 11.855791091918945,
|
| 10539 |
+
"learning_rate": 1.5e-05,
|
| 10540 |
+
"loss": 89.4375,
|
| 10541 |
+
"step": 1501
|
| 10542 |
+
},
|
| 10543 |
+
{
|
| 10544 |
+
"epoch": 0.0001502,
|
| 10545 |
+
"grad_norm": 11.970094680786133,
|
| 10546 |
+
"learning_rate": 1.501e-05,
|
| 10547 |
+
"loss": 88.9375,
|
| 10548 |
+
"step": 1502
|
| 10549 |
+
},
|
| 10550 |
+
{
|
| 10551 |
+
"epoch": 0.0001503,
|
| 10552 |
+
"grad_norm": 11.825295448303223,
|
| 10553 |
+
"learning_rate": 1.5020000000000002e-05,
|
| 10554 |
+
"loss": 89.9375,
|
| 10555 |
+
"step": 1503
|
| 10556 |
+
},
|
| 10557 |
+
{
|
| 10558 |
+
"epoch": 0.0001504,
|
| 10559 |
+
"grad_norm": 12.03028392791748,
|
| 10560 |
+
"learning_rate": 1.503e-05,
|
| 10561 |
+
"loss": 87.875,
|
| 10562 |
+
"step": 1504
|
| 10563 |
+
},
|
| 10564 |
+
{
|
| 10565 |
+
"epoch": 0.0001505,
|
| 10566 |
+
"grad_norm": 11.944097518920898,
|
| 10567 |
+
"learning_rate": 1.504e-05,
|
| 10568 |
+
"loss": 89.3125,
|
| 10569 |
+
"step": 1505
|
| 10570 |
+
},
|
| 10571 |
+
{
|
| 10572 |
+
"epoch": 0.0001506,
|
| 10573 |
+
"grad_norm": 11.77283763885498,
|
| 10574 |
+
"learning_rate": 1.505e-05,
|
| 10575 |
+
"loss": 89.125,
|
| 10576 |
+
"step": 1506
|
| 10577 |
+
},
|
| 10578 |
+
{
|
| 10579 |
+
"epoch": 0.0001507,
|
| 10580 |
+
"grad_norm": 11.934531211853027,
|
| 10581 |
+
"learning_rate": 1.5060000000000001e-05,
|
| 10582 |
+
"loss": 87.5,
|
| 10583 |
+
"step": 1507
|
| 10584 |
+
},
|
| 10585 |
+
{
|
| 10586 |
+
"epoch": 0.0001508,
|
| 10587 |
+
"grad_norm": 11.855193138122559,
|
| 10588 |
+
"learning_rate": 1.5070000000000001e-05,
|
| 10589 |
+
"loss": 88.375,
|
| 10590 |
+
"step": 1508
|
| 10591 |
+
},
|
| 10592 |
+
{
|
| 10593 |
+
"epoch": 0.0001509,
|
| 10594 |
+
"grad_norm": 11.6827392578125,
|
| 10595 |
+
"learning_rate": 1.508e-05,
|
| 10596 |
+
"loss": 89.3125,
|
| 10597 |
+
"step": 1509
|
| 10598 |
+
},
|
| 10599 |
+
{
|
| 10600 |
+
"epoch": 0.000151,
|
| 10601 |
+
"grad_norm": 11.954957008361816,
|
| 10602 |
+
"learning_rate": 1.509e-05,
|
| 10603 |
+
"loss": 87.75,
|
| 10604 |
+
"step": 1510
|
| 10605 |
+
},
|
| 10606 |
+
{
|
| 10607 |
+
"epoch": 0.0001511,
|
| 10608 |
+
"grad_norm": 11.718145370483398,
|
| 10609 |
+
"learning_rate": 1.51e-05,
|
| 10610 |
+
"loss": 88.0625,
|
| 10611 |
+
"step": 1511
|
| 10612 |
+
},
|
| 10613 |
+
{
|
| 10614 |
+
"epoch": 0.0001512,
|
| 10615 |
+
"grad_norm": 11.530472755432129,
|
| 10616 |
+
"learning_rate": 1.5110000000000001e-05,
|
| 10617 |
+
"loss": 89.25,
|
| 10618 |
+
"step": 1512
|
| 10619 |
+
},
|
| 10620 |
+
{
|
| 10621 |
+
"epoch": 0.0001513,
|
| 10622 |
+
"grad_norm": 11.798002243041992,
|
| 10623 |
+
"learning_rate": 1.5120000000000001e-05,
|
| 10624 |
+
"loss": 88.4375,
|
| 10625 |
+
"step": 1513
|
| 10626 |
+
},
|
| 10627 |
+
{
|
| 10628 |
+
"epoch": 0.0001514,
|
| 10629 |
+
"grad_norm": 11.953389167785645,
|
| 10630 |
+
"learning_rate": 1.513e-05,
|
| 10631 |
+
"loss": 87.875,
|
| 10632 |
+
"step": 1514
|
| 10633 |
+
},
|
| 10634 |
+
{
|
| 10635 |
+
"epoch": 0.0001515,
|
| 10636 |
+
"grad_norm": 11.680356979370117,
|
| 10637 |
+
"learning_rate": 1.5139999999999999e-05,
|
| 10638 |
+
"loss": 88.8125,
|
| 10639 |
+
"step": 1515
|
| 10640 |
+
},
|
| 10641 |
+
{
|
| 10642 |
+
"epoch": 0.0001516,
|
| 10643 |
+
"grad_norm": 11.720935821533203,
|
| 10644 |
+
"learning_rate": 1.5149999999999999e-05,
|
| 10645 |
+
"loss": 88.6875,
|
| 10646 |
+
"step": 1516
|
| 10647 |
+
},
|
| 10648 |
+
{
|
| 10649 |
+
"epoch": 0.0001517,
|
| 10650 |
+
"grad_norm": 11.798686027526855,
|
| 10651 |
+
"learning_rate": 1.516e-05,
|
| 10652 |
+
"loss": 88.9375,
|
| 10653 |
+
"step": 1517
|
| 10654 |
+
},
|
| 10655 |
+
{
|
| 10656 |
+
"epoch": 0.0001518,
|
| 10657 |
+
"grad_norm": 11.694768905639648,
|
| 10658 |
+
"learning_rate": 1.517e-05,
|
| 10659 |
+
"loss": 87.625,
|
| 10660 |
+
"step": 1518
|
| 10661 |
+
},
|
| 10662 |
+
{
|
| 10663 |
+
"epoch": 0.0001519,
|
| 10664 |
+
"grad_norm": 11.854416847229004,
|
| 10665 |
+
"learning_rate": 1.518e-05,
|
| 10666 |
+
"loss": 87.5,
|
| 10667 |
+
"step": 1519
|
| 10668 |
+
},
|
| 10669 |
+
{
|
| 10670 |
+
"epoch": 0.000152,
|
| 10671 |
+
"grad_norm": 11.804166793823242,
|
| 10672 |
+
"learning_rate": 1.5190000000000002e-05,
|
| 10673 |
+
"loss": 87.5,
|
| 10674 |
+
"step": 1520
|
| 10675 |
+
},
|
| 10676 |
+
{
|
| 10677 |
+
"epoch": 0.0001521,
|
| 10678 |
+
"grad_norm": 11.772957801818848,
|
| 10679 |
+
"learning_rate": 1.52e-05,
|
| 10680 |
+
"loss": 88.25,
|
| 10681 |
+
"step": 1521
|
| 10682 |
+
},
|
| 10683 |
+
{
|
| 10684 |
+
"epoch": 0.0001522,
|
| 10685 |
+
"grad_norm": 11.960450172424316,
|
| 10686 |
+
"learning_rate": 1.521e-05,
|
| 10687 |
+
"loss": 87.0625,
|
| 10688 |
+
"step": 1522
|
| 10689 |
+
},
|
| 10690 |
+
{
|
| 10691 |
+
"epoch": 0.0001523,
|
| 10692 |
+
"grad_norm": 11.786050796508789,
|
| 10693 |
+
"learning_rate": 1.522e-05,
|
| 10694 |
+
"loss": 87.3125,
|
| 10695 |
+
"step": 1523
|
| 10696 |
+
},
|
| 10697 |
+
{
|
| 10698 |
+
"epoch": 0.0001524,
|
| 10699 |
+
"grad_norm": 12.02585506439209,
|
| 10700 |
+
"learning_rate": 1.5230000000000002e-05,
|
| 10701 |
+
"loss": 86.0,
|
| 10702 |
+
"step": 1524
|
| 10703 |
+
},
|
| 10704 |
+
{
|
| 10705 |
+
"epoch": 0.0001525,
|
| 10706 |
+
"grad_norm": 11.747179985046387,
|
| 10707 |
+
"learning_rate": 1.5240000000000001e-05,
|
| 10708 |
+
"loss": 87.9375,
|
| 10709 |
+
"step": 1525
|
| 10710 |
+
},
|
| 10711 |
+
{
|
| 10712 |
+
"epoch": 0.0001526,
|
| 10713 |
+
"grad_norm": 11.714150428771973,
|
| 10714 |
+
"learning_rate": 1.525e-05,
|
| 10715 |
+
"loss": 87.25,
|
| 10716 |
+
"step": 1526
|
| 10717 |
+
},
|
| 10718 |
+
{
|
| 10719 |
+
"epoch": 0.0001527,
|
| 10720 |
+
"grad_norm": 11.772976875305176,
|
| 10721 |
+
"learning_rate": 1.526e-05,
|
| 10722 |
+
"loss": 86.5625,
|
| 10723 |
+
"step": 1527
|
| 10724 |
+
},
|
| 10725 |
+
{
|
| 10726 |
+
"epoch": 0.0001528,
|
| 10727 |
+
"grad_norm": 11.734100341796875,
|
| 10728 |
+
"learning_rate": 1.527e-05,
|
| 10729 |
+
"loss": 87.125,
|
| 10730 |
+
"step": 1528
|
| 10731 |
+
},
|
| 10732 |
+
{
|
| 10733 |
+
"epoch": 0.0001529,
|
| 10734 |
+
"grad_norm": 11.775686264038086,
|
| 10735 |
+
"learning_rate": 1.528e-05,
|
| 10736 |
+
"loss": 87.4375,
|
| 10737 |
+
"step": 1529
|
| 10738 |
+
},
|
| 10739 |
+
{
|
| 10740 |
+
"epoch": 0.000153,
|
| 10741 |
+
"grad_norm": 11.796177864074707,
|
| 10742 |
+
"learning_rate": 1.529e-05,
|
| 10743 |
+
"loss": 87.625,
|
| 10744 |
+
"step": 1530
|
| 10745 |
+
},
|
| 10746 |
+
{
|
| 10747 |
+
"epoch": 0.0001531,
|
| 10748 |
+
"grad_norm": 11.95287799835205,
|
| 10749 |
+
"learning_rate": 1.5300000000000003e-05,
|
| 10750 |
+
"loss": 86.125,
|
| 10751 |
+
"step": 1531
|
| 10752 |
+
},
|
| 10753 |
+
{
|
| 10754 |
+
"epoch": 0.0001532,
|
| 10755 |
+
"grad_norm": 11.887791633605957,
|
| 10756 |
+
"learning_rate": 1.531e-05,
|
| 10757 |
+
"loss": 87.0625,
|
| 10758 |
+
"step": 1532
|
| 10759 |
+
},
|
| 10760 |
+
{
|
| 10761 |
+
"epoch": 0.0001533,
|
| 10762 |
+
"grad_norm": 11.548758506774902,
|
| 10763 |
+
"learning_rate": 1.532e-05,
|
| 10764 |
+
"loss": 87.3125,
|
| 10765 |
+
"step": 1533
|
| 10766 |
+
},
|
| 10767 |
+
{
|
| 10768 |
+
"epoch": 0.0001534,
|
| 10769 |
+
"grad_norm": 11.616901397705078,
|
| 10770 |
+
"learning_rate": 1.533e-05,
|
| 10771 |
+
"loss": 86.9375,
|
| 10772 |
+
"step": 1534
|
| 10773 |
+
},
|
| 10774 |
+
{
|
| 10775 |
+
"epoch": 0.0001535,
|
| 10776 |
+
"grad_norm": 11.714616775512695,
|
| 10777 |
+
"learning_rate": 1.5340000000000002e-05,
|
| 10778 |
+
"loss": 86.625,
|
| 10779 |
+
"step": 1535
|
| 10780 |
+
},
|
| 10781 |
+
{
|
| 10782 |
+
"epoch": 0.0001536,
|
| 10783 |
+
"grad_norm": 11.771031379699707,
|
| 10784 |
+
"learning_rate": 1.535e-05,
|
| 10785 |
+
"loss": 85.9375,
|
| 10786 |
+
"step": 1536
|
| 10787 |
+
},
|
| 10788 |
+
{
|
| 10789 |
+
"epoch": 0.0001537,
|
| 10790 |
+
"grad_norm": 11.87447738647461,
|
| 10791 |
+
"learning_rate": 1.5360000000000002e-05,
|
| 10792 |
+
"loss": 86.3125,
|
| 10793 |
+
"step": 1537
|
| 10794 |
+
},
|
| 10795 |
+
{
|
| 10796 |
+
"epoch": 0.0001538,
|
| 10797 |
+
"grad_norm": 11.68323040008545,
|
| 10798 |
+
"learning_rate": 1.537e-05,
|
| 10799 |
+
"loss": 86.4375,
|
| 10800 |
+
"step": 1538
|
| 10801 |
+
},
|
| 10802 |
+
{
|
| 10803 |
+
"epoch": 0.0001539,
|
| 10804 |
+
"grad_norm": 11.763370513916016,
|
| 10805 |
+
"learning_rate": 1.538e-05,
|
| 10806 |
+
"loss": 85.9375,
|
| 10807 |
+
"step": 1539
|
| 10808 |
+
},
|
| 10809 |
+
{
|
| 10810 |
+
"epoch": 0.000154,
|
| 10811 |
+
"grad_norm": 11.63352108001709,
|
| 10812 |
+
"learning_rate": 1.539e-05,
|
| 10813 |
+
"loss": 86.375,
|
| 10814 |
+
"step": 1540
|
| 10815 |
+
},
|
| 10816 |
+
{
|
| 10817 |
+
"epoch": 0.0001541,
|
| 10818 |
+
"grad_norm": 11.659286499023438,
|
| 10819 |
+
"learning_rate": 1.54e-05,
|
| 10820 |
+
"loss": 86.8125,
|
| 10821 |
+
"step": 1541
|
| 10822 |
+
},
|
| 10823 |
+
{
|
| 10824 |
+
"epoch": 0.0001542,
|
| 10825 |
+
"grad_norm": 11.715020179748535,
|
| 10826 |
+
"learning_rate": 1.541e-05,
|
| 10827 |
+
"loss": 87.125,
|
| 10828 |
+
"step": 1542
|
| 10829 |
+
},
|
| 10830 |
+
{
|
| 10831 |
+
"epoch": 0.0001543,
|
| 10832 |
+
"grad_norm": 11.713168144226074,
|
| 10833 |
+
"learning_rate": 1.5419999999999998e-05,
|
| 10834 |
+
"loss": 86.0,
|
| 10835 |
+
"step": 1543
|
| 10836 |
+
},
|
| 10837 |
+
{
|
| 10838 |
+
"epoch": 0.0001544,
|
| 10839 |
+
"grad_norm": 11.8790283203125,
|
| 10840 |
+
"learning_rate": 1.543e-05,
|
| 10841 |
+
"loss": 87.0,
|
| 10842 |
+
"step": 1544
|
| 10843 |
+
},
|
| 10844 |
+
{
|
| 10845 |
+
"epoch": 0.0001545,
|
| 10846 |
+
"grad_norm": 11.551057815551758,
|
| 10847 |
+
"learning_rate": 1.544e-05,
|
| 10848 |
+
"loss": 85.8125,
|
| 10849 |
+
"step": 1545
|
| 10850 |
+
},
|
| 10851 |
+
{
|
| 10852 |
+
"epoch": 0.0001546,
|
| 10853 |
+
"grad_norm": 11.429879188537598,
|
| 10854 |
+
"learning_rate": 1.545e-05,
|
| 10855 |
+
"loss": 88.0,
|
| 10856 |
+
"step": 1546
|
| 10857 |
+
},
|
| 10858 |
+
{
|
| 10859 |
+
"epoch": 0.0001547,
|
| 10860 |
+
"grad_norm": 11.400771141052246,
|
| 10861 |
+
"learning_rate": 1.546e-05,
|
| 10862 |
+
"loss": 87.3125,
|
| 10863 |
+
"step": 1547
|
| 10864 |
+
},
|
| 10865 |
+
{
|
| 10866 |
+
"epoch": 0.0001548,
|
| 10867 |
+
"grad_norm": 11.639250755310059,
|
| 10868 |
+
"learning_rate": 1.5470000000000003e-05,
|
| 10869 |
+
"loss": 86.3125,
|
| 10870 |
+
"step": 1548
|
| 10871 |
+
},
|
| 10872 |
+
{
|
| 10873 |
+
"epoch": 0.0001549,
|
| 10874 |
+
"grad_norm": 11.554698944091797,
|
| 10875 |
+
"learning_rate": 1.548e-05,
|
| 10876 |
+
"loss": 86.0625,
|
| 10877 |
+
"step": 1549
|
| 10878 |
+
},
|
| 10879 |
+
{
|
| 10880 |
+
"epoch": 0.000155,
|
| 10881 |
+
"grad_norm": 11.33513069152832,
|
| 10882 |
+
"learning_rate": 1.549e-05,
|
| 10883 |
+
"loss": 88.0,
|
| 10884 |
+
"step": 1550
|
| 10885 |
+
},
|
| 10886 |
+
{
|
| 10887 |
+
"epoch": 0.0001551,
|
| 10888 |
+
"grad_norm": 11.57883358001709,
|
| 10889 |
+
"learning_rate": 1.55e-05,
|
| 10890 |
+
"loss": 86.4375,
|
| 10891 |
+
"step": 1551
|
| 10892 |
+
},
|
| 10893 |
+
{
|
| 10894 |
+
"epoch": 0.0001552,
|
| 10895 |
+
"grad_norm": 11.587578773498535,
|
| 10896 |
+
"learning_rate": 1.5510000000000002e-05,
|
| 10897 |
+
"loss": 87.25,
|
| 10898 |
+
"step": 1552
|
| 10899 |
+
},
|
| 10900 |
+
{
|
| 10901 |
+
"epoch": 0.0001553,
|
| 10902 |
+
"grad_norm": 11.631257057189941,
|
| 10903 |
+
"learning_rate": 1.552e-05,
|
| 10904 |
+
"loss": 85.5,
|
| 10905 |
+
"step": 1553
|
| 10906 |
+
},
|
| 10907 |
+
{
|
| 10908 |
+
"epoch": 0.0001554,
|
| 10909 |
+
"grad_norm": 11.561725616455078,
|
| 10910 |
+
"learning_rate": 1.5530000000000002e-05,
|
| 10911 |
+
"loss": 85.4375,
|
| 10912 |
+
"step": 1554
|
| 10913 |
+
},
|
| 10914 |
+
{
|
| 10915 |
+
"epoch": 0.0001555,
|
| 10916 |
+
"grad_norm": 11.868837356567383,
|
| 10917 |
+
"learning_rate": 1.554e-05,
|
| 10918 |
+
"loss": 85.5,
|
| 10919 |
+
"step": 1555
|
| 10920 |
+
},
|
| 10921 |
+
{
|
| 10922 |
+
"epoch": 0.0001556,
|
| 10923 |
+
"grad_norm": 12.030888557434082,
|
| 10924 |
+
"learning_rate": 1.555e-05,
|
| 10925 |
+
"loss": 85.4375,
|
| 10926 |
+
"step": 1556
|
| 10927 |
+
},
|
| 10928 |
+
{
|
| 10929 |
+
"epoch": 0.0001557,
|
| 10930 |
+
"grad_norm": 11.721089363098145,
|
| 10931 |
+
"learning_rate": 1.556e-05,
|
| 10932 |
+
"loss": 84.875,
|
| 10933 |
+
"step": 1557
|
| 10934 |
+
},
|
| 10935 |
+
{
|
| 10936 |
+
"epoch": 0.0001558,
|
| 10937 |
+
"grad_norm": 11.465258598327637,
|
| 10938 |
+
"learning_rate": 1.5570000000000002e-05,
|
| 10939 |
+
"loss": 85.75,
|
| 10940 |
+
"step": 1558
|
| 10941 |
+
},
|
| 10942 |
+
{
|
| 10943 |
+
"epoch": 0.0001559,
|
| 10944 |
+
"grad_norm": 11.625471115112305,
|
| 10945 |
+
"learning_rate": 1.558e-05,
|
| 10946 |
+
"loss": 85.375,
|
| 10947 |
+
"step": 1559
|
| 10948 |
+
},
|
| 10949 |
+
{
|
| 10950 |
+
"epoch": 0.000156,
|
| 10951 |
+
"grad_norm": 11.661459922790527,
|
| 10952 |
+
"learning_rate": 1.5589999999999998e-05,
|
| 10953 |
+
"loss": 86.8125,
|
| 10954 |
+
"step": 1560
|
| 10955 |
+
},
|
| 10956 |
+
{
|
| 10957 |
+
"epoch": 0.0001561,
|
| 10958 |
+
"grad_norm": 11.460598945617676,
|
| 10959 |
+
"learning_rate": 1.56e-05,
|
| 10960 |
+
"loss": 85.875,
|
| 10961 |
+
"step": 1561
|
| 10962 |
+
},
|
| 10963 |
+
{
|
| 10964 |
+
"epoch": 0.0001562,
|
| 10965 |
+
"grad_norm": 11.483807563781738,
|
| 10966 |
+
"learning_rate": 1.561e-05,
|
| 10967 |
+
"loss": 85.6875,
|
| 10968 |
+
"step": 1562
|
| 10969 |
+
},
|
| 10970 |
+
{
|
| 10971 |
+
"epoch": 0.0001563,
|
| 10972 |
+
"grad_norm": 11.545580863952637,
|
| 10973 |
+
"learning_rate": 1.562e-05,
|
| 10974 |
+
"loss": 84.9375,
|
| 10975 |
+
"step": 1563
|
| 10976 |
+
},
|
| 10977 |
+
{
|
| 10978 |
+
"epoch": 0.0001564,
|
| 10979 |
+
"grad_norm": 11.488518714904785,
|
| 10980 |
+
"learning_rate": 1.563e-05,
|
| 10981 |
+
"loss": 86.5,
|
| 10982 |
+
"step": 1564
|
| 10983 |
+
},
|
| 10984 |
+
{
|
| 10985 |
+
"epoch": 0.0001565,
|
| 10986 |
+
"grad_norm": 11.404995918273926,
|
| 10987 |
+
"learning_rate": 1.5640000000000003e-05,
|
| 10988 |
+
"loss": 86.0625,
|
| 10989 |
+
"step": 1565
|
| 10990 |
+
},
|
| 10991 |
+
{
|
| 10992 |
+
"epoch": 0.0001566,
|
| 10993 |
+
"grad_norm": 11.470978736877441,
|
| 10994 |
+
"learning_rate": 1.565e-05,
|
| 10995 |
+
"loss": 85.5,
|
| 10996 |
+
"step": 1566
|
| 10997 |
+
},
|
| 10998 |
+
{
|
| 10999 |
+
"epoch": 0.0001567,
|
| 11000 |
+
"grad_norm": 11.510252952575684,
|
| 11001 |
+
"learning_rate": 1.566e-05,
|
| 11002 |
+
"loss": 86.5625,
|
| 11003 |
+
"step": 1567
|
| 11004 |
+
},
|
| 11005 |
+
{
|
| 11006 |
+
"epoch": 0.0001568,
|
| 11007 |
+
"grad_norm": 11.237356185913086,
|
| 11008 |
+
"learning_rate": 1.567e-05,
|
| 11009 |
+
"loss": 86.5625,
|
| 11010 |
+
"step": 1568
|
| 11011 |
+
},
|
| 11012 |
+
{
|
| 11013 |
+
"epoch": 0.0001569,
|
| 11014 |
+
"grad_norm": 11.380836486816406,
|
| 11015 |
+
"learning_rate": 1.5680000000000002e-05,
|
| 11016 |
+
"loss": 86.3125,
|
| 11017 |
+
"step": 1569
|
| 11018 |
+
},
|
| 11019 |
+
{
|
| 11020 |
+
"epoch": 0.000157,
|
| 11021 |
+
"grad_norm": 11.34556770324707,
|
| 11022 |
+
"learning_rate": 1.569e-05,
|
| 11023 |
+
"loss": 87.25,
|
| 11024 |
+
"step": 1570
|
| 11025 |
+
},
|
| 11026 |
+
{
|
| 11027 |
+
"epoch": 0.0001571,
|
| 11028 |
+
"grad_norm": 11.4042387008667,
|
| 11029 |
+
"learning_rate": 1.5700000000000002e-05,
|
| 11030 |
+
"loss": 85.625,
|
| 11031 |
+
"step": 1571
|
| 11032 |
+
},
|
| 11033 |
+
{
|
| 11034 |
+
"epoch": 0.0001572,
|
| 11035 |
+
"grad_norm": 11.649144172668457,
|
| 11036 |
+
"learning_rate": 1.571e-05,
|
| 11037 |
+
"loss": 83.9375,
|
| 11038 |
+
"step": 1572
|
| 11039 |
+
},
|
| 11040 |
+
{
|
| 11041 |
+
"epoch": 0.0001573,
|
| 11042 |
+
"grad_norm": 11.32030200958252,
|
| 11043 |
+
"learning_rate": 1.572e-05,
|
| 11044 |
+
"loss": 86.1875,
|
| 11045 |
+
"step": 1573
|
| 11046 |
+
},
|
| 11047 |
+
{
|
| 11048 |
+
"epoch": 0.0001574,
|
| 11049 |
+
"grad_norm": 11.432470321655273,
|
| 11050 |
+
"learning_rate": 1.573e-05,
|
| 11051 |
+
"loss": 85.75,
|
| 11052 |
+
"step": 1574
|
| 11053 |
+
},
|
| 11054 |
+
{
|
| 11055 |
+
"epoch": 0.0001575,
|
| 11056 |
+
"grad_norm": 11.351807594299316,
|
| 11057 |
+
"learning_rate": 1.5740000000000002e-05,
|
| 11058 |
+
"loss": 85.625,
|
| 11059 |
+
"step": 1575
|
| 11060 |
+
},
|
| 11061 |
+
{
|
| 11062 |
+
"epoch": 0.0001576,
|
| 11063 |
+
"grad_norm": 11.332376480102539,
|
| 11064 |
+
"learning_rate": 1.575e-05,
|
| 11065 |
+
"loss": 85.5,
|
| 11066 |
+
"step": 1576
|
| 11067 |
+
},
|
| 11068 |
+
{
|
| 11069 |
+
"epoch": 0.0001577,
|
| 11070 |
+
"grad_norm": 11.747312545776367,
|
| 11071 |
+
"learning_rate": 1.5759999999999998e-05,
|
| 11072 |
+
"loss": 83.6875,
|
| 11073 |
+
"step": 1577
|
| 11074 |
+
},
|
| 11075 |
+
{
|
| 11076 |
+
"epoch": 0.0001578,
|
| 11077 |
+
"grad_norm": 11.493258476257324,
|
| 11078 |
+
"learning_rate": 1.577e-05,
|
| 11079 |
+
"loss": 85.25,
|
| 11080 |
+
"step": 1578
|
| 11081 |
+
},
|
| 11082 |
+
{
|
| 11083 |
+
"epoch": 0.0001579,
|
| 11084 |
+
"grad_norm": 11.303948402404785,
|
| 11085 |
+
"learning_rate": 1.578e-05,
|
| 11086 |
+
"loss": 85.4375,
|
| 11087 |
+
"step": 1579
|
| 11088 |
+
},
|
| 11089 |
+
{
|
| 11090 |
+
"epoch": 0.000158,
|
| 11091 |
+
"grad_norm": 11.353314399719238,
|
| 11092 |
+
"learning_rate": 1.579e-05,
|
| 11093 |
+
"loss": 85.5,
|
| 11094 |
+
"step": 1580
|
| 11095 |
+
},
|
| 11096 |
+
{
|
| 11097 |
+
"epoch": 0.0001581,
|
| 11098 |
+
"grad_norm": 11.475183486938477,
|
| 11099 |
+
"learning_rate": 1.58e-05,
|
| 11100 |
+
"loss": 85.125,
|
| 11101 |
+
"step": 1581
|
| 11102 |
+
},
|
| 11103 |
+
{
|
| 11104 |
+
"epoch": 0.0001582,
|
| 11105 |
+
"grad_norm": 11.416069984436035,
|
| 11106 |
+
"learning_rate": 1.5810000000000003e-05,
|
| 11107 |
+
"loss": 85.3125,
|
| 11108 |
+
"step": 1582
|
| 11109 |
+
},
|
| 11110 |
+
{
|
| 11111 |
+
"epoch": 0.0001583,
|
| 11112 |
+
"grad_norm": 11.469791412353516,
|
| 11113 |
+
"learning_rate": 1.582e-05,
|
| 11114 |
+
"loss": 83.625,
|
| 11115 |
+
"step": 1583
|
| 11116 |
+
},
|
| 11117 |
+
{
|
| 11118 |
+
"epoch": 0.0001584,
|
| 11119 |
+
"grad_norm": 11.495800018310547,
|
| 11120 |
+
"learning_rate": 1.583e-05,
|
| 11121 |
+
"loss": 84.1875,
|
| 11122 |
+
"step": 1584
|
| 11123 |
+
},
|
| 11124 |
+
{
|
| 11125 |
+
"epoch": 0.0001585,
|
| 11126 |
+
"grad_norm": 11.347009658813477,
|
| 11127 |
+
"learning_rate": 1.584e-05,
|
| 11128 |
+
"loss": 84.3125,
|
| 11129 |
+
"step": 1585
|
| 11130 |
+
},
|
| 11131 |
+
{
|
| 11132 |
+
"epoch": 0.0001586,
|
| 11133 |
+
"grad_norm": 11.404011726379395,
|
| 11134 |
+
"learning_rate": 1.585e-05,
|
| 11135 |
+
"loss": 84.25,
|
| 11136 |
+
"step": 1586
|
| 11137 |
+
},
|
| 11138 |
+
{
|
| 11139 |
+
"epoch": 0.0001587,
|
| 11140 |
+
"grad_norm": 11.331399917602539,
|
| 11141 |
+
"learning_rate": 1.586e-05,
|
| 11142 |
+
"loss": 84.6875,
|
| 11143 |
+
"step": 1587
|
| 11144 |
+
},
|
| 11145 |
+
{
|
| 11146 |
+
"epoch": 0.0001588,
|
| 11147 |
+
"grad_norm": 11.486993789672852,
|
| 11148 |
+
"learning_rate": 1.5870000000000002e-05,
|
| 11149 |
+
"loss": 84.8125,
|
| 11150 |
+
"step": 1588
|
| 11151 |
+
},
|
| 11152 |
+
{
|
| 11153 |
+
"epoch": 0.0001589,
|
| 11154 |
+
"grad_norm": 11.486628532409668,
|
| 11155 |
+
"learning_rate": 1.588e-05,
|
| 11156 |
+
"loss": 84.125,
|
| 11157 |
+
"step": 1589
|
| 11158 |
+
},
|
| 11159 |
+
{
|
| 11160 |
+
"epoch": 0.000159,
|
| 11161 |
+
"grad_norm": 11.41197681427002,
|
| 11162 |
+
"learning_rate": 1.589e-05,
|
| 11163 |
+
"loss": 83.875,
|
| 11164 |
+
"step": 1590
|
| 11165 |
+
},
|
| 11166 |
+
{
|
| 11167 |
+
"epoch": 0.0001591,
|
| 11168 |
+
"grad_norm": 11.50651741027832,
|
| 11169 |
+
"learning_rate": 1.59e-05,
|
| 11170 |
+
"loss": 83.375,
|
| 11171 |
+
"step": 1591
|
| 11172 |
+
},
|
| 11173 |
+
{
|
| 11174 |
+
"epoch": 0.0001592,
|
| 11175 |
+
"grad_norm": 11.40333080291748,
|
| 11176 |
+
"learning_rate": 1.5910000000000002e-05,
|
| 11177 |
+
"loss": 84.4375,
|
| 11178 |
+
"step": 1592
|
| 11179 |
+
},
|
| 11180 |
+
{
|
| 11181 |
+
"epoch": 0.0001593,
|
| 11182 |
+
"grad_norm": 11.41156005859375,
|
| 11183 |
+
"learning_rate": 1.592e-05,
|
| 11184 |
+
"loss": 84.125,
|
| 11185 |
+
"step": 1593
|
| 11186 |
+
},
|
| 11187 |
+
{
|
| 11188 |
+
"epoch": 0.0001594,
|
| 11189 |
+
"grad_norm": 11.424681663513184,
|
| 11190 |
+
"learning_rate": 1.5929999999999998e-05,
|
| 11191 |
+
"loss": 83.25,
|
| 11192 |
+
"step": 1594
|
| 11193 |
+
},
|
| 11194 |
+
{
|
| 11195 |
+
"epoch": 0.0001595,
|
| 11196 |
+
"grad_norm": 11.179929733276367,
|
| 11197 |
+
"learning_rate": 1.594e-05,
|
| 11198 |
+
"loss": 85.25,
|
| 11199 |
+
"step": 1595
|
| 11200 |
+
},
|
| 11201 |
+
{
|
| 11202 |
+
"epoch": 0.0001596,
|
| 11203 |
+
"grad_norm": 11.350379943847656,
|
| 11204 |
+
"learning_rate": 1.595e-05,
|
| 11205 |
+
"loss": 83.625,
|
| 11206 |
+
"step": 1596
|
| 11207 |
+
},
|
| 11208 |
+
{
|
| 11209 |
+
"epoch": 0.0001597,
|
| 11210 |
+
"grad_norm": 11.30370044708252,
|
| 11211 |
+
"learning_rate": 1.596e-05,
|
| 11212 |
+
"loss": 83.875,
|
| 11213 |
+
"step": 1597
|
| 11214 |
+
},
|
| 11215 |
+
{
|
| 11216 |
+
"epoch": 0.0001598,
|
| 11217 |
+
"grad_norm": 11.342832565307617,
|
| 11218 |
+
"learning_rate": 1.597e-05,
|
| 11219 |
+
"loss": 83.25,
|
| 11220 |
+
"step": 1598
|
| 11221 |
+
},
|
| 11222 |
+
{
|
| 11223 |
+
"epoch": 0.0001599,
|
| 11224 |
+
"grad_norm": 11.332225799560547,
|
| 11225 |
+
"learning_rate": 1.5980000000000003e-05,
|
| 11226 |
+
"loss": 83.5625,
|
| 11227 |
+
"step": 1599
|
| 11228 |
+
},
|
| 11229 |
+
{
|
| 11230 |
+
"epoch": 0.00016,
|
| 11231 |
+
"grad_norm": 11.266222953796387,
|
| 11232 |
+
"learning_rate": 1.5989999999999998e-05,
|
| 11233 |
+
"loss": 84.1875,
|
| 11234 |
+
"step": 1600
|
| 11235 |
+
},
|
| 11236 |
+
{
|
| 11237 |
+
"epoch": 0.0001601,
|
| 11238 |
+
"grad_norm": 11.43062686920166,
|
| 11239 |
+
"learning_rate": 1.6e-05,
|
| 11240 |
+
"loss": 83.5625,
|
| 11241 |
+
"step": 1601
|
| 11242 |
+
},
|
| 11243 |
+
{
|
| 11244 |
+
"epoch": 0.0001602,
|
| 11245 |
+
"grad_norm": 11.166046142578125,
|
| 11246 |
+
"learning_rate": 1.601e-05,
|
| 11247 |
+
"loss": 84.5,
|
| 11248 |
+
"step": 1602
|
| 11249 |
+
},
|
| 11250 |
+
{
|
| 11251 |
+
"epoch": 0.0001603,
|
| 11252 |
+
"grad_norm": 11.344799041748047,
|
| 11253 |
+
"learning_rate": 1.602e-05,
|
| 11254 |
+
"loss": 83.1875,
|
| 11255 |
+
"step": 1603
|
| 11256 |
+
},
|
| 11257 |
+
{
|
| 11258 |
+
"epoch": 0.0001604,
|
| 11259 |
+
"grad_norm": 11.39476203918457,
|
| 11260 |
+
"learning_rate": 1.603e-05,
|
| 11261 |
+
"loss": 83.625,
|
| 11262 |
+
"step": 1604
|
| 11263 |
+
},
|
| 11264 |
+
{
|
| 11265 |
+
"epoch": 0.0001605,
|
| 11266 |
+
"grad_norm": 11.016305923461914,
|
| 11267 |
+
"learning_rate": 1.604e-05,
|
| 11268 |
+
"loss": 84.9375,
|
| 11269 |
+
"step": 1605
|
| 11270 |
+
},
|
| 11271 |
+
{
|
| 11272 |
+
"epoch": 0.0001606,
|
| 11273 |
+
"grad_norm": 11.331496238708496,
|
| 11274 |
+
"learning_rate": 1.605e-05,
|
| 11275 |
+
"loss": 83.6875,
|
| 11276 |
+
"step": 1606
|
| 11277 |
+
},
|
| 11278 |
+
{
|
| 11279 |
+
"epoch": 0.0001607,
|
| 11280 |
+
"grad_norm": 11.173765182495117,
|
| 11281 |
+
"learning_rate": 1.606e-05,
|
| 11282 |
+
"loss": 84.375,
|
| 11283 |
+
"step": 1607
|
| 11284 |
+
},
|
| 11285 |
+
{
|
| 11286 |
+
"epoch": 0.0001608,
|
| 11287 |
+
"grad_norm": 11.105717658996582,
|
| 11288 |
+
"learning_rate": 1.607e-05,
|
| 11289 |
+
"loss": 85.1875,
|
| 11290 |
+
"step": 1608
|
| 11291 |
+
},
|
| 11292 |
+
{
|
| 11293 |
+
"epoch": 0.0001609,
|
| 11294 |
+
"grad_norm": 11.248224258422852,
|
| 11295 |
+
"learning_rate": 1.6080000000000002e-05,
|
| 11296 |
+
"loss": 83.8125,
|
| 11297 |
+
"step": 1609
|
| 11298 |
+
},
|
| 11299 |
+
{
|
| 11300 |
+
"epoch": 0.000161,
|
| 11301 |
+
"grad_norm": 11.4295015335083,
|
| 11302 |
+
"learning_rate": 1.609e-05,
|
| 11303 |
+
"loss": 82.8125,
|
| 11304 |
+
"step": 1610
|
| 11305 |
+
},
|
| 11306 |
+
{
|
| 11307 |
+
"epoch": 0.0001611,
|
| 11308 |
+
"grad_norm": 11.350955963134766,
|
| 11309 |
+
"learning_rate": 1.61e-05,
|
| 11310 |
+
"loss": 83.9375,
|
| 11311 |
+
"step": 1611
|
| 11312 |
+
},
|
| 11313 |
+
{
|
| 11314 |
+
"epoch": 0.0001612,
|
| 11315 |
+
"grad_norm": 11.247148513793945,
|
| 11316 |
+
"learning_rate": 1.611e-05,
|
| 11317 |
+
"loss": 83.4375,
|
| 11318 |
+
"step": 1612
|
| 11319 |
+
},
|
| 11320 |
+
{
|
| 11321 |
+
"epoch": 0.0001613,
|
| 11322 |
+
"grad_norm": 11.086236000061035,
|
| 11323 |
+
"learning_rate": 1.612e-05,
|
| 11324 |
+
"loss": 84.3125,
|
| 11325 |
+
"step": 1613
|
| 11326 |
+
},
|
| 11327 |
+
{
|
| 11328 |
+
"epoch": 0.0001614,
|
| 11329 |
+
"grad_norm": 11.098573684692383,
|
| 11330 |
+
"learning_rate": 1.613e-05,
|
| 11331 |
+
"loss": 83.875,
|
| 11332 |
+
"step": 1614
|
| 11333 |
+
},
|
| 11334 |
+
{
|
| 11335 |
+
"epoch": 0.0001615,
|
| 11336 |
+
"grad_norm": 11.348047256469727,
|
| 11337 |
+
"learning_rate": 1.614e-05,
|
| 11338 |
+
"loss": 83.1875,
|
| 11339 |
+
"step": 1615
|
| 11340 |
+
},
|
| 11341 |
+
{
|
| 11342 |
+
"epoch": 0.0001616,
|
| 11343 |
+
"grad_norm": 11.169997215270996,
|
| 11344 |
+
"learning_rate": 1.6150000000000003e-05,
|
| 11345 |
+
"loss": 83.1875,
|
| 11346 |
+
"step": 1616
|
| 11347 |
+
},
|
| 11348 |
+
{
|
| 11349 |
+
"epoch": 0.0001617,
|
| 11350 |
+
"grad_norm": 11.257536888122559,
|
| 11351 |
+
"learning_rate": 1.6159999999999998e-05,
|
| 11352 |
+
"loss": 83.5,
|
| 11353 |
+
"step": 1617
|
| 11354 |
+
},
|
| 11355 |
+
{
|
| 11356 |
+
"epoch": 0.0001618,
|
| 11357 |
+
"grad_norm": 11.304291725158691,
|
| 11358 |
+
"learning_rate": 1.617e-05,
|
| 11359 |
+
"loss": 83.9375,
|
| 11360 |
+
"step": 1618
|
| 11361 |
+
},
|
| 11362 |
+
{
|
| 11363 |
+
"epoch": 0.0001619,
|
| 11364 |
+
"grad_norm": 11.34911823272705,
|
| 11365 |
+
"learning_rate": 1.618e-05,
|
| 11366 |
+
"loss": 82.9375,
|
| 11367 |
+
"step": 1619
|
| 11368 |
+
},
|
| 11369 |
+
{
|
| 11370 |
+
"epoch": 0.000162,
|
| 11371 |
+
"grad_norm": 11.359582901000977,
|
| 11372 |
+
"learning_rate": 1.619e-05,
|
| 11373 |
+
"loss": 82.6875,
|
| 11374 |
+
"step": 1620
|
| 11375 |
+
},
|
| 11376 |
+
{
|
| 11377 |
+
"epoch": 0.0001621,
|
| 11378 |
+
"grad_norm": 11.43436336517334,
|
| 11379 |
+
"learning_rate": 1.62e-05,
|
| 11380 |
+
"loss": 81.5,
|
| 11381 |
+
"step": 1621
|
| 11382 |
+
},
|
| 11383 |
+
{
|
| 11384 |
+
"epoch": 0.0001622,
|
| 11385 |
+
"grad_norm": 11.373072624206543,
|
| 11386 |
+
"learning_rate": 1.621e-05,
|
| 11387 |
+
"loss": 82.4375,
|
| 11388 |
+
"step": 1622
|
| 11389 |
+
},
|
| 11390 |
+
{
|
| 11391 |
+
"epoch": 0.0001623,
|
| 11392 |
+
"grad_norm": 11.195119857788086,
|
| 11393 |
+
"learning_rate": 1.622e-05,
|
| 11394 |
+
"loss": 84.0625,
|
| 11395 |
+
"step": 1623
|
| 11396 |
+
},
|
| 11397 |
+
{
|
| 11398 |
+
"epoch": 0.0001624,
|
| 11399 |
+
"grad_norm": 11.049300193786621,
|
| 11400 |
+
"learning_rate": 1.623e-05,
|
| 11401 |
+
"loss": 83.375,
|
| 11402 |
+
"step": 1624
|
| 11403 |
+
},
|
| 11404 |
+
{
|
| 11405 |
+
"epoch": 0.0001625,
|
| 11406 |
+
"grad_norm": 11.10819149017334,
|
| 11407 |
+
"learning_rate": 1.624e-05,
|
| 11408 |
+
"loss": 83.4375,
|
| 11409 |
+
"step": 1625
|
| 11410 |
+
},
|
| 11411 |
+
{
|
| 11412 |
+
"epoch": 0.0001626,
|
| 11413 |
+
"grad_norm": 11.200508117675781,
|
| 11414 |
+
"learning_rate": 1.6250000000000002e-05,
|
| 11415 |
+
"loss": 82.5625,
|
| 11416 |
+
"step": 1626
|
| 11417 |
+
},
|
| 11418 |
+
{
|
| 11419 |
+
"epoch": 0.0001627,
|
| 11420 |
+
"grad_norm": 11.225939750671387,
|
| 11421 |
+
"learning_rate": 1.626e-05,
|
| 11422 |
+
"loss": 82.5625,
|
| 11423 |
+
"step": 1627
|
| 11424 |
+
},
|
| 11425 |
+
{
|
| 11426 |
+
"epoch": 0.0001628,
|
| 11427 |
+
"grad_norm": 11.106568336486816,
|
| 11428 |
+
"learning_rate": 1.627e-05,
|
| 11429 |
+
"loss": 83.375,
|
| 11430 |
+
"step": 1628
|
| 11431 |
+
},
|
| 11432 |
+
{
|
| 11433 |
+
"epoch": 0.0001629,
|
| 11434 |
+
"grad_norm": 11.150421142578125,
|
| 11435 |
+
"learning_rate": 1.628e-05,
|
| 11436 |
+
"loss": 83.0625,
|
| 11437 |
+
"step": 1629
|
| 11438 |
+
},
|
| 11439 |
+
{
|
| 11440 |
+
"epoch": 0.000163,
|
| 11441 |
+
"grad_norm": 11.192241668701172,
|
| 11442 |
+
"learning_rate": 1.629e-05,
|
| 11443 |
+
"loss": 82.5625,
|
| 11444 |
+
"step": 1630
|
| 11445 |
+
},
|
| 11446 |
+
{
|
| 11447 |
+
"epoch": 0.0001631,
|
| 11448 |
+
"grad_norm": 11.128030776977539,
|
| 11449 |
+
"learning_rate": 1.63e-05,
|
| 11450 |
+
"loss": 82.6875,
|
| 11451 |
+
"step": 1631
|
| 11452 |
+
},
|
| 11453 |
+
{
|
| 11454 |
+
"epoch": 0.0001632,
|
| 11455 |
+
"grad_norm": 11.366748809814453,
|
| 11456 |
+
"learning_rate": 1.631e-05,
|
| 11457 |
+
"loss": 81.0,
|
| 11458 |
+
"step": 1632
|
| 11459 |
+
},
|
| 11460 |
+
{
|
| 11461 |
+
"epoch": 0.0001633,
|
| 11462 |
+
"grad_norm": 11.179375648498535,
|
| 11463 |
+
"learning_rate": 1.6320000000000003e-05,
|
| 11464 |
+
"loss": 82.9375,
|
| 11465 |
+
"step": 1633
|
| 11466 |
+
},
|
| 11467 |
+
{
|
| 11468 |
+
"epoch": 0.0001634,
|
| 11469 |
+
"grad_norm": 10.959238052368164,
|
| 11470 |
+
"learning_rate": 1.6329999999999998e-05,
|
| 11471 |
+
"loss": 83.75,
|
| 11472 |
+
"step": 1634
|
| 11473 |
+
},
|
| 11474 |
+
{
|
| 11475 |
+
"epoch": 0.0001635,
|
| 11476 |
+
"grad_norm": 11.190832138061523,
|
| 11477 |
+
"learning_rate": 1.634e-05,
|
| 11478 |
+
"loss": 82.3125,
|
| 11479 |
+
"step": 1635
|
| 11480 |
+
},
|
| 11481 |
+
{
|
| 11482 |
+
"epoch": 0.0001636,
|
| 11483 |
+
"grad_norm": 11.21330738067627,
|
| 11484 |
+
"learning_rate": 1.635e-05,
|
| 11485 |
+
"loss": 81.6875,
|
| 11486 |
+
"step": 1636
|
| 11487 |
+
},
|
| 11488 |
+
{
|
| 11489 |
+
"epoch": 0.0001637,
|
| 11490 |
+
"grad_norm": 11.11747932434082,
|
| 11491 |
+
"learning_rate": 1.636e-05,
|
| 11492 |
+
"loss": 81.9375,
|
| 11493 |
+
"step": 1637
|
| 11494 |
+
},
|
| 11495 |
+
{
|
| 11496 |
+
"epoch": 0.0001638,
|
| 11497 |
+
"grad_norm": 11.298837661743164,
|
| 11498 |
+
"learning_rate": 1.637e-05,
|
| 11499 |
+
"loss": 82.25,
|
| 11500 |
+
"step": 1638
|
| 11501 |
+
},
|
| 11502 |
+
{
|
| 11503 |
+
"epoch": 0.0001639,
|
| 11504 |
+
"grad_norm": 11.277381896972656,
|
| 11505 |
+
"learning_rate": 1.638e-05,
|
| 11506 |
+
"loss": 81.0,
|
| 11507 |
+
"step": 1639
|
| 11508 |
+
},
|
| 11509 |
+
{
|
| 11510 |
+
"epoch": 0.000164,
|
| 11511 |
+
"grad_norm": 11.104133605957031,
|
| 11512 |
+
"learning_rate": 1.639e-05,
|
| 11513 |
+
"loss": 82.1875,
|
| 11514 |
+
"step": 1640
|
| 11515 |
+
},
|
| 11516 |
+
{
|
| 11517 |
+
"epoch": 0.0001641,
|
| 11518 |
+
"grad_norm": 11.214916229248047,
|
| 11519 |
+
"learning_rate": 1.64e-05,
|
| 11520 |
+
"loss": 82.0625,
|
| 11521 |
+
"step": 1641
|
| 11522 |
+
},
|
| 11523 |
+
{
|
| 11524 |
+
"epoch": 0.0001642,
|
| 11525 |
+
"grad_norm": 11.148131370544434,
|
| 11526 |
+
"learning_rate": 1.641e-05,
|
| 11527 |
+
"loss": 82.8125,
|
| 11528 |
+
"step": 1642
|
| 11529 |
+
},
|
| 11530 |
+
{
|
| 11531 |
+
"epoch": 0.0001643,
|
| 11532 |
+
"grad_norm": 11.112770080566406,
|
| 11533 |
+
"learning_rate": 1.6420000000000002e-05,
|
| 11534 |
+
"loss": 82.5,
|
| 11535 |
+
"step": 1643
|
| 11536 |
+
},
|
| 11537 |
+
{
|
| 11538 |
+
"epoch": 0.0001644,
|
| 11539 |
+
"grad_norm": 11.18635368347168,
|
| 11540 |
+
"learning_rate": 1.643e-05,
|
| 11541 |
+
"loss": 80.75,
|
| 11542 |
+
"step": 1644
|
| 11543 |
+
},
|
| 11544 |
+
{
|
| 11545 |
+
"epoch": 0.0001645,
|
| 11546 |
+
"grad_norm": 11.048691749572754,
|
| 11547 |
+
"learning_rate": 1.644e-05,
|
| 11548 |
+
"loss": 83.8125,
|
| 11549 |
+
"step": 1645
|
| 11550 |
+
},
|
| 11551 |
+
{
|
| 11552 |
+
"epoch": 0.0001646,
|
| 11553 |
+
"grad_norm": 11.135150909423828,
|
| 11554 |
+
"learning_rate": 1.645e-05,
|
| 11555 |
+
"loss": 82.125,
|
| 11556 |
+
"step": 1646
|
| 11557 |
+
},
|
| 11558 |
+
{
|
| 11559 |
+
"epoch": 0.0001647,
|
| 11560 |
+
"grad_norm": 11.023847579956055,
|
| 11561 |
+
"learning_rate": 1.646e-05,
|
| 11562 |
+
"loss": 82.1875,
|
| 11563 |
+
"step": 1647
|
| 11564 |
+
},
|
| 11565 |
+
{
|
| 11566 |
+
"epoch": 0.0001648,
|
| 11567 |
+
"grad_norm": 11.032581329345703,
|
| 11568 |
+
"learning_rate": 1.647e-05,
|
| 11569 |
+
"loss": 82.25,
|
| 11570 |
+
"step": 1648
|
| 11571 |
+
},
|
| 11572 |
+
{
|
| 11573 |
+
"epoch": 0.0001649,
|
| 11574 |
+
"grad_norm": 10.882695198059082,
|
| 11575 |
+
"learning_rate": 1.648e-05,
|
| 11576 |
+
"loss": 84.9375,
|
| 11577 |
+
"step": 1649
|
| 11578 |
+
},
|
| 11579 |
+
{
|
| 11580 |
+
"epoch": 0.000165,
|
| 11581 |
+
"grad_norm": 10.961777687072754,
|
| 11582 |
+
"learning_rate": 1.6490000000000003e-05,
|
| 11583 |
+
"loss": 81.8125,
|
| 11584 |
+
"step": 1650
|
| 11585 |
+
},
|
| 11586 |
+
{
|
| 11587 |
+
"epoch": 0.0001651,
|
| 11588 |
+
"grad_norm": 11.20870304107666,
|
| 11589 |
+
"learning_rate": 1.6499999999999998e-05,
|
| 11590 |
+
"loss": 82.1875,
|
| 11591 |
+
"step": 1651
|
| 11592 |
+
},
|
| 11593 |
+
{
|
| 11594 |
+
"epoch": 0.0001652,
|
| 11595 |
+
"grad_norm": 11.134272575378418,
|
| 11596 |
+
"learning_rate": 1.651e-05,
|
| 11597 |
+
"loss": 81.5,
|
| 11598 |
+
"step": 1652
|
| 11599 |
+
},
|
| 11600 |
+
{
|
| 11601 |
+
"epoch": 0.0001653,
|
| 11602 |
+
"grad_norm": 10.889755249023438,
|
| 11603 |
+
"learning_rate": 1.652e-05,
|
| 11604 |
+
"loss": 83.5625,
|
| 11605 |
+
"step": 1653
|
| 11606 |
+
},
|
| 11607 |
+
{
|
| 11608 |
+
"epoch": 0.0001654,
|
| 11609 |
+
"grad_norm": 11.065031051635742,
|
| 11610 |
+
"learning_rate": 1.653e-05,
|
| 11611 |
+
"loss": 82.375,
|
| 11612 |
+
"step": 1654
|
| 11613 |
+
},
|
| 11614 |
+
{
|
| 11615 |
+
"epoch": 0.0001655,
|
| 11616 |
+
"grad_norm": 11.27316665649414,
|
| 11617 |
+
"learning_rate": 1.654e-05,
|
| 11618 |
+
"loss": 79.8125,
|
| 11619 |
+
"step": 1655
|
| 11620 |
+
},
|
| 11621 |
+
{
|
| 11622 |
+
"epoch": 0.0001656,
|
| 11623 |
+
"grad_norm": 11.198715209960938,
|
| 11624 |
+
"learning_rate": 1.655e-05,
|
| 11625 |
+
"loss": 80.75,
|
| 11626 |
+
"step": 1656
|
| 11627 |
+
},
|
| 11628 |
+
{
|
| 11629 |
+
"epoch": 0.0001657,
|
| 11630 |
+
"grad_norm": 11.055648803710938,
|
| 11631 |
+
"learning_rate": 1.656e-05,
|
| 11632 |
+
"loss": 81.9375,
|
| 11633 |
+
"step": 1657
|
| 11634 |
+
},
|
| 11635 |
+
{
|
| 11636 |
+
"epoch": 0.0001658,
|
| 11637 |
+
"grad_norm": 11.146472930908203,
|
| 11638 |
+
"learning_rate": 1.657e-05,
|
| 11639 |
+
"loss": 81.0625,
|
| 11640 |
+
"step": 1658
|
| 11641 |
+
},
|
| 11642 |
+
{
|
| 11643 |
+
"epoch": 0.0001659,
|
| 11644 |
+
"grad_norm": 11.19427490234375,
|
| 11645 |
+
"learning_rate": 1.658e-05,
|
| 11646 |
+
"loss": 80.5,
|
| 11647 |
+
"step": 1659
|
| 11648 |
+
},
|
| 11649 |
+
{
|
| 11650 |
+
"epoch": 0.000166,
|
| 11651 |
+
"grad_norm": 10.967174530029297,
|
| 11652 |
+
"learning_rate": 1.6590000000000002e-05,
|
| 11653 |
+
"loss": 82.875,
|
| 11654 |
+
"step": 1660
|
| 11655 |
+
},
|
| 11656 |
+
{
|
| 11657 |
+
"epoch": 0.0001661,
|
| 11658 |
+
"grad_norm": 11.37897777557373,
|
| 11659 |
+
"learning_rate": 1.66e-05,
|
| 11660 |
+
"loss": 79.75,
|
| 11661 |
+
"step": 1661
|
| 11662 |
+
},
|
| 11663 |
+
{
|
| 11664 |
+
"epoch": 0.0001662,
|
| 11665 |
+
"grad_norm": 11.097198486328125,
|
| 11666 |
+
"learning_rate": 1.661e-05,
|
| 11667 |
+
"loss": 82.0625,
|
| 11668 |
+
"step": 1662
|
| 11669 |
+
},
|
| 11670 |
+
{
|
| 11671 |
+
"epoch": 0.0001663,
|
| 11672 |
+
"grad_norm": 10.96146011352539,
|
| 11673 |
+
"learning_rate": 1.662e-05,
|
| 11674 |
+
"loss": 81.125,
|
| 11675 |
+
"step": 1663
|
| 11676 |
+
},
|
| 11677 |
+
{
|
| 11678 |
+
"epoch": 0.0001664,
|
| 11679 |
+
"grad_norm": 11.034396171569824,
|
| 11680 |
+
"learning_rate": 1.6630000000000002e-05,
|
| 11681 |
+
"loss": 80.0625,
|
| 11682 |
+
"step": 1664
|
| 11683 |
+
},
|
| 11684 |
+
{
|
| 11685 |
+
"epoch": 0.0001665,
|
| 11686 |
+
"grad_norm": 11.061921119689941,
|
| 11687 |
+
"learning_rate": 1.664e-05,
|
| 11688 |
+
"loss": 81.875,
|
| 11689 |
+
"step": 1665
|
| 11690 |
+
},
|
| 11691 |
+
{
|
| 11692 |
+
"epoch": 0.0001666,
|
| 11693 |
+
"grad_norm": 11.300657272338867,
|
| 11694 |
+
"learning_rate": 1.665e-05,
|
| 11695 |
+
"loss": 79.8125,
|
| 11696 |
+
"step": 1666
|
| 11697 |
+
},
|
| 11698 |
+
{
|
| 11699 |
+
"epoch": 0.0001667,
|
| 11700 |
+
"grad_norm": 11.197166442871094,
|
| 11701 |
+
"learning_rate": 1.6660000000000003e-05,
|
| 11702 |
+
"loss": 80.375,
|
| 11703 |
+
"step": 1667
|
| 11704 |
+
},
|
| 11705 |
+
{
|
| 11706 |
+
"epoch": 0.0001668,
|
| 11707 |
+
"grad_norm": 10.98279094696045,
|
| 11708 |
+
"learning_rate": 1.6669999999999998e-05,
|
| 11709 |
+
"loss": 81.9375,
|
| 11710 |
+
"step": 1668
|
| 11711 |
+
},
|
| 11712 |
+
{
|
| 11713 |
+
"epoch": 0.0001669,
|
| 11714 |
+
"grad_norm": 11.043325424194336,
|
| 11715 |
+
"learning_rate": 1.668e-05,
|
| 11716 |
+
"loss": 80.75,
|
| 11717 |
+
"step": 1669
|
| 11718 |
+
},
|
| 11719 |
+
{
|
| 11720 |
+
"epoch": 0.000167,
|
| 11721 |
+
"grad_norm": 10.966551780700684,
|
| 11722 |
+
"learning_rate": 1.669e-05,
|
| 11723 |
+
"loss": 80.375,
|
| 11724 |
+
"step": 1670
|
| 11725 |
+
},
|
| 11726 |
+
{
|
| 11727 |
+
"epoch": 0.0001671,
|
| 11728 |
+
"grad_norm": 10.982937812805176,
|
| 11729 |
+
"learning_rate": 1.67e-05,
|
| 11730 |
+
"loss": 80.5,
|
| 11731 |
+
"step": 1671
|
| 11732 |
+
},
|
| 11733 |
+
{
|
| 11734 |
+
"epoch": 0.0001672,
|
| 11735 |
+
"grad_norm": 11.045061111450195,
|
| 11736 |
+
"learning_rate": 1.671e-05,
|
| 11737 |
+
"loss": 81.125,
|
| 11738 |
+
"step": 1672
|
| 11739 |
+
},
|
| 11740 |
+
{
|
| 11741 |
+
"epoch": 0.0001673,
|
| 11742 |
+
"grad_norm": 11.25208854675293,
|
| 11743 |
+
"learning_rate": 1.672e-05,
|
| 11744 |
+
"loss": 80.0625,
|
| 11745 |
+
"step": 1673
|
| 11746 |
+
},
|
| 11747 |
+
{
|
| 11748 |
+
"epoch": 0.0001674,
|
| 11749 |
+
"grad_norm": 10.908302307128906,
|
| 11750 |
+
"learning_rate": 1.673e-05,
|
| 11751 |
+
"loss": 80.875,
|
| 11752 |
+
"step": 1674
|
| 11753 |
+
},
|
| 11754 |
+
{
|
| 11755 |
+
"epoch": 0.0001675,
|
| 11756 |
+
"grad_norm": 10.964232444763184,
|
| 11757 |
+
"learning_rate": 1.674e-05,
|
| 11758 |
+
"loss": 79.6875,
|
| 11759 |
+
"step": 1675
|
| 11760 |
+
},
|
| 11761 |
+
{
|
| 11762 |
+
"epoch": 0.0001676,
|
| 11763 |
+
"grad_norm": 10.88063907623291,
|
| 11764 |
+
"learning_rate": 1.675e-05,
|
| 11765 |
+
"loss": 80.9375,
|
| 11766 |
+
"step": 1676
|
| 11767 |
+
},
|
| 11768 |
+
{
|
| 11769 |
+
"epoch": 0.0001677,
|
| 11770 |
+
"grad_norm": 11.139262199401855,
|
| 11771 |
+
"learning_rate": 1.6760000000000002e-05,
|
| 11772 |
+
"loss": 79.4375,
|
| 11773 |
+
"step": 1677
|
| 11774 |
+
},
|
| 11775 |
+
{
|
| 11776 |
+
"epoch": 0.0001678,
|
| 11777 |
+
"grad_norm": 11.198766708374023,
|
| 11778 |
+
"learning_rate": 1.677e-05,
|
| 11779 |
+
"loss": 79.125,
|
| 11780 |
+
"step": 1678
|
| 11781 |
+
},
|
| 11782 |
+
{
|
| 11783 |
+
"epoch": 0.0001679,
|
| 11784 |
+
"grad_norm": 10.993010520935059,
|
| 11785 |
+
"learning_rate": 1.678e-05,
|
| 11786 |
+
"loss": 81.25,
|
| 11787 |
+
"step": 1679
|
| 11788 |
+
},
|
| 11789 |
+
{
|
| 11790 |
+
"epoch": 0.000168,
|
| 11791 |
+
"grad_norm": 11.06545352935791,
|
| 11792 |
+
"learning_rate": 1.679e-05,
|
| 11793 |
+
"loss": 79.4375,
|
| 11794 |
+
"step": 1680
|
| 11795 |
+
},
|
| 11796 |
+
{
|
| 11797 |
+
"epoch": 0.0001681,
|
| 11798 |
+
"grad_norm": 10.88327693939209,
|
| 11799 |
+
"learning_rate": 1.6800000000000002e-05,
|
| 11800 |
+
"loss": 81.0625,
|
| 11801 |
+
"step": 1681
|
| 11802 |
+
},
|
| 11803 |
+
{
|
| 11804 |
+
"epoch": 0.0001682,
|
| 11805 |
+
"grad_norm": 10.90662956237793,
|
| 11806 |
+
"learning_rate": 1.681e-05,
|
| 11807 |
+
"loss": 81.0625,
|
| 11808 |
+
"step": 1682
|
| 11809 |
+
},
|
| 11810 |
+
{
|
| 11811 |
+
"epoch": 0.0001683,
|
| 11812 |
+
"grad_norm": 10.89114761352539,
|
| 11813 |
+
"learning_rate": 1.682e-05,
|
| 11814 |
+
"loss": 81.375,
|
| 11815 |
+
"step": 1683
|
| 11816 |
+
},
|
| 11817 |
+
{
|
| 11818 |
+
"epoch": 0.0001684,
|
| 11819 |
+
"grad_norm": 10.992010116577148,
|
| 11820 |
+
"learning_rate": 1.6830000000000003e-05,
|
| 11821 |
+
"loss": 80.6875,
|
| 11822 |
+
"step": 1684
|
| 11823 |
+
},
|
| 11824 |
+
{
|
| 11825 |
+
"epoch": 0.0001685,
|
| 11826 |
+
"grad_norm": 11.057424545288086,
|
| 11827 |
+
"learning_rate": 1.6839999999999998e-05,
|
| 11828 |
+
"loss": 79.6875,
|
| 11829 |
+
"step": 1685
|
| 11830 |
+
},
|
| 11831 |
+
{
|
| 11832 |
+
"epoch": 0.0001686,
|
| 11833 |
+
"grad_norm": 10.815861701965332,
|
| 11834 |
+
"learning_rate": 1.685e-05,
|
| 11835 |
+
"loss": 80.8125,
|
| 11836 |
+
"step": 1686
|
| 11837 |
+
},
|
| 11838 |
+
{
|
| 11839 |
+
"epoch": 0.0001687,
|
| 11840 |
+
"grad_norm": 10.851166725158691,
|
| 11841 |
+
"learning_rate": 1.686e-05,
|
| 11842 |
+
"loss": 81.75,
|
| 11843 |
+
"step": 1687
|
| 11844 |
+
},
|
| 11845 |
+
{
|
| 11846 |
+
"epoch": 0.0001688,
|
| 11847 |
+
"grad_norm": 11.063788414001465,
|
| 11848 |
+
"learning_rate": 1.687e-05,
|
| 11849 |
+
"loss": 80.0,
|
| 11850 |
+
"step": 1688
|
| 11851 |
+
},
|
| 11852 |
+
{
|
| 11853 |
+
"epoch": 0.0001689,
|
| 11854 |
+
"grad_norm": 10.901667594909668,
|
| 11855 |
+
"learning_rate": 1.688e-05,
|
| 11856 |
+
"loss": 79.8125,
|
| 11857 |
+
"step": 1689
|
| 11858 |
+
},
|
| 11859 |
+
{
|
| 11860 |
+
"epoch": 0.000169,
|
| 11861 |
+
"grad_norm": 10.897828102111816,
|
| 11862 |
+
"learning_rate": 1.689e-05,
|
| 11863 |
+
"loss": 79.375,
|
| 11864 |
+
"step": 1690
|
| 11865 |
+
},
|
| 11866 |
+
{
|
| 11867 |
+
"epoch": 0.0001691,
|
| 11868 |
+
"grad_norm": 10.9066162109375,
|
| 11869 |
+
"learning_rate": 1.69e-05,
|
| 11870 |
+
"loss": 80.25,
|
| 11871 |
+
"step": 1691
|
| 11872 |
+
},
|
| 11873 |
+
{
|
| 11874 |
+
"epoch": 0.0001692,
|
| 11875 |
+
"grad_norm": 11.124065399169922,
|
| 11876 |
+
"learning_rate": 1.691e-05,
|
| 11877 |
+
"loss": 79.4375,
|
| 11878 |
+
"step": 1692
|
| 11879 |
+
},
|
| 11880 |
+
{
|
| 11881 |
+
"epoch": 0.0001693,
|
| 11882 |
+
"grad_norm": 10.849020957946777,
|
| 11883 |
+
"learning_rate": 1.692e-05,
|
| 11884 |
+
"loss": 80.25,
|
| 11885 |
+
"step": 1693
|
| 11886 |
+
},
|
| 11887 |
+
{
|
| 11888 |
+
"epoch": 0.0001694,
|
| 11889 |
+
"grad_norm": 10.995338439941406,
|
| 11890 |
+
"learning_rate": 1.6930000000000002e-05,
|
| 11891 |
+
"loss": 80.1875,
|
| 11892 |
+
"step": 1694
|
| 11893 |
+
},
|
| 11894 |
+
{
|
| 11895 |
+
"epoch": 0.0001695,
|
| 11896 |
+
"grad_norm": 10.745613098144531,
|
| 11897 |
+
"learning_rate": 1.694e-05,
|
| 11898 |
+
"loss": 81.3125,
|
| 11899 |
+
"step": 1695
|
| 11900 |
+
},
|
| 11901 |
+
{
|
| 11902 |
+
"epoch": 0.0001696,
|
| 11903 |
+
"grad_norm": 10.822576522827148,
|
| 11904 |
+
"learning_rate": 1.695e-05,
|
| 11905 |
+
"loss": 81.0625,
|
| 11906 |
+
"step": 1696
|
| 11907 |
+
},
|
| 11908 |
+
{
|
| 11909 |
+
"epoch": 0.0001697,
|
| 11910 |
+
"grad_norm": 10.834251403808594,
|
| 11911 |
+
"learning_rate": 1.696e-05,
|
| 11912 |
+
"loss": 81.0625,
|
| 11913 |
+
"step": 1697
|
| 11914 |
+
},
|
| 11915 |
+
{
|
| 11916 |
+
"epoch": 0.0001698,
|
| 11917 |
+
"grad_norm": 10.99413776397705,
|
| 11918 |
+
"learning_rate": 1.6970000000000002e-05,
|
| 11919 |
+
"loss": 78.5,
|
| 11920 |
+
"step": 1698
|
| 11921 |
+
},
|
| 11922 |
+
{
|
| 11923 |
+
"epoch": 0.0001699,
|
| 11924 |
+
"grad_norm": 10.745895385742188,
|
| 11925 |
+
"learning_rate": 1.698e-05,
|
| 11926 |
+
"loss": 79.875,
|
| 11927 |
+
"step": 1699
|
| 11928 |
+
},
|
| 11929 |
+
{
|
| 11930 |
+
"epoch": 0.00017,
|
| 11931 |
+
"grad_norm": 10.999711990356445,
|
| 11932 |
+
"learning_rate": 1.699e-05,
|
| 11933 |
+
"loss": 79.3125,
|
| 11934 |
+
"step": 1700
|
| 11935 |
+
},
|
| 11936 |
+
{
|
| 11937 |
+
"epoch": 0.0001701,
|
| 11938 |
+
"grad_norm": 10.955674171447754,
|
| 11939 |
+
"learning_rate": 1.7000000000000003e-05,
|
| 11940 |
+
"loss": 80.875,
|
| 11941 |
+
"step": 1701
|
| 11942 |
+
},
|
| 11943 |
+
{
|
| 11944 |
+
"epoch": 0.0001702,
|
| 11945 |
+
"grad_norm": 10.888795852661133,
|
| 11946 |
+
"learning_rate": 1.7009999999999998e-05,
|
| 11947 |
+
"loss": 78.9375,
|
| 11948 |
+
"step": 1702
|
| 11949 |
+
},
|
| 11950 |
+
{
|
| 11951 |
+
"epoch": 0.0001703,
|
| 11952 |
+
"grad_norm": 10.802494049072266,
|
| 11953 |
+
"learning_rate": 1.702e-05,
|
| 11954 |
+
"loss": 79.625,
|
| 11955 |
+
"step": 1703
|
| 11956 |
+
},
|
| 11957 |
+
{
|
| 11958 |
+
"epoch": 0.0001704,
|
| 11959 |
+
"grad_norm": 10.999938011169434,
|
| 11960 |
+
"learning_rate": 1.703e-05,
|
| 11961 |
+
"loss": 79.125,
|
| 11962 |
+
"step": 1704
|
| 11963 |
+
},
|
| 11964 |
+
{
|
| 11965 |
+
"epoch": 0.0001705,
|
| 11966 |
+
"grad_norm": 10.716090202331543,
|
| 11967 |
+
"learning_rate": 1.704e-05,
|
| 11968 |
+
"loss": 80.125,
|
| 11969 |
+
"step": 1705
|
| 11970 |
+
},
|
| 11971 |
+
{
|
| 11972 |
+
"epoch": 0.0001706,
|
| 11973 |
+
"grad_norm": 10.765728950500488,
|
| 11974 |
+
"learning_rate": 1.705e-05,
|
| 11975 |
+
"loss": 79.9375,
|
| 11976 |
+
"step": 1706
|
| 11977 |
+
},
|
| 11978 |
+
{
|
| 11979 |
+
"epoch": 0.0001707,
|
| 11980 |
+
"grad_norm": 10.725311279296875,
|
| 11981 |
+
"learning_rate": 1.706e-05,
|
| 11982 |
+
"loss": 80.1875,
|
| 11983 |
+
"step": 1707
|
| 11984 |
+
},
|
| 11985 |
+
{
|
| 11986 |
+
"epoch": 0.0001708,
|
| 11987 |
+
"grad_norm": 10.912590980529785,
|
| 11988 |
+
"learning_rate": 1.707e-05,
|
| 11989 |
+
"loss": 78.875,
|
| 11990 |
+
"step": 1708
|
| 11991 |
+
},
|
| 11992 |
+
{
|
| 11993 |
+
"epoch": 0.0001709,
|
| 11994 |
+
"grad_norm": 10.935500144958496,
|
| 11995 |
+
"learning_rate": 1.708e-05,
|
| 11996 |
+
"loss": 79.8125,
|
| 11997 |
+
"step": 1709
|
| 11998 |
+
},
|
| 11999 |
+
{
|
| 12000 |
+
"epoch": 0.000171,
|
| 12001 |
+
"grad_norm": 10.742402076721191,
|
| 12002 |
+
"learning_rate": 1.709e-05,
|
| 12003 |
+
"loss": 80.4375,
|
| 12004 |
+
"step": 1710
|
| 12005 |
+
},
|
| 12006 |
+
{
|
| 12007 |
+
"epoch": 0.0001711,
|
| 12008 |
+
"grad_norm": 10.83121395111084,
|
| 12009 |
+
"learning_rate": 1.7100000000000002e-05,
|
| 12010 |
+
"loss": 79.1875,
|
| 12011 |
+
"step": 1711
|
| 12012 |
+
},
|
| 12013 |
+
{
|
| 12014 |
+
"epoch": 0.0001712,
|
| 12015 |
+
"grad_norm": 11.009101867675781,
|
| 12016 |
+
"learning_rate": 1.711e-05,
|
| 12017 |
+
"loss": 79.5,
|
| 12018 |
+
"step": 1712
|
| 12019 |
+
},
|
| 12020 |
+
{
|
| 12021 |
+
"epoch": 0.0001713,
|
| 12022 |
+
"grad_norm": 10.9196138381958,
|
| 12023 |
+
"learning_rate": 1.712e-05,
|
| 12024 |
+
"loss": 79.8125,
|
| 12025 |
+
"step": 1713
|
| 12026 |
+
},
|
| 12027 |
+
{
|
| 12028 |
+
"epoch": 0.0001714,
|
| 12029 |
+
"grad_norm": 10.713299751281738,
|
| 12030 |
+
"learning_rate": 1.713e-05,
|
| 12031 |
+
"loss": 79.875,
|
| 12032 |
+
"step": 1714
|
| 12033 |
+
},
|
| 12034 |
+
{
|
| 12035 |
+
"epoch": 0.0001715,
|
| 12036 |
+
"grad_norm": 10.738595962524414,
|
| 12037 |
+
"learning_rate": 1.714e-05,
|
| 12038 |
+
"loss": 79.375,
|
| 12039 |
+
"step": 1715
|
| 12040 |
+
},
|
| 12041 |
+
{
|
| 12042 |
+
"epoch": 0.0001716,
|
| 12043 |
+
"grad_norm": 10.912589073181152,
|
| 12044 |
+
"learning_rate": 1.715e-05,
|
| 12045 |
+
"loss": 78.625,
|
| 12046 |
+
"step": 1716
|
| 12047 |
+
},
|
| 12048 |
+
{
|
| 12049 |
+
"epoch": 0.0001717,
|
| 12050 |
+
"grad_norm": 10.915419578552246,
|
| 12051 |
+
"learning_rate": 1.7160000000000002e-05,
|
| 12052 |
+
"loss": 78.8125,
|
| 12053 |
+
"step": 1717
|
| 12054 |
+
},
|
| 12055 |
+
{
|
| 12056 |
+
"epoch": 0.0001718,
|
| 12057 |
+
"grad_norm": 10.834695816040039,
|
| 12058 |
+
"learning_rate": 1.717e-05,
|
| 12059 |
+
"loss": 79.875,
|
| 12060 |
+
"step": 1718
|
| 12061 |
+
},
|
| 12062 |
+
{
|
| 12063 |
+
"epoch": 0.0001719,
|
| 12064 |
+
"grad_norm": 10.900665283203125,
|
| 12065 |
+
"learning_rate": 1.7179999999999998e-05,
|
| 12066 |
+
"loss": 78.3125,
|
| 12067 |
+
"step": 1719
|
| 12068 |
+
},
|
| 12069 |
+
{
|
| 12070 |
+
"epoch": 0.000172,
|
| 12071 |
+
"grad_norm": 10.814151763916016,
|
| 12072 |
+
"learning_rate": 1.719e-05,
|
| 12073 |
+
"loss": 77.75,
|
| 12074 |
+
"step": 1720
|
| 12075 |
+
},
|
| 12076 |
+
{
|
| 12077 |
+
"epoch": 0.0001721,
|
| 12078 |
+
"grad_norm": 10.723401069641113,
|
| 12079 |
+
"learning_rate": 1.72e-05,
|
| 12080 |
+
"loss": 79.625,
|
| 12081 |
+
"step": 1721
|
| 12082 |
+
},
|
| 12083 |
+
{
|
| 12084 |
+
"epoch": 0.0001722,
|
| 12085 |
+
"grad_norm": 10.772787094116211,
|
| 12086 |
+
"learning_rate": 1.721e-05,
|
| 12087 |
+
"loss": 78.8125,
|
| 12088 |
+
"step": 1722
|
| 12089 |
+
},
|
| 12090 |
+
{
|
| 12091 |
+
"epoch": 0.0001723,
|
| 12092 |
+
"grad_norm": 10.624513626098633,
|
| 12093 |
+
"learning_rate": 1.722e-05,
|
| 12094 |
+
"loss": 79.0,
|
| 12095 |
+
"step": 1723
|
| 12096 |
+
},
|
| 12097 |
+
{
|
| 12098 |
+
"epoch": 0.0001724,
|
| 12099 |
+
"grad_norm": 10.845480918884277,
|
| 12100 |
+
"learning_rate": 1.723e-05,
|
| 12101 |
+
"loss": 78.25,
|
| 12102 |
+
"step": 1724
|
| 12103 |
+
},
|
| 12104 |
+
{
|
| 12105 |
+
"epoch": 0.0001725,
|
| 12106 |
+
"grad_norm": 10.63790225982666,
|
| 12107 |
+
"learning_rate": 1.724e-05,
|
| 12108 |
+
"loss": 79.3125,
|
| 12109 |
+
"step": 1725
|
| 12110 |
+
},
|
| 12111 |
+
{
|
| 12112 |
+
"epoch": 0.0001726,
|
| 12113 |
+
"grad_norm": 10.91887378692627,
|
| 12114 |
+
"learning_rate": 1.725e-05,
|
| 12115 |
+
"loss": 77.8125,
|
| 12116 |
+
"step": 1726
|
| 12117 |
+
},
|
| 12118 |
+
{
|
| 12119 |
+
"epoch": 0.0001727,
|
| 12120 |
+
"grad_norm": 11.007935523986816,
|
| 12121 |
+
"learning_rate": 1.726e-05,
|
| 12122 |
+
"loss": 78.3125,
|
| 12123 |
+
"step": 1727
|
| 12124 |
+
},
|
| 12125 |
+
{
|
| 12126 |
+
"epoch": 0.0001728,
|
| 12127 |
+
"grad_norm": 10.816855430603027,
|
| 12128 |
+
"learning_rate": 1.7270000000000002e-05,
|
| 12129 |
+
"loss": 77.625,
|
| 12130 |
+
"step": 1728
|
| 12131 |
+
},
|
| 12132 |
+
{
|
| 12133 |
+
"epoch": 0.0001729,
|
| 12134 |
+
"grad_norm": 10.639528274536133,
|
| 12135 |
+
"learning_rate": 1.728e-05,
|
| 12136 |
+
"loss": 79.25,
|
| 12137 |
+
"step": 1729
|
| 12138 |
+
},
|
| 12139 |
+
{
|
| 12140 |
+
"epoch": 0.000173,
|
| 12141 |
+
"grad_norm": 10.81132698059082,
|
| 12142 |
+
"learning_rate": 1.729e-05,
|
| 12143 |
+
"loss": 77.875,
|
| 12144 |
+
"step": 1730
|
| 12145 |
+
},
|
| 12146 |
+
{
|
| 12147 |
+
"epoch": 0.0001731,
|
| 12148 |
+
"grad_norm": 10.696173667907715,
|
| 12149 |
+
"learning_rate": 1.73e-05,
|
| 12150 |
+
"loss": 79.1875,
|
| 12151 |
+
"step": 1731
|
| 12152 |
+
},
|
| 12153 |
+
{
|
| 12154 |
+
"epoch": 0.0001732,
|
| 12155 |
+
"grad_norm": 10.654017448425293,
|
| 12156 |
+
"learning_rate": 1.731e-05,
|
| 12157 |
+
"loss": 79.875,
|
| 12158 |
+
"step": 1732
|
| 12159 |
+
},
|
| 12160 |
+
{
|
| 12161 |
+
"epoch": 0.0001733,
|
| 12162 |
+
"grad_norm": 10.759590148925781,
|
| 12163 |
+
"learning_rate": 1.732e-05,
|
| 12164 |
+
"loss": 78.875,
|
| 12165 |
+
"step": 1733
|
| 12166 |
+
},
|
| 12167 |
+
{
|
| 12168 |
+
"epoch": 0.0001734,
|
| 12169 |
+
"grad_norm": 10.894126892089844,
|
| 12170 |
+
"learning_rate": 1.7330000000000002e-05,
|
| 12171 |
+
"loss": 80.3125,
|
| 12172 |
+
"step": 1734
|
| 12173 |
+
},
|
| 12174 |
+
{
|
| 12175 |
+
"epoch": 0.0001735,
|
| 12176 |
+
"grad_norm": 10.793487548828125,
|
| 12177 |
+
"learning_rate": 1.734e-05,
|
| 12178 |
+
"loss": 79.6875,
|
| 12179 |
+
"step": 1735
|
| 12180 |
+
},
|
| 12181 |
+
{
|
| 12182 |
+
"epoch": 0.0001736,
|
| 12183 |
+
"grad_norm": 11.036444664001465,
|
| 12184 |
+
"learning_rate": 1.7349999999999998e-05,
|
| 12185 |
+
"loss": 77.9375,
|
| 12186 |
+
"step": 1736
|
| 12187 |
+
},
|
| 12188 |
+
{
|
| 12189 |
+
"epoch": 0.0001737,
|
| 12190 |
+
"grad_norm": 10.841407775878906,
|
| 12191 |
+
"learning_rate": 1.736e-05,
|
| 12192 |
+
"loss": 77.75,
|
| 12193 |
+
"step": 1737
|
| 12194 |
+
},
|
| 12195 |
+
{
|
| 12196 |
+
"epoch": 0.0001738,
|
| 12197 |
+
"grad_norm": 10.731624603271484,
|
| 12198 |
+
"learning_rate": 1.737e-05,
|
| 12199 |
+
"loss": 79.0,
|
| 12200 |
+
"step": 1738
|
| 12201 |
+
},
|
| 12202 |
+
{
|
| 12203 |
+
"epoch": 0.0001739,
|
| 12204 |
+
"grad_norm": 10.66896915435791,
|
| 12205 |
+
"learning_rate": 1.738e-05,
|
| 12206 |
+
"loss": 79.75,
|
| 12207 |
+
"step": 1739
|
| 12208 |
+
},
|
| 12209 |
+
{
|
| 12210 |
+
"epoch": 0.000174,
|
| 12211 |
+
"grad_norm": 10.88010025024414,
|
| 12212 |
+
"learning_rate": 1.739e-05,
|
| 12213 |
+
"loss": 79.0625,
|
| 12214 |
+
"step": 1740
|
| 12215 |
+
},
|
| 12216 |
+
{
|
| 12217 |
+
"epoch": 0.0001741,
|
| 12218 |
+
"grad_norm": 10.936246871948242,
|
| 12219 |
+
"learning_rate": 1.74e-05,
|
| 12220 |
+
"loss": 79.5625,
|
| 12221 |
+
"step": 1741
|
| 12222 |
+
},
|
| 12223 |
+
{
|
| 12224 |
+
"epoch": 0.0001742,
|
| 12225 |
+
"grad_norm": 10.785653114318848,
|
| 12226 |
+
"learning_rate": 1.741e-05,
|
| 12227 |
+
"loss": 77.8125,
|
| 12228 |
+
"step": 1742
|
| 12229 |
+
},
|
| 12230 |
+
{
|
| 12231 |
+
"epoch": 0.0001743,
|
| 12232 |
+
"grad_norm": 10.53821086883545,
|
| 12233 |
+
"learning_rate": 1.742e-05,
|
| 12234 |
+
"loss": 79.9375,
|
| 12235 |
+
"step": 1743
|
| 12236 |
+
},
|
| 12237 |
+
{
|
| 12238 |
+
"epoch": 0.0001744,
|
| 12239 |
+
"grad_norm": 10.72213363647461,
|
| 12240 |
+
"learning_rate": 1.743e-05,
|
| 12241 |
+
"loss": 80.0,
|
| 12242 |
+
"step": 1744
|
| 12243 |
+
},
|
| 12244 |
+
{
|
| 12245 |
+
"epoch": 0.0001745,
|
| 12246 |
+
"grad_norm": 10.682663917541504,
|
| 12247 |
+
"learning_rate": 1.7440000000000002e-05,
|
| 12248 |
+
"loss": 79.0,
|
| 12249 |
+
"step": 1745
|
| 12250 |
+
},
|
| 12251 |
+
{
|
| 12252 |
+
"epoch": 0.0001746,
|
| 12253 |
+
"grad_norm": 10.701003074645996,
|
| 12254 |
+
"learning_rate": 1.745e-05,
|
| 12255 |
+
"loss": 79.0,
|
| 12256 |
+
"step": 1746
|
| 12257 |
+
},
|
| 12258 |
+
{
|
| 12259 |
+
"epoch": 0.0001747,
|
| 12260 |
+
"grad_norm": 10.708039283752441,
|
| 12261 |
+
"learning_rate": 1.746e-05,
|
| 12262 |
+
"loss": 77.9375,
|
| 12263 |
+
"step": 1747
|
| 12264 |
+
},
|
| 12265 |
+
{
|
| 12266 |
+
"epoch": 0.0001748,
|
| 12267 |
+
"grad_norm": 10.948110580444336,
|
| 12268 |
+
"learning_rate": 1.747e-05,
|
| 12269 |
+
"loss": 76.25,
|
| 12270 |
+
"step": 1748
|
| 12271 |
+
},
|
| 12272 |
+
{
|
| 12273 |
+
"epoch": 0.0001749,
|
| 12274 |
+
"grad_norm": 10.633480072021484,
|
| 12275 |
+
"learning_rate": 1.748e-05,
|
| 12276 |
+
"loss": 80.0,
|
| 12277 |
+
"step": 1749
|
| 12278 |
+
},
|
| 12279 |
+
{
|
| 12280 |
+
"epoch": 0.000175,
|
| 12281 |
+
"grad_norm": 10.546310424804688,
|
| 12282 |
+
"learning_rate": 1.749e-05,
|
| 12283 |
+
"loss": 79.25,
|
| 12284 |
+
"step": 1750
|
| 12285 |
+
},
|
| 12286 |
+
{
|
| 12287 |
+
"epoch": 0.0001751,
|
| 12288 |
+
"grad_norm": 10.7278413772583,
|
| 12289 |
+
"learning_rate": 1.7500000000000002e-05,
|
| 12290 |
+
"loss": 77.3125,
|
| 12291 |
+
"step": 1751
|
| 12292 |
+
},
|
| 12293 |
+
{
|
| 12294 |
+
"epoch": 0.0001752,
|
| 12295 |
+
"grad_norm": 10.804795265197754,
|
| 12296 |
+
"learning_rate": 1.751e-05,
|
| 12297 |
+
"loss": 77.8125,
|
| 12298 |
+
"step": 1752
|
| 12299 |
+
},
|
| 12300 |
+
{
|
| 12301 |
+
"epoch": 0.0001753,
|
| 12302 |
+
"grad_norm": 10.649805068969727,
|
| 12303 |
+
"learning_rate": 1.7519999999999998e-05,
|
| 12304 |
+
"loss": 77.9375,
|
| 12305 |
+
"step": 1753
|
| 12306 |
+
},
|
| 12307 |
+
{
|
| 12308 |
+
"epoch": 0.0001754,
|
| 12309 |
+
"grad_norm": 10.849508285522461,
|
| 12310 |
+
"learning_rate": 1.753e-05,
|
| 12311 |
+
"loss": 75.875,
|
| 12312 |
+
"step": 1754
|
| 12313 |
+
},
|
| 12314 |
+
{
|
| 12315 |
+
"epoch": 0.0001755,
|
| 12316 |
+
"grad_norm": 10.669038772583008,
|
| 12317 |
+
"learning_rate": 1.754e-05,
|
| 12318 |
+
"loss": 79.1875,
|
| 12319 |
+
"step": 1755
|
| 12320 |
+
},
|
| 12321 |
+
{
|
| 12322 |
+
"epoch": 0.0001756,
|
| 12323 |
+
"grad_norm": 10.650232315063477,
|
| 12324 |
+
"learning_rate": 1.755e-05,
|
| 12325 |
+
"loss": 78.125,
|
| 12326 |
+
"step": 1756
|
| 12327 |
+
},
|
| 12328 |
+
{
|
| 12329 |
+
"epoch": 0.0001757,
|
| 12330 |
+
"grad_norm": 10.93763542175293,
|
| 12331 |
+
"learning_rate": 1.756e-05,
|
| 12332 |
+
"loss": 75.4375,
|
| 12333 |
+
"step": 1757
|
| 12334 |
+
},
|
| 12335 |
+
{
|
| 12336 |
+
"epoch": 0.0001758,
|
| 12337 |
+
"grad_norm": 10.791654586791992,
|
| 12338 |
+
"learning_rate": 1.757e-05,
|
| 12339 |
+
"loss": 77.0,
|
| 12340 |
+
"step": 1758
|
| 12341 |
+
},
|
| 12342 |
+
{
|
| 12343 |
+
"epoch": 0.0001759,
|
| 12344 |
+
"grad_norm": 10.705726623535156,
|
| 12345 |
+
"learning_rate": 1.758e-05,
|
| 12346 |
+
"loss": 77.9375,
|
| 12347 |
+
"step": 1759
|
| 12348 |
+
},
|
| 12349 |
+
{
|
| 12350 |
+
"epoch": 0.000176,
|
| 12351 |
+
"grad_norm": 10.580390930175781,
|
| 12352 |
+
"learning_rate": 1.759e-05,
|
| 12353 |
+
"loss": 77.625,
|
| 12354 |
+
"step": 1760
|
| 12355 |
+
},
|
| 12356 |
+
{
|
| 12357 |
+
"epoch": 0.0001761,
|
| 12358 |
+
"grad_norm": 10.656951904296875,
|
| 12359 |
+
"learning_rate": 1.76e-05,
|
| 12360 |
+
"loss": 77.1875,
|
| 12361 |
+
"step": 1761
|
| 12362 |
+
},
|
| 12363 |
+
{
|
| 12364 |
+
"epoch": 0.0001762,
|
| 12365 |
+
"grad_norm": 10.772924423217773,
|
| 12366 |
+
"learning_rate": 1.7610000000000002e-05,
|
| 12367 |
+
"loss": 76.0625,
|
| 12368 |
+
"step": 1762
|
| 12369 |
+
},
|
| 12370 |
+
{
|
| 12371 |
+
"epoch": 0.0001763,
|
| 12372 |
+
"grad_norm": 10.663338661193848,
|
| 12373 |
+
"learning_rate": 1.762e-05,
|
| 12374 |
+
"loss": 77.25,
|
| 12375 |
+
"step": 1763
|
| 12376 |
+
},
|
| 12377 |
+
{
|
| 12378 |
+
"epoch": 0.0001764,
|
| 12379 |
+
"grad_norm": 10.654496192932129,
|
| 12380 |
+
"learning_rate": 1.763e-05,
|
| 12381 |
+
"loss": 77.0,
|
| 12382 |
+
"step": 1764
|
| 12383 |
+
},
|
| 12384 |
+
{
|
| 12385 |
+
"epoch": 0.0001765,
|
| 12386 |
+
"grad_norm": 10.55364990234375,
|
| 12387 |
+
"learning_rate": 1.764e-05,
|
| 12388 |
+
"loss": 79.3125,
|
| 12389 |
+
"step": 1765
|
| 12390 |
+
},
|
| 12391 |
+
{
|
| 12392 |
+
"epoch": 0.0001766,
|
| 12393 |
+
"grad_norm": 10.59168529510498,
|
| 12394 |
+
"learning_rate": 1.765e-05,
|
| 12395 |
+
"loss": 77.8125,
|
| 12396 |
+
"step": 1766
|
| 12397 |
+
},
|
| 12398 |
+
{
|
| 12399 |
+
"epoch": 0.0001767,
|
| 12400 |
+
"grad_norm": 10.650297164916992,
|
| 12401 |
+
"learning_rate": 1.766e-05,
|
| 12402 |
+
"loss": 77.125,
|
| 12403 |
+
"step": 1767
|
| 12404 |
+
},
|
| 12405 |
+
{
|
| 12406 |
+
"epoch": 0.0001768,
|
| 12407 |
+
"grad_norm": 10.719636917114258,
|
| 12408 |
+
"learning_rate": 1.7670000000000002e-05,
|
| 12409 |
+
"loss": 77.0,
|
| 12410 |
+
"step": 1768
|
| 12411 |
+
},
|
| 12412 |
+
{
|
| 12413 |
+
"epoch": 0.0001769,
|
| 12414 |
+
"grad_norm": 10.793968200683594,
|
| 12415 |
+
"learning_rate": 1.768e-05,
|
| 12416 |
+
"loss": 75.75,
|
| 12417 |
+
"step": 1769
|
| 12418 |
+
},
|
| 12419 |
+
{
|
| 12420 |
+
"epoch": 0.000177,
|
| 12421 |
+
"grad_norm": 10.62430477142334,
|
| 12422 |
+
"learning_rate": 1.769e-05,
|
| 12423 |
+
"loss": 76.3125,
|
| 12424 |
+
"step": 1770
|
| 12425 |
+
},
|
| 12426 |
+
{
|
| 12427 |
+
"epoch": 0.0001771,
|
| 12428 |
+
"grad_norm": 10.62021541595459,
|
| 12429 |
+
"learning_rate": 1.77e-05,
|
| 12430 |
+
"loss": 78.4375,
|
| 12431 |
+
"step": 1771
|
| 12432 |
+
},
|
| 12433 |
+
{
|
| 12434 |
+
"epoch": 0.0001772,
|
| 12435 |
+
"grad_norm": 10.529915809631348,
|
| 12436 |
+
"learning_rate": 1.771e-05,
|
| 12437 |
+
"loss": 76.5625,
|
| 12438 |
+
"step": 1772
|
| 12439 |
+
},
|
| 12440 |
+
{
|
| 12441 |
+
"epoch": 0.0001773,
|
| 12442 |
+
"grad_norm": 10.798215866088867,
|
| 12443 |
+
"learning_rate": 1.772e-05,
|
| 12444 |
+
"loss": 75.4375,
|
| 12445 |
+
"step": 1773
|
| 12446 |
+
},
|
| 12447 |
+
{
|
| 12448 |
+
"epoch": 0.0001774,
|
| 12449 |
+
"grad_norm": 10.537800788879395,
|
| 12450 |
+
"learning_rate": 1.773e-05,
|
| 12451 |
+
"loss": 77.125,
|
| 12452 |
+
"step": 1774
|
| 12453 |
+
},
|
| 12454 |
+
{
|
| 12455 |
+
"epoch": 0.0001775,
|
| 12456 |
+
"grad_norm": 10.770161628723145,
|
| 12457 |
+
"learning_rate": 1.774e-05,
|
| 12458 |
+
"loss": 76.4375,
|
| 12459 |
+
"step": 1775
|
| 12460 |
+
},
|
| 12461 |
+
{
|
| 12462 |
+
"epoch": 0.0001776,
|
| 12463 |
+
"grad_norm": 10.577451705932617,
|
| 12464 |
+
"learning_rate": 1.775e-05,
|
| 12465 |
+
"loss": 76.0,
|
| 12466 |
+
"step": 1776
|
| 12467 |
+
},
|
| 12468 |
+
{
|
| 12469 |
+
"epoch": 0.0001777,
|
| 12470 |
+
"grad_norm": 10.794952392578125,
|
| 12471 |
+
"learning_rate": 1.776e-05,
|
| 12472 |
+
"loss": 75.75,
|
| 12473 |
+
"step": 1777
|
| 12474 |
+
},
|
| 12475 |
+
{
|
| 12476 |
+
"epoch": 0.0001778,
|
| 12477 |
+
"grad_norm": 10.660951614379883,
|
| 12478 |
+
"learning_rate": 1.777e-05,
|
| 12479 |
+
"loss": 75.5,
|
| 12480 |
+
"step": 1778
|
| 12481 |
+
},
|
| 12482 |
+
{
|
| 12483 |
+
"epoch": 0.0001779,
|
| 12484 |
+
"grad_norm": 10.519042015075684,
|
| 12485 |
+
"learning_rate": 1.7780000000000003e-05,
|
| 12486 |
+
"loss": 77.0,
|
| 12487 |
+
"step": 1779
|
| 12488 |
+
},
|
| 12489 |
+
{
|
| 12490 |
+
"epoch": 0.000178,
|
| 12491 |
+
"grad_norm": 10.545072555541992,
|
| 12492 |
+
"learning_rate": 1.779e-05,
|
| 12493 |
+
"loss": 76.8125,
|
| 12494 |
+
"step": 1780
|
| 12495 |
+
},
|
| 12496 |
+
{
|
| 12497 |
+
"epoch": 0.0001781,
|
| 12498 |
+
"grad_norm": 10.663309097290039,
|
| 12499 |
+
"learning_rate": 1.78e-05,
|
| 12500 |
+
"loss": 76.6875,
|
| 12501 |
+
"step": 1781
|
| 12502 |
+
},
|
| 12503 |
+
{
|
| 12504 |
+
"epoch": 0.0001782,
|
| 12505 |
+
"grad_norm": 10.746946334838867,
|
| 12506 |
+
"learning_rate": 1.781e-05,
|
| 12507 |
+
"loss": 77.375,
|
| 12508 |
+
"step": 1782
|
| 12509 |
+
},
|
| 12510 |
+
{
|
| 12511 |
+
"epoch": 0.0001783,
|
| 12512 |
+
"grad_norm": 10.625143051147461,
|
| 12513 |
+
"learning_rate": 1.782e-05,
|
| 12514 |
+
"loss": 76.5,
|
| 12515 |
+
"step": 1783
|
| 12516 |
+
},
|
| 12517 |
+
{
|
| 12518 |
+
"epoch": 0.0001784,
|
| 12519 |
+
"grad_norm": 10.452560424804688,
|
| 12520 |
+
"learning_rate": 1.783e-05,
|
| 12521 |
+
"loss": 77.5,
|
| 12522 |
+
"step": 1784
|
| 12523 |
+
},
|
| 12524 |
+
{
|
| 12525 |
+
"epoch": 0.0001785,
|
| 12526 |
+
"grad_norm": 10.465551376342773,
|
| 12527 |
+
"learning_rate": 1.7840000000000002e-05,
|
| 12528 |
+
"loss": 76.625,
|
| 12529 |
+
"step": 1785
|
| 12530 |
+
},
|
| 12531 |
+
{
|
| 12532 |
+
"epoch": 0.0001786,
|
| 12533 |
+
"grad_norm": 10.593033790588379,
|
| 12534 |
+
"learning_rate": 1.785e-05,
|
| 12535 |
+
"loss": 77.5,
|
| 12536 |
+
"step": 1786
|
| 12537 |
+
},
|
| 12538 |
+
{
|
| 12539 |
+
"epoch": 0.0001787,
|
| 12540 |
+
"grad_norm": 10.652999877929688,
|
| 12541 |
+
"learning_rate": 1.786e-05,
|
| 12542 |
+
"loss": 76.5,
|
| 12543 |
+
"step": 1787
|
| 12544 |
+
},
|
| 12545 |
+
{
|
| 12546 |
+
"epoch": 0.0001788,
|
| 12547 |
+
"grad_norm": 10.6140775680542,
|
| 12548 |
+
"learning_rate": 1.787e-05,
|
| 12549 |
+
"loss": 76.25,
|
| 12550 |
+
"step": 1788
|
| 12551 |
+
},
|
| 12552 |
+
{
|
| 12553 |
+
"epoch": 0.0001789,
|
| 12554 |
+
"grad_norm": 10.677515983581543,
|
| 12555 |
+
"learning_rate": 1.788e-05,
|
| 12556 |
+
"loss": 75.875,
|
| 12557 |
+
"step": 1789
|
| 12558 |
+
},
|
| 12559 |
+
{
|
| 12560 |
+
"epoch": 0.000179,
|
| 12561 |
+
"grad_norm": 10.523712158203125,
|
| 12562 |
+
"learning_rate": 1.789e-05,
|
| 12563 |
+
"loss": 77.0,
|
| 12564 |
+
"step": 1790
|
| 12565 |
+
},
|
| 12566 |
+
{
|
| 12567 |
+
"epoch": 0.0001791,
|
| 12568 |
+
"grad_norm": 10.816093444824219,
|
| 12569 |
+
"learning_rate": 1.79e-05,
|
| 12570 |
+
"loss": 74.8125,
|
| 12571 |
+
"step": 1791
|
| 12572 |
+
},
|
| 12573 |
+
{
|
| 12574 |
+
"epoch": 0.0001792,
|
| 12575 |
+
"grad_norm": 10.561775207519531,
|
| 12576 |
+
"learning_rate": 1.791e-05,
|
| 12577 |
+
"loss": 75.5,
|
| 12578 |
+
"step": 1792
|
| 12579 |
+
},
|
| 12580 |
+
{
|
| 12581 |
+
"epoch": 0.0001793,
|
| 12582 |
+
"grad_norm": 10.464574813842773,
|
| 12583 |
+
"learning_rate": 1.792e-05,
|
| 12584 |
+
"loss": 76.75,
|
| 12585 |
+
"step": 1793
|
| 12586 |
+
},
|
| 12587 |
+
{
|
| 12588 |
+
"epoch": 0.0001794,
|
| 12589 |
+
"grad_norm": 10.453524589538574,
|
| 12590 |
+
"learning_rate": 1.793e-05,
|
| 12591 |
+
"loss": 76.5625,
|
| 12592 |
+
"step": 1794
|
| 12593 |
+
},
|
| 12594 |
+
{
|
| 12595 |
+
"epoch": 0.0001795,
|
| 12596 |
+
"grad_norm": 10.519964218139648,
|
| 12597 |
+
"learning_rate": 1.794e-05,
|
| 12598 |
+
"loss": 76.625,
|
| 12599 |
+
"step": 1795
|
| 12600 |
+
},
|
| 12601 |
+
{
|
| 12602 |
+
"epoch": 0.0001796,
|
| 12603 |
+
"grad_norm": 10.693997383117676,
|
| 12604 |
+
"learning_rate": 1.7950000000000003e-05,
|
| 12605 |
+
"loss": 74.4375,
|
| 12606 |
+
"step": 1796
|
| 12607 |
+
},
|
| 12608 |
+
{
|
| 12609 |
+
"epoch": 0.0001797,
|
| 12610 |
+
"grad_norm": 10.64401626586914,
|
| 12611 |
+
"learning_rate": 1.796e-05,
|
| 12612 |
+
"loss": 75.3125,
|
| 12613 |
+
"step": 1797
|
| 12614 |
+
},
|
| 12615 |
+
{
|
| 12616 |
+
"epoch": 0.0001798,
|
| 12617 |
+
"grad_norm": 10.452816009521484,
|
| 12618 |
+
"learning_rate": 1.797e-05,
|
| 12619 |
+
"loss": 78.1875,
|
| 12620 |
+
"step": 1798
|
| 12621 |
+
},
|
| 12622 |
+
{
|
| 12623 |
+
"epoch": 0.0001799,
|
| 12624 |
+
"grad_norm": 10.566648483276367,
|
| 12625 |
+
"learning_rate": 1.798e-05,
|
| 12626 |
+
"loss": 75.875,
|
| 12627 |
+
"step": 1799
|
| 12628 |
+
},
|
| 12629 |
+
{
|
| 12630 |
+
"epoch": 0.00018,
|
| 12631 |
+
"grad_norm": 10.594141960144043,
|
| 12632 |
+
"learning_rate": 1.799e-05,
|
| 12633 |
+
"loss": 74.875,
|
| 12634 |
+
"step": 1800
|
| 12635 |
+
},
|
| 12636 |
+
{
|
| 12637 |
+
"epoch": 0.0001801,
|
| 12638 |
+
"grad_norm": 10.448125839233398,
|
| 12639 |
+
"learning_rate": 1.8e-05,
|
| 12640 |
+
"loss": 76.3125,
|
| 12641 |
+
"step": 1801
|
| 12642 |
+
},
|
| 12643 |
+
{
|
| 12644 |
+
"epoch": 0.0001802,
|
| 12645 |
+
"grad_norm": 10.401894569396973,
|
| 12646 |
+
"learning_rate": 1.8010000000000002e-05,
|
| 12647 |
+
"loss": 76.75,
|
| 12648 |
+
"step": 1802
|
| 12649 |
+
},
|
| 12650 |
+
{
|
| 12651 |
+
"epoch": 0.0001803,
|
| 12652 |
+
"grad_norm": 10.610258102416992,
|
| 12653 |
+
"learning_rate": 1.802e-05,
|
| 12654 |
+
"loss": 75.1875,
|
| 12655 |
+
"step": 1803
|
| 12656 |
+
},
|
| 12657 |
+
{
|
| 12658 |
+
"epoch": 0.0001804,
|
| 12659 |
+
"grad_norm": 10.440401077270508,
|
| 12660 |
+
"learning_rate": 1.803e-05,
|
| 12661 |
+
"loss": 76.25,
|
| 12662 |
+
"step": 1804
|
| 12663 |
+
},
|
| 12664 |
+
{
|
| 12665 |
+
"epoch": 0.0001805,
|
| 12666 |
+
"grad_norm": 10.447839736938477,
|
| 12667 |
+
"learning_rate": 1.804e-05,
|
| 12668 |
+
"loss": 77.0,
|
| 12669 |
+
"step": 1805
|
| 12670 |
+
},
|
| 12671 |
+
{
|
| 12672 |
+
"epoch": 0.0001806,
|
| 12673 |
+
"grad_norm": 10.623817443847656,
|
| 12674 |
+
"learning_rate": 1.805e-05,
|
| 12675 |
+
"loss": 76.3125,
|
| 12676 |
+
"step": 1806
|
| 12677 |
+
},
|
| 12678 |
+
{
|
| 12679 |
+
"epoch": 0.0001807,
|
| 12680 |
+
"grad_norm": 10.760772705078125,
|
| 12681 |
+
"learning_rate": 1.806e-05,
|
| 12682 |
+
"loss": 75.375,
|
| 12683 |
+
"step": 1807
|
| 12684 |
+
},
|
| 12685 |
+
{
|
| 12686 |
+
"epoch": 0.0001808,
|
| 12687 |
+
"grad_norm": 10.51712703704834,
|
| 12688 |
+
"learning_rate": 1.807e-05,
|
| 12689 |
+
"loss": 75.9375,
|
| 12690 |
+
"step": 1808
|
| 12691 |
+
},
|
| 12692 |
+
{
|
| 12693 |
+
"epoch": 0.0001809,
|
| 12694 |
+
"grad_norm": 10.787428855895996,
|
| 12695 |
+
"learning_rate": 1.808e-05,
|
| 12696 |
+
"loss": 74.125,
|
| 12697 |
+
"step": 1809
|
| 12698 |
+
},
|
| 12699 |
+
{
|
| 12700 |
+
"epoch": 0.000181,
|
| 12701 |
+
"grad_norm": 10.413379669189453,
|
| 12702 |
+
"learning_rate": 1.809e-05,
|
| 12703 |
+
"loss": 76.5625,
|
| 12704 |
+
"step": 1810
|
| 12705 |
+
},
|
| 12706 |
+
{
|
| 12707 |
+
"epoch": 0.0001811,
|
| 12708 |
+
"grad_norm": 10.707974433898926,
|
| 12709 |
+
"learning_rate": 1.81e-05,
|
| 12710 |
+
"loss": 74.5625,
|
| 12711 |
+
"step": 1811
|
| 12712 |
+
},
|
| 12713 |
+
{
|
| 12714 |
+
"epoch": 0.0001812,
|
| 12715 |
+
"grad_norm": 10.540277481079102,
|
| 12716 |
+
"learning_rate": 1.811e-05,
|
| 12717 |
+
"loss": 75.5625,
|
| 12718 |
+
"step": 1812
|
| 12719 |
+
},
|
| 12720 |
+
{
|
| 12721 |
+
"epoch": 0.0001813,
|
| 12722 |
+
"grad_norm": 10.474909782409668,
|
| 12723 |
+
"learning_rate": 1.8120000000000003e-05,
|
| 12724 |
+
"loss": 77.1875,
|
| 12725 |
+
"step": 1813
|
| 12726 |
+
},
|
| 12727 |
+
{
|
| 12728 |
+
"epoch": 0.0001814,
|
| 12729 |
+
"grad_norm": 10.581918716430664,
|
| 12730 |
+
"learning_rate": 1.813e-05,
|
| 12731 |
+
"loss": 75.625,
|
| 12732 |
+
"step": 1814
|
| 12733 |
+
},
|
| 12734 |
+
{
|
| 12735 |
+
"epoch": 0.0001815,
|
| 12736 |
+
"grad_norm": 10.54649543762207,
|
| 12737 |
+
"learning_rate": 1.814e-05,
|
| 12738 |
+
"loss": 75.4375,
|
| 12739 |
+
"step": 1815
|
| 12740 |
+
},
|
| 12741 |
+
{
|
| 12742 |
+
"epoch": 0.0001816,
|
| 12743 |
+
"grad_norm": 10.404945373535156,
|
| 12744 |
+
"learning_rate": 1.815e-05,
|
| 12745 |
+
"loss": 75.8125,
|
| 12746 |
+
"step": 1816
|
| 12747 |
+
},
|
| 12748 |
+
{
|
| 12749 |
+
"epoch": 0.0001817,
|
| 12750 |
+
"grad_norm": 10.537050247192383,
|
| 12751 |
+
"learning_rate": 1.816e-05,
|
| 12752 |
+
"loss": 75.125,
|
| 12753 |
+
"step": 1817
|
| 12754 |
+
},
|
| 12755 |
+
{
|
| 12756 |
+
"epoch": 0.0001818,
|
| 12757 |
+
"grad_norm": 10.64071273803711,
|
| 12758 |
+
"learning_rate": 1.817e-05,
|
| 12759 |
+
"loss": 76.6875,
|
| 12760 |
+
"step": 1818
|
| 12761 |
+
},
|
| 12762 |
+
{
|
| 12763 |
+
"epoch": 0.0001819,
|
| 12764 |
+
"grad_norm": 10.442253112792969,
|
| 12765 |
+
"learning_rate": 1.8180000000000002e-05,
|
| 12766 |
+
"loss": 75.375,
|
| 12767 |
+
"step": 1819
|
| 12768 |
+
},
|
| 12769 |
+
{
|
| 12770 |
+
"epoch": 0.000182,
|
| 12771 |
+
"grad_norm": 10.428080558776855,
|
| 12772 |
+
"learning_rate": 1.819e-05,
|
| 12773 |
+
"loss": 76.0625,
|
| 12774 |
+
"step": 1820
|
| 12775 |
+
},
|
| 12776 |
+
{
|
| 12777 |
+
"epoch": 0.0001821,
|
| 12778 |
+
"grad_norm": 10.296630859375,
|
| 12779 |
+
"learning_rate": 1.82e-05,
|
| 12780 |
+
"loss": 76.4375,
|
| 12781 |
+
"step": 1821
|
| 12782 |
+
},
|
| 12783 |
+
{
|
| 12784 |
+
"epoch": 0.0001822,
|
| 12785 |
+
"grad_norm": 10.323344230651855,
|
| 12786 |
+
"learning_rate": 1.821e-05,
|
| 12787 |
+
"loss": 74.875,
|
| 12788 |
+
"step": 1822
|
| 12789 |
+
},
|
| 12790 |
+
{
|
| 12791 |
+
"epoch": 0.0001823,
|
| 12792 |
+
"grad_norm": 10.475907325744629,
|
| 12793 |
+
"learning_rate": 1.8220000000000002e-05,
|
| 12794 |
+
"loss": 75.625,
|
| 12795 |
+
"step": 1823
|
| 12796 |
+
},
|
| 12797 |
+
{
|
| 12798 |
+
"epoch": 0.0001824,
|
| 12799 |
+
"grad_norm": 10.535412788391113,
|
| 12800 |
+
"learning_rate": 1.823e-05,
|
| 12801 |
+
"loss": 74.75,
|
| 12802 |
+
"step": 1824
|
| 12803 |
+
},
|
| 12804 |
+
{
|
| 12805 |
+
"epoch": 0.0001825,
|
| 12806 |
+
"grad_norm": 10.529937744140625,
|
| 12807 |
+
"learning_rate": 1.824e-05,
|
| 12808 |
+
"loss": 74.0625,
|
| 12809 |
+
"step": 1825
|
| 12810 |
+
},
|
| 12811 |
+
{
|
| 12812 |
+
"epoch": 0.0001826,
|
| 12813 |
+
"grad_norm": 10.555984497070312,
|
| 12814 |
+
"learning_rate": 1.825e-05,
|
| 12815 |
+
"loss": 74.75,
|
| 12816 |
+
"step": 1826
|
| 12817 |
+
},
|
| 12818 |
+
{
|
| 12819 |
+
"epoch": 0.0001827,
|
| 12820 |
+
"grad_norm": 10.664322853088379,
|
| 12821 |
+
"learning_rate": 1.826e-05,
|
| 12822 |
+
"loss": 74.875,
|
| 12823 |
+
"step": 1827
|
| 12824 |
+
},
|
| 12825 |
+
{
|
| 12826 |
+
"epoch": 0.0001828,
|
| 12827 |
+
"grad_norm": 10.602211952209473,
|
| 12828 |
+
"learning_rate": 1.827e-05,
|
| 12829 |
+
"loss": 74.0625,
|
| 12830 |
+
"step": 1828
|
| 12831 |
+
},
|
| 12832 |
+
{
|
| 12833 |
+
"epoch": 0.0001829,
|
| 12834 |
+
"grad_norm": 10.418312072753906,
|
| 12835 |
+
"learning_rate": 1.828e-05,
|
| 12836 |
+
"loss": 75.0625,
|
| 12837 |
+
"step": 1829
|
| 12838 |
+
},
|
| 12839 |
+
{
|
| 12840 |
+
"epoch": 0.000183,
|
| 12841 |
+
"grad_norm": 10.414963722229004,
|
| 12842 |
+
"learning_rate": 1.8290000000000003e-05,
|
| 12843 |
+
"loss": 75.6875,
|
| 12844 |
+
"step": 1830
|
| 12845 |
+
},
|
| 12846 |
+
{
|
| 12847 |
+
"epoch": 0.0001831,
|
| 12848 |
+
"grad_norm": 10.508442878723145,
|
| 12849 |
+
"learning_rate": 1.83e-05,
|
| 12850 |
+
"loss": 74.375,
|
| 12851 |
+
"step": 1831
|
| 12852 |
+
},
|
| 12853 |
+
{
|
| 12854 |
+
"epoch": 0.0001832,
|
| 12855 |
+
"grad_norm": 10.432555198669434,
|
| 12856 |
+
"learning_rate": 1.831e-05,
|
| 12857 |
+
"loss": 75.4375,
|
| 12858 |
+
"step": 1832
|
| 12859 |
+
},
|
| 12860 |
+
{
|
| 12861 |
+
"epoch": 0.0001833,
|
| 12862 |
+
"grad_norm": 10.469473838806152,
|
| 12863 |
+
"learning_rate": 1.832e-05,
|
| 12864 |
+
"loss": 74.5625,
|
| 12865 |
+
"step": 1833
|
| 12866 |
+
},
|
| 12867 |
+
{
|
| 12868 |
+
"epoch": 0.0001834,
|
| 12869 |
+
"grad_norm": 10.339816093444824,
|
| 12870 |
+
"learning_rate": 1.833e-05,
|
| 12871 |
+
"loss": 74.875,
|
| 12872 |
+
"step": 1834
|
| 12873 |
+
},
|
| 12874 |
+
{
|
| 12875 |
+
"epoch": 0.0001835,
|
| 12876 |
+
"grad_norm": 10.438968658447266,
|
| 12877 |
+
"learning_rate": 1.834e-05,
|
| 12878 |
+
"loss": 74.8125,
|
| 12879 |
+
"step": 1835
|
| 12880 |
+
},
|
| 12881 |
+
{
|
| 12882 |
+
"epoch": 0.0001836,
|
| 12883 |
+
"grad_norm": 10.777366638183594,
|
| 12884 |
+
"learning_rate": 1.8350000000000002e-05,
|
| 12885 |
+
"loss": 75.0,
|
| 12886 |
+
"step": 1836
|
| 12887 |
+
},
|
| 12888 |
+
{
|
| 12889 |
+
"epoch": 0.0001837,
|
| 12890 |
+
"grad_norm": 10.707802772521973,
|
| 12891 |
+
"learning_rate": 1.836e-05,
|
| 12892 |
+
"loss": 78.0625,
|
| 12893 |
+
"step": 1837
|
| 12894 |
+
},
|
| 12895 |
+
{
|
| 12896 |
+
"epoch": 0.0001838,
|
| 12897 |
+
"grad_norm": 10.437213897705078,
|
| 12898 |
+
"learning_rate": 1.837e-05,
|
| 12899 |
+
"loss": 74.3125,
|
| 12900 |
+
"step": 1838
|
| 12901 |
+
},
|
| 12902 |
+
{
|
| 12903 |
+
"epoch": 0.0001839,
|
| 12904 |
+
"grad_norm": 10.431440353393555,
|
| 12905 |
+
"learning_rate": 1.838e-05,
|
| 12906 |
+
"loss": 75.875,
|
| 12907 |
+
"step": 1839
|
| 12908 |
+
},
|
| 12909 |
+
{
|
| 12910 |
+
"epoch": 0.000184,
|
| 12911 |
+
"grad_norm": 10.38449478149414,
|
| 12912 |
+
"learning_rate": 1.8390000000000002e-05,
|
| 12913 |
+
"loss": 74.125,
|
| 12914 |
+
"step": 1840
|
| 12915 |
+
},
|
| 12916 |
+
{
|
| 12917 |
+
"epoch": 0.0001841,
|
| 12918 |
+
"grad_norm": 10.561605453491211,
|
| 12919 |
+
"learning_rate": 1.84e-05,
|
| 12920 |
+
"loss": 77.125,
|
| 12921 |
+
"step": 1841
|
| 12922 |
+
},
|
| 12923 |
+
{
|
| 12924 |
+
"epoch": 0.0001842,
|
| 12925 |
+
"grad_norm": 10.490052223205566,
|
| 12926 |
+
"learning_rate": 1.841e-05,
|
| 12927 |
+
"loss": 74.5625,
|
| 12928 |
+
"step": 1842
|
| 12929 |
+
},
|
| 12930 |
+
{
|
| 12931 |
+
"epoch": 0.0001843,
|
| 12932 |
+
"grad_norm": 10.40212631225586,
|
| 12933 |
+
"learning_rate": 1.842e-05,
|
| 12934 |
+
"loss": 75.8125,
|
| 12935 |
+
"step": 1843
|
| 12936 |
+
},
|
| 12937 |
+
{
|
| 12938 |
+
"epoch": 0.0001844,
|
| 12939 |
+
"grad_norm": 10.394779205322266,
|
| 12940 |
+
"learning_rate": 1.8429999999999998e-05,
|
| 12941 |
+
"loss": 74.1875,
|
| 12942 |
+
"step": 1844
|
| 12943 |
+
},
|
| 12944 |
+
{
|
| 12945 |
+
"epoch": 0.0001845,
|
| 12946 |
+
"grad_norm": 10.477581024169922,
|
| 12947 |
+
"learning_rate": 1.844e-05,
|
| 12948 |
+
"loss": 73.5625,
|
| 12949 |
+
"step": 1845
|
| 12950 |
+
},
|
| 12951 |
+
{
|
| 12952 |
+
"epoch": 0.0001846,
|
| 12953 |
+
"grad_norm": 10.522961616516113,
|
| 12954 |
+
"learning_rate": 1.845e-05,
|
| 12955 |
+
"loss": 72.875,
|
| 12956 |
+
"step": 1846
|
| 12957 |
+
},
|
| 12958 |
+
{
|
| 12959 |
+
"epoch": 0.0001847,
|
| 12960 |
+
"grad_norm": 10.521002769470215,
|
| 12961 |
+
"learning_rate": 1.846e-05,
|
| 12962 |
+
"loss": 73.3125,
|
| 12963 |
+
"step": 1847
|
| 12964 |
+
},
|
| 12965 |
+
{
|
| 12966 |
+
"epoch": 0.0001848,
|
| 12967 |
+
"grad_norm": 10.377899169921875,
|
| 12968 |
+
"learning_rate": 1.847e-05,
|
| 12969 |
+
"loss": 74.8125,
|
| 12970 |
+
"step": 1848
|
| 12971 |
+
},
|
| 12972 |
+
{
|
| 12973 |
+
"epoch": 0.0001849,
|
| 12974 |
+
"grad_norm": 10.571773529052734,
|
| 12975 |
+
"learning_rate": 1.848e-05,
|
| 12976 |
+
"loss": 74.125,
|
| 12977 |
+
"step": 1849
|
| 12978 |
+
},
|
| 12979 |
+
{
|
| 12980 |
+
"epoch": 0.000185,
|
| 12981 |
+
"grad_norm": 10.516894340515137,
|
| 12982 |
+
"learning_rate": 1.849e-05,
|
| 12983 |
+
"loss": 75.125,
|
| 12984 |
+
"step": 1850
|
| 12985 |
+
},
|
| 12986 |
+
{
|
| 12987 |
+
"epoch": 0.0001851,
|
| 12988 |
+
"grad_norm": 10.519248008728027,
|
| 12989 |
+
"learning_rate": 1.85e-05,
|
| 12990 |
+
"loss": 73.0,
|
| 12991 |
+
"step": 1851
|
| 12992 |
+
},
|
| 12993 |
+
{
|
| 12994 |
+
"epoch": 0.0001852,
|
| 12995 |
+
"grad_norm": 10.265591621398926,
|
| 12996 |
+
"learning_rate": 1.851e-05,
|
| 12997 |
+
"loss": 74.0625,
|
| 12998 |
+
"step": 1852
|
| 12999 |
+
},
|
| 13000 |
+
{
|
| 13001 |
+
"epoch": 0.0001853,
|
| 13002 |
+
"grad_norm": 10.392489433288574,
|
| 13003 |
+
"learning_rate": 1.8520000000000002e-05,
|
| 13004 |
+
"loss": 75.8125,
|
| 13005 |
+
"step": 1853
|
| 13006 |
+
},
|
| 13007 |
+
{
|
| 13008 |
+
"epoch": 0.0001854,
|
| 13009 |
+
"grad_norm": 10.352034568786621,
|
| 13010 |
+
"learning_rate": 1.853e-05,
|
| 13011 |
+
"loss": 74.5625,
|
| 13012 |
+
"step": 1854
|
| 13013 |
+
},
|
| 13014 |
+
{
|
| 13015 |
+
"epoch": 0.0001855,
|
| 13016 |
+
"grad_norm": 10.400627136230469,
|
| 13017 |
+
"learning_rate": 1.854e-05,
|
| 13018 |
+
"loss": 73.9375,
|
| 13019 |
+
"step": 1855
|
| 13020 |
+
},
|
| 13021 |
+
{
|
| 13022 |
+
"epoch": 0.0001856,
|
| 13023 |
+
"grad_norm": 10.386198043823242,
|
| 13024 |
+
"learning_rate": 1.855e-05,
|
| 13025 |
+
"loss": 73.5,
|
| 13026 |
+
"step": 1856
|
| 13027 |
+
},
|
| 13028 |
+
{
|
| 13029 |
+
"epoch": 0.0001857,
|
| 13030 |
+
"grad_norm": 10.476325035095215,
|
| 13031 |
+
"learning_rate": 1.8560000000000002e-05,
|
| 13032 |
+
"loss": 73.75,
|
| 13033 |
+
"step": 1857
|
| 13034 |
+
},
|
| 13035 |
+
{
|
| 13036 |
+
"epoch": 0.0001858,
|
| 13037 |
+
"grad_norm": 10.511929512023926,
|
| 13038 |
+
"learning_rate": 1.857e-05,
|
| 13039 |
+
"loss": 74.25,
|
| 13040 |
+
"step": 1858
|
| 13041 |
+
},
|
| 13042 |
+
{
|
| 13043 |
+
"epoch": 0.0001859,
|
| 13044 |
+
"grad_norm": 10.316984176635742,
|
| 13045 |
+
"learning_rate": 1.858e-05,
|
| 13046 |
+
"loss": 73.8125,
|
| 13047 |
+
"step": 1859
|
| 13048 |
+
},
|
| 13049 |
+
{
|
| 13050 |
+
"epoch": 0.000186,
|
| 13051 |
+
"grad_norm": 10.564013481140137,
|
| 13052 |
+
"learning_rate": 1.859e-05,
|
| 13053 |
+
"loss": 74.375,
|
| 13054 |
+
"step": 1860
|
| 13055 |
+
},
|
| 13056 |
+
{
|
| 13057 |
+
"epoch": 0.0001861,
|
| 13058 |
+
"grad_norm": 10.520541191101074,
|
| 13059 |
+
"learning_rate": 1.8599999999999998e-05,
|
| 13060 |
+
"loss": 72.9375,
|
| 13061 |
+
"step": 1861
|
| 13062 |
+
},
|
| 13063 |
+
{
|
| 13064 |
+
"epoch": 0.0001862,
|
| 13065 |
+
"grad_norm": 10.493901252746582,
|
| 13066 |
+
"learning_rate": 1.861e-05,
|
| 13067 |
+
"loss": 73.5,
|
| 13068 |
+
"step": 1862
|
| 13069 |
+
},
|
| 13070 |
+
{
|
| 13071 |
+
"epoch": 0.0001863,
|
| 13072 |
+
"grad_norm": 10.289846420288086,
|
| 13073 |
+
"learning_rate": 1.862e-05,
|
| 13074 |
+
"loss": 74.5,
|
| 13075 |
+
"step": 1863
|
| 13076 |
+
},
|
| 13077 |
+
{
|
| 13078 |
+
"epoch": 0.0001864,
|
| 13079 |
+
"grad_norm": 10.55647087097168,
|
| 13080 |
+
"learning_rate": 1.863e-05,
|
| 13081 |
+
"loss": 72.5625,
|
| 13082 |
+
"step": 1864
|
| 13083 |
+
},
|
| 13084 |
+
{
|
| 13085 |
+
"epoch": 0.0001865,
|
| 13086 |
+
"grad_norm": 10.611162185668945,
|
| 13087 |
+
"learning_rate": 1.864e-05,
|
| 13088 |
+
"loss": 73.1875,
|
| 13089 |
+
"step": 1865
|
| 13090 |
+
},
|
| 13091 |
+
{
|
| 13092 |
+
"epoch": 0.0001866,
|
| 13093 |
+
"grad_norm": 10.35433292388916,
|
| 13094 |
+
"learning_rate": 1.865e-05,
|
| 13095 |
+
"loss": 73.3125,
|
| 13096 |
+
"step": 1866
|
| 13097 |
+
},
|
| 13098 |
+
{
|
| 13099 |
+
"epoch": 0.0001867,
|
| 13100 |
+
"grad_norm": 10.491514205932617,
|
| 13101 |
+
"learning_rate": 1.866e-05,
|
| 13102 |
+
"loss": 72.1875,
|
| 13103 |
+
"step": 1867
|
| 13104 |
+
},
|
| 13105 |
+
{
|
| 13106 |
+
"epoch": 0.0001868,
|
| 13107 |
+
"grad_norm": 10.399983406066895,
|
| 13108 |
+
"learning_rate": 1.867e-05,
|
| 13109 |
+
"loss": 74.4375,
|
| 13110 |
+
"step": 1868
|
| 13111 |
+
},
|
| 13112 |
+
{
|
| 13113 |
+
"epoch": 0.0001869,
|
| 13114 |
+
"grad_norm": 10.26793098449707,
|
| 13115 |
+
"learning_rate": 1.868e-05,
|
| 13116 |
+
"loss": 75.6875,
|
| 13117 |
+
"step": 1869
|
| 13118 |
+
},
|
| 13119 |
+
{
|
| 13120 |
+
"epoch": 0.000187,
|
| 13121 |
+
"grad_norm": 10.247400283813477,
|
| 13122 |
+
"learning_rate": 1.8690000000000002e-05,
|
| 13123 |
+
"loss": 74.1875,
|
| 13124 |
+
"step": 1870
|
| 13125 |
+
},
|
| 13126 |
+
{
|
| 13127 |
+
"epoch": 0.0001871,
|
| 13128 |
+
"grad_norm": 10.544309616088867,
|
| 13129 |
+
"learning_rate": 1.87e-05,
|
| 13130 |
+
"loss": 71.75,
|
| 13131 |
+
"step": 1871
|
| 13132 |
+
},
|
| 13133 |
+
{
|
| 13134 |
+
"epoch": 0.0001872,
|
| 13135 |
+
"grad_norm": 10.499302864074707,
|
| 13136 |
+
"learning_rate": 1.871e-05,
|
| 13137 |
+
"loss": 72.0625,
|
| 13138 |
+
"step": 1872
|
| 13139 |
+
},
|
| 13140 |
+
{
|
| 13141 |
+
"epoch": 0.0001873,
|
| 13142 |
+
"grad_norm": 10.524739265441895,
|
| 13143 |
+
"learning_rate": 1.872e-05,
|
| 13144 |
+
"loss": 74.0,
|
| 13145 |
+
"step": 1873
|
| 13146 |
+
},
|
| 13147 |
+
{
|
| 13148 |
+
"epoch": 0.0001874,
|
| 13149 |
+
"grad_norm": 10.458234786987305,
|
| 13150 |
+
"learning_rate": 1.8730000000000002e-05,
|
| 13151 |
+
"loss": 72.9375,
|
| 13152 |
+
"step": 1874
|
| 13153 |
+
},
|
| 13154 |
+
{
|
| 13155 |
+
"epoch": 0.0001875,
|
| 13156 |
+
"grad_norm": 10.530044555664062,
|
| 13157 |
+
"learning_rate": 1.874e-05,
|
| 13158 |
+
"loss": 73.9375,
|
| 13159 |
+
"step": 1875
|
| 13160 |
+
},
|
| 13161 |
+
{
|
| 13162 |
+
"epoch": 0.0001876,
|
| 13163 |
+
"grad_norm": 10.297721862792969,
|
| 13164 |
+
"learning_rate": 1.8750000000000002e-05,
|
| 13165 |
+
"loss": 74.1875,
|
| 13166 |
+
"step": 1876
|
| 13167 |
+
},
|
| 13168 |
+
{
|
| 13169 |
+
"epoch": 0.0001877,
|
| 13170 |
+
"grad_norm": 10.372282981872559,
|
| 13171 |
+
"learning_rate": 1.876e-05,
|
| 13172 |
+
"loss": 74.25,
|
| 13173 |
+
"step": 1877
|
| 13174 |
+
},
|
| 13175 |
+
{
|
| 13176 |
+
"epoch": 0.0001878,
|
| 13177 |
+
"grad_norm": 10.302542686462402,
|
| 13178 |
+
"learning_rate": 1.8769999999999998e-05,
|
| 13179 |
+
"loss": 73.6875,
|
| 13180 |
+
"step": 1878
|
| 13181 |
+
},
|
| 13182 |
+
{
|
| 13183 |
+
"epoch": 0.0001879,
|
| 13184 |
+
"grad_norm": 10.455268859863281,
|
| 13185 |
+
"learning_rate": 1.878e-05,
|
| 13186 |
+
"loss": 74.4375,
|
| 13187 |
+
"step": 1879
|
| 13188 |
+
},
|
| 13189 |
+
{
|
| 13190 |
+
"epoch": 0.000188,
|
| 13191 |
+
"grad_norm": 10.465432167053223,
|
| 13192 |
+
"learning_rate": 1.879e-05,
|
| 13193 |
+
"loss": 71.75,
|
| 13194 |
+
"step": 1880
|
| 13195 |
+
},
|
| 13196 |
+
{
|
| 13197 |
+
"epoch": 0.0001881,
|
| 13198 |
+
"grad_norm": 10.485514640808105,
|
| 13199 |
+
"learning_rate": 1.88e-05,
|
| 13200 |
+
"loss": 73.6875,
|
| 13201 |
+
"step": 1881
|
| 13202 |
+
},
|
| 13203 |
+
{
|
| 13204 |
+
"epoch": 0.0001882,
|
| 13205 |
+
"grad_norm": 10.271815299987793,
|
| 13206 |
+
"learning_rate": 1.881e-05,
|
| 13207 |
+
"loss": 74.125,
|
| 13208 |
+
"step": 1882
|
| 13209 |
+
},
|
| 13210 |
+
{
|
| 13211 |
+
"epoch": 0.0001883,
|
| 13212 |
+
"grad_norm": 10.41010856628418,
|
| 13213 |
+
"learning_rate": 1.882e-05,
|
| 13214 |
+
"loss": 72.875,
|
| 13215 |
+
"step": 1883
|
| 13216 |
+
},
|
| 13217 |
+
{
|
| 13218 |
+
"epoch": 0.0001884,
|
| 13219 |
+
"grad_norm": 10.436948776245117,
|
| 13220 |
+
"learning_rate": 1.883e-05,
|
| 13221 |
+
"loss": 73.5,
|
| 13222 |
+
"step": 1884
|
| 13223 |
+
},
|
| 13224 |
+
{
|
| 13225 |
+
"epoch": 0.0001885,
|
| 13226 |
+
"grad_norm": 10.39260482788086,
|
| 13227 |
+
"learning_rate": 1.884e-05,
|
| 13228 |
+
"loss": 72.75,
|
| 13229 |
+
"step": 1885
|
| 13230 |
+
},
|
| 13231 |
+
{
|
| 13232 |
+
"epoch": 0.0001886,
|
| 13233 |
+
"grad_norm": 10.365702629089355,
|
| 13234 |
+
"learning_rate": 1.885e-05,
|
| 13235 |
+
"loss": 72.875,
|
| 13236 |
+
"step": 1886
|
| 13237 |
+
},
|
| 13238 |
+
{
|
| 13239 |
+
"epoch": 0.0001887,
|
| 13240 |
+
"grad_norm": 10.395852088928223,
|
| 13241 |
+
"learning_rate": 1.8860000000000002e-05,
|
| 13242 |
+
"loss": 71.6875,
|
| 13243 |
+
"step": 1887
|
| 13244 |
+
},
|
| 13245 |
+
{
|
| 13246 |
+
"epoch": 0.0001888,
|
| 13247 |
+
"grad_norm": 10.262138366699219,
|
| 13248 |
+
"learning_rate": 1.887e-05,
|
| 13249 |
+
"loss": 72.75,
|
| 13250 |
+
"step": 1888
|
| 13251 |
+
},
|
| 13252 |
+
{
|
| 13253 |
+
"epoch": 0.0001889,
|
| 13254 |
+
"grad_norm": 10.43374252319336,
|
| 13255 |
+
"learning_rate": 1.888e-05,
|
| 13256 |
+
"loss": 72.875,
|
| 13257 |
+
"step": 1889
|
| 13258 |
+
},
|
| 13259 |
+
{
|
| 13260 |
+
"epoch": 0.000189,
|
| 13261 |
+
"grad_norm": 10.227705001831055,
|
| 13262 |
+
"learning_rate": 1.889e-05,
|
| 13263 |
+
"loss": 73.875,
|
| 13264 |
+
"step": 1890
|
| 13265 |
+
},
|
| 13266 |
+
{
|
| 13267 |
+
"epoch": 0.0001891,
|
| 13268 |
+
"grad_norm": 10.353809356689453,
|
| 13269 |
+
"learning_rate": 1.8900000000000002e-05,
|
| 13270 |
+
"loss": 73.125,
|
| 13271 |
+
"step": 1891
|
| 13272 |
+
},
|
| 13273 |
+
{
|
| 13274 |
+
"epoch": 0.0001892,
|
| 13275 |
+
"grad_norm": 10.32115364074707,
|
| 13276 |
+
"learning_rate": 1.891e-05,
|
| 13277 |
+
"loss": 73.6875,
|
| 13278 |
+
"step": 1892
|
| 13279 |
+
},
|
| 13280 |
+
{
|
| 13281 |
+
"epoch": 0.0001893,
|
| 13282 |
+
"grad_norm": 10.273197174072266,
|
| 13283 |
+
"learning_rate": 1.8920000000000002e-05,
|
| 13284 |
+
"loss": 72.5625,
|
| 13285 |
+
"step": 1893
|
| 13286 |
+
},
|
| 13287 |
+
{
|
| 13288 |
+
"epoch": 0.0001894,
|
| 13289 |
+
"grad_norm": 10.563416481018066,
|
| 13290 |
+
"learning_rate": 1.893e-05,
|
| 13291 |
+
"loss": 73.1875,
|
| 13292 |
+
"step": 1894
|
| 13293 |
+
},
|
| 13294 |
+
{
|
| 13295 |
+
"epoch": 0.0001895,
|
| 13296 |
+
"grad_norm": 10.399928092956543,
|
| 13297 |
+
"learning_rate": 1.8939999999999998e-05,
|
| 13298 |
+
"loss": 71.4375,
|
| 13299 |
+
"step": 1895
|
| 13300 |
+
},
|
| 13301 |
+
{
|
| 13302 |
+
"epoch": 0.0001896,
|
| 13303 |
+
"grad_norm": 10.268082618713379,
|
| 13304 |
+
"learning_rate": 1.895e-05,
|
| 13305 |
+
"loss": 74.1875,
|
| 13306 |
+
"step": 1896
|
| 13307 |
+
},
|
| 13308 |
+
{
|
| 13309 |
+
"epoch": 0.0001897,
|
| 13310 |
+
"grad_norm": 10.20687484741211,
|
| 13311 |
+
"learning_rate": 1.896e-05,
|
| 13312 |
+
"loss": 73.0,
|
| 13313 |
+
"step": 1897
|
| 13314 |
+
},
|
| 13315 |
+
{
|
| 13316 |
+
"epoch": 0.0001898,
|
| 13317 |
+
"grad_norm": 10.29458999633789,
|
| 13318 |
+
"learning_rate": 1.897e-05,
|
| 13319 |
+
"loss": 72.5,
|
| 13320 |
+
"step": 1898
|
| 13321 |
+
},
|
| 13322 |
+
{
|
| 13323 |
+
"epoch": 0.0001899,
|
| 13324 |
+
"grad_norm": 10.40804672241211,
|
| 13325 |
+
"learning_rate": 1.898e-05,
|
| 13326 |
+
"loss": 72.3125,
|
| 13327 |
+
"step": 1899
|
| 13328 |
+
},
|
| 13329 |
+
{
|
| 13330 |
+
"epoch": 0.00019,
|
| 13331 |
+
"grad_norm": 10.445226669311523,
|
| 13332 |
+
"learning_rate": 1.899e-05,
|
| 13333 |
+
"loss": 71.25,
|
| 13334 |
+
"step": 1900
|
| 13335 |
+
},
|
| 13336 |
+
{
|
| 13337 |
+
"epoch": 0.0001901,
|
| 13338 |
+
"grad_norm": 10.368699073791504,
|
| 13339 |
+
"learning_rate": 1.9e-05,
|
| 13340 |
+
"loss": 71.75,
|
| 13341 |
+
"step": 1901
|
| 13342 |
+
},
|
| 13343 |
+
{
|
| 13344 |
+
"epoch": 0.0001902,
|
| 13345 |
+
"grad_norm": 10.655980110168457,
|
| 13346 |
+
"learning_rate": 1.901e-05,
|
| 13347 |
+
"loss": 71.375,
|
| 13348 |
+
"step": 1902
|
| 13349 |
+
},
|
| 13350 |
+
{
|
| 13351 |
+
"epoch": 0.0001903,
|
| 13352 |
+
"grad_norm": 10.2596435546875,
|
| 13353 |
+
"learning_rate": 1.902e-05,
|
| 13354 |
+
"loss": 72.5,
|
| 13355 |
+
"step": 1903
|
| 13356 |
+
},
|
| 13357 |
+
{
|
| 13358 |
+
"epoch": 0.0001904,
|
| 13359 |
+
"grad_norm": 10.201349258422852,
|
| 13360 |
+
"learning_rate": 1.9030000000000002e-05,
|
| 13361 |
+
"loss": 72.625,
|
| 13362 |
+
"step": 1904
|
| 13363 |
+
},
|
| 13364 |
+
{
|
| 13365 |
+
"epoch": 0.0001905,
|
| 13366 |
+
"grad_norm": 10.324286460876465,
|
| 13367 |
+
"learning_rate": 1.904e-05,
|
| 13368 |
+
"loss": 73.375,
|
| 13369 |
+
"step": 1905
|
| 13370 |
+
},
|
| 13371 |
+
{
|
| 13372 |
+
"epoch": 0.0001906,
|
| 13373 |
+
"grad_norm": 10.320876121520996,
|
| 13374 |
+
"learning_rate": 1.905e-05,
|
| 13375 |
+
"loss": 71.875,
|
| 13376 |
+
"step": 1906
|
| 13377 |
+
},
|
| 13378 |
+
{
|
| 13379 |
+
"epoch": 0.0001907,
|
| 13380 |
+
"grad_norm": 10.326148986816406,
|
| 13381 |
+
"learning_rate": 1.906e-05,
|
| 13382 |
+
"loss": 73.5,
|
| 13383 |
+
"step": 1907
|
| 13384 |
+
},
|
| 13385 |
+
{
|
| 13386 |
+
"epoch": 0.0001908,
|
| 13387 |
+
"grad_norm": 10.288737297058105,
|
| 13388 |
+
"learning_rate": 1.9070000000000002e-05,
|
| 13389 |
+
"loss": 72.125,
|
| 13390 |
+
"step": 1908
|
| 13391 |
+
},
|
| 13392 |
+
{
|
| 13393 |
+
"epoch": 0.0001909,
|
| 13394 |
+
"grad_norm": 10.349129676818848,
|
| 13395 |
+
"learning_rate": 1.908e-05,
|
| 13396 |
+
"loss": 72.875,
|
| 13397 |
+
"step": 1909
|
| 13398 |
+
},
|
| 13399 |
+
{
|
| 13400 |
+
"epoch": 0.000191,
|
| 13401 |
+
"grad_norm": 10.330138206481934,
|
| 13402 |
+
"learning_rate": 1.9090000000000002e-05,
|
| 13403 |
+
"loss": 73.0625,
|
| 13404 |
+
"step": 1910
|
| 13405 |
+
},
|
| 13406 |
+
{
|
| 13407 |
+
"epoch": 0.0001911,
|
| 13408 |
+
"grad_norm": 10.37156867980957,
|
| 13409 |
+
"learning_rate": 1.91e-05,
|
| 13410 |
+
"loss": 71.125,
|
| 13411 |
+
"step": 1911
|
| 13412 |
+
},
|
| 13413 |
+
{
|
| 13414 |
+
"epoch": 0.0001912,
|
| 13415 |
+
"grad_norm": 10.332508087158203,
|
| 13416 |
+
"learning_rate": 1.9109999999999998e-05,
|
| 13417 |
+
"loss": 71.8125,
|
| 13418 |
+
"step": 1912
|
| 13419 |
+
},
|
| 13420 |
+
{
|
| 13421 |
+
"epoch": 0.0001913,
|
| 13422 |
+
"grad_norm": 10.3118257522583,
|
| 13423 |
+
"learning_rate": 1.912e-05,
|
| 13424 |
+
"loss": 73.25,
|
| 13425 |
+
"step": 1913
|
| 13426 |
+
},
|
| 13427 |
+
{
|
| 13428 |
+
"epoch": 0.0001914,
|
| 13429 |
+
"grad_norm": 10.078189849853516,
|
| 13430 |
+
"learning_rate": 1.913e-05,
|
| 13431 |
+
"loss": 72.6875,
|
| 13432 |
+
"step": 1914
|
| 13433 |
+
},
|
| 13434 |
+
{
|
| 13435 |
+
"epoch": 0.0001915,
|
| 13436 |
+
"grad_norm": 10.437750816345215,
|
| 13437 |
+
"learning_rate": 1.914e-05,
|
| 13438 |
+
"loss": 72.1875,
|
| 13439 |
+
"step": 1915
|
| 13440 |
+
},
|
| 13441 |
+
{
|
| 13442 |
+
"epoch": 0.0001916,
|
| 13443 |
+
"grad_norm": 10.279887199401855,
|
| 13444 |
+
"learning_rate": 1.915e-05,
|
| 13445 |
+
"loss": 71.5625,
|
| 13446 |
+
"step": 1916
|
| 13447 |
+
},
|
| 13448 |
+
{
|
| 13449 |
+
"epoch": 0.0001917,
|
| 13450 |
+
"grad_norm": 10.300010681152344,
|
| 13451 |
+
"learning_rate": 1.916e-05,
|
| 13452 |
+
"loss": 71.25,
|
| 13453 |
+
"step": 1917
|
| 13454 |
+
},
|
| 13455 |
+
{
|
| 13456 |
+
"epoch": 0.0001918,
|
| 13457 |
+
"grad_norm": 10.136702537536621,
|
| 13458 |
+
"learning_rate": 1.917e-05,
|
| 13459 |
+
"loss": 73.875,
|
| 13460 |
+
"step": 1918
|
| 13461 |
+
},
|
| 13462 |
+
{
|
| 13463 |
+
"epoch": 0.0001919,
|
| 13464 |
+
"grad_norm": 10.21435260772705,
|
| 13465 |
+
"learning_rate": 1.918e-05,
|
| 13466 |
+
"loss": 71.75,
|
| 13467 |
+
"step": 1919
|
| 13468 |
+
},
|
| 13469 |
+
{
|
| 13470 |
+
"epoch": 0.000192,
|
| 13471 |
+
"grad_norm": 10.43912410736084,
|
| 13472 |
+
"learning_rate": 1.919e-05,
|
| 13473 |
+
"loss": 70.25,
|
| 13474 |
+
"step": 1920
|
| 13475 |
+
},
|
| 13476 |
+
{
|
| 13477 |
+
"epoch": 0.0001921,
|
| 13478 |
+
"grad_norm": 10.203756332397461,
|
| 13479 |
+
"learning_rate": 1.9200000000000003e-05,
|
| 13480 |
+
"loss": 71.75,
|
| 13481 |
+
"step": 1921
|
| 13482 |
+
},
|
| 13483 |
+
{
|
| 13484 |
+
"epoch": 0.0001922,
|
| 13485 |
+
"grad_norm": 10.295417785644531,
|
| 13486 |
+
"learning_rate": 1.921e-05,
|
| 13487 |
+
"loss": 72.1875,
|
| 13488 |
+
"step": 1922
|
| 13489 |
+
},
|
| 13490 |
+
{
|
| 13491 |
+
"epoch": 0.0001923,
|
| 13492 |
+
"grad_norm": 10.405128479003906,
|
| 13493 |
+
"learning_rate": 1.922e-05,
|
| 13494 |
+
"loss": 71.5,
|
| 13495 |
+
"step": 1923
|
| 13496 |
+
},
|
| 13497 |
+
{
|
| 13498 |
+
"epoch": 0.0001924,
|
| 13499 |
+
"grad_norm": 10.384449005126953,
|
| 13500 |
+
"learning_rate": 1.923e-05,
|
| 13501 |
+
"loss": 72.8125,
|
| 13502 |
+
"step": 1924
|
| 13503 |
+
},
|
| 13504 |
+
{
|
| 13505 |
+
"epoch": 0.0001925,
|
| 13506 |
+
"grad_norm": 10.155817031860352,
|
| 13507 |
+
"learning_rate": 1.9240000000000002e-05,
|
| 13508 |
+
"loss": 72.1875,
|
| 13509 |
+
"step": 1925
|
| 13510 |
+
},
|
| 13511 |
+
{
|
| 13512 |
+
"epoch": 0.0001926,
|
| 13513 |
+
"grad_norm": 10.310267448425293,
|
| 13514 |
+
"learning_rate": 1.925e-05,
|
| 13515 |
+
"loss": 72.1875,
|
| 13516 |
+
"step": 1926
|
| 13517 |
+
},
|
| 13518 |
+
{
|
| 13519 |
+
"epoch": 0.0001927,
|
| 13520 |
+
"grad_norm": 10.375925064086914,
|
| 13521 |
+
"learning_rate": 1.9260000000000002e-05,
|
| 13522 |
+
"loss": 70.75,
|
| 13523 |
+
"step": 1927
|
| 13524 |
+
},
|
| 13525 |
+
{
|
| 13526 |
+
"epoch": 0.0001928,
|
| 13527 |
+
"grad_norm": 10.55906867980957,
|
| 13528 |
+
"learning_rate": 1.927e-05,
|
| 13529 |
+
"loss": 73.375,
|
| 13530 |
+
"step": 1928
|
| 13531 |
+
},
|
| 13532 |
+
{
|
| 13533 |
+
"epoch": 0.0001929,
|
| 13534 |
+
"grad_norm": 10.122315406799316,
|
| 13535 |
+
"learning_rate": 1.928e-05,
|
| 13536 |
+
"loss": 74.625,
|
| 13537 |
+
"step": 1929
|
| 13538 |
+
},
|
| 13539 |
+
{
|
| 13540 |
+
"epoch": 0.000193,
|
| 13541 |
+
"grad_norm": 10.311563491821289,
|
| 13542 |
+
"learning_rate": 1.929e-05,
|
| 13543 |
+
"loss": 72.0625,
|
| 13544 |
+
"step": 1930
|
| 13545 |
+
},
|
| 13546 |
+
{
|
| 13547 |
+
"epoch": 0.0001931,
|
| 13548 |
+
"grad_norm": 10.309248924255371,
|
| 13549 |
+
"learning_rate": 1.93e-05,
|
| 13550 |
+
"loss": 72.6875,
|
| 13551 |
+
"step": 1931
|
| 13552 |
+
},
|
| 13553 |
+
{
|
| 13554 |
+
"epoch": 0.0001932,
|
| 13555 |
+
"grad_norm": 10.181316375732422,
|
| 13556 |
+
"learning_rate": 1.931e-05,
|
| 13557 |
+
"loss": 70.6875,
|
| 13558 |
+
"step": 1932
|
| 13559 |
+
},
|
| 13560 |
+
{
|
| 13561 |
+
"epoch": 0.0001933,
|
| 13562 |
+
"grad_norm": 10.316238403320312,
|
| 13563 |
+
"learning_rate": 1.932e-05,
|
| 13564 |
+
"loss": 71.5625,
|
| 13565 |
+
"step": 1933
|
| 13566 |
+
},
|
| 13567 |
+
{
|
| 13568 |
+
"epoch": 0.0001934,
|
| 13569 |
+
"grad_norm": 10.179081916809082,
|
| 13570 |
+
"learning_rate": 1.933e-05,
|
| 13571 |
+
"loss": 72.4375,
|
| 13572 |
+
"step": 1934
|
| 13573 |
+
},
|
| 13574 |
+
{
|
| 13575 |
+
"epoch": 0.0001935,
|
| 13576 |
+
"grad_norm": 10.145713806152344,
|
| 13577 |
+
"learning_rate": 1.934e-05,
|
| 13578 |
+
"loss": 71.75,
|
| 13579 |
+
"step": 1935
|
| 13580 |
+
},
|
| 13581 |
+
{
|
| 13582 |
+
"epoch": 0.0001936,
|
| 13583 |
+
"grad_norm": 10.175612449645996,
|
| 13584 |
+
"learning_rate": 1.935e-05,
|
| 13585 |
+
"loss": 71.5,
|
| 13586 |
+
"step": 1936
|
| 13587 |
+
},
|
| 13588 |
+
{
|
| 13589 |
+
"epoch": 0.0001937,
|
| 13590 |
+
"grad_norm": 10.24749755859375,
|
| 13591 |
+
"learning_rate": 1.936e-05,
|
| 13592 |
+
"loss": 72.5625,
|
| 13593 |
+
"step": 1937
|
| 13594 |
+
},
|
| 13595 |
+
{
|
| 13596 |
+
"epoch": 0.0001938,
|
| 13597 |
+
"grad_norm": 10.308296203613281,
|
| 13598 |
+
"learning_rate": 1.9370000000000003e-05,
|
| 13599 |
+
"loss": 70.5,
|
| 13600 |
+
"step": 1938
|
| 13601 |
+
},
|
| 13602 |
+
{
|
| 13603 |
+
"epoch": 0.0001939,
|
| 13604 |
+
"grad_norm": 10.151104927062988,
|
| 13605 |
+
"learning_rate": 1.938e-05,
|
| 13606 |
+
"loss": 71.3125,
|
| 13607 |
+
"step": 1939
|
| 13608 |
+
},
|
| 13609 |
+
{
|
| 13610 |
+
"epoch": 0.000194,
|
| 13611 |
+
"grad_norm": 10.346338272094727,
|
| 13612 |
+
"learning_rate": 1.939e-05,
|
| 13613 |
+
"loss": 71.625,
|
| 13614 |
+
"step": 1940
|
| 13615 |
+
},
|
| 13616 |
+
{
|
| 13617 |
+
"epoch": 0.0001941,
|
| 13618 |
+
"grad_norm": 10.234703063964844,
|
| 13619 |
+
"learning_rate": 1.94e-05,
|
| 13620 |
+
"loss": 70.4375,
|
| 13621 |
+
"step": 1941
|
| 13622 |
+
},
|
| 13623 |
+
{
|
| 13624 |
+
"epoch": 0.0001942,
|
| 13625 |
+
"grad_norm": 10.15624713897705,
|
| 13626 |
+
"learning_rate": 1.9410000000000002e-05,
|
| 13627 |
+
"loss": 72.5,
|
| 13628 |
+
"step": 1942
|
| 13629 |
+
},
|
| 13630 |
+
{
|
| 13631 |
+
"epoch": 0.0001943,
|
| 13632 |
+
"grad_norm": 10.2576265335083,
|
| 13633 |
+
"learning_rate": 1.942e-05,
|
| 13634 |
+
"loss": 71.8125,
|
| 13635 |
+
"step": 1943
|
| 13636 |
+
},
|
| 13637 |
+
{
|
| 13638 |
+
"epoch": 0.0001944,
|
| 13639 |
+
"grad_norm": 10.192061424255371,
|
| 13640 |
+
"learning_rate": 1.9430000000000002e-05,
|
| 13641 |
+
"loss": 71.625,
|
| 13642 |
+
"step": 1944
|
| 13643 |
+
},
|
| 13644 |
+
{
|
| 13645 |
+
"epoch": 0.0001945,
|
| 13646 |
+
"grad_norm": 10.456489562988281,
|
| 13647 |
+
"learning_rate": 1.944e-05,
|
| 13648 |
+
"loss": 69.75,
|
| 13649 |
+
"step": 1945
|
| 13650 |
+
},
|
| 13651 |
+
{
|
| 13652 |
+
"epoch": 0.0001946,
|
| 13653 |
+
"grad_norm": 10.204558372497559,
|
| 13654 |
+
"learning_rate": 1.945e-05,
|
| 13655 |
+
"loss": 71.3125,
|
| 13656 |
+
"step": 1946
|
| 13657 |
+
},
|
| 13658 |
+
{
|
| 13659 |
+
"epoch": 0.0001947,
|
| 13660 |
+
"grad_norm": 10.194465637207031,
|
| 13661 |
+
"learning_rate": 1.946e-05,
|
| 13662 |
+
"loss": 70.25,
|
| 13663 |
+
"step": 1947
|
| 13664 |
+
},
|
| 13665 |
+
{
|
| 13666 |
+
"epoch": 0.0001948,
|
| 13667 |
+
"grad_norm": 10.135470390319824,
|
| 13668 |
+
"learning_rate": 1.947e-05,
|
| 13669 |
+
"loss": 71.4375,
|
| 13670 |
+
"step": 1948
|
| 13671 |
+
},
|
| 13672 |
+
{
|
| 13673 |
+
"epoch": 0.0001949,
|
| 13674 |
+
"grad_norm": 10.270816802978516,
|
| 13675 |
+
"learning_rate": 1.948e-05,
|
| 13676 |
+
"loss": 72.8125,
|
| 13677 |
+
"step": 1949
|
| 13678 |
+
},
|
| 13679 |
+
{
|
| 13680 |
+
"epoch": 0.000195,
|
| 13681 |
+
"grad_norm": 10.145831108093262,
|
| 13682 |
+
"learning_rate": 1.949e-05,
|
| 13683 |
+
"loss": 73.25,
|
| 13684 |
+
"step": 1950
|
| 13685 |
+
},
|
| 13686 |
+
{
|
| 13687 |
+
"epoch": 0.0001951,
|
| 13688 |
+
"grad_norm": 10.1314115524292,
|
| 13689 |
+
"learning_rate": 1.95e-05,
|
| 13690 |
+
"loss": 71.4375,
|
| 13691 |
+
"step": 1951
|
| 13692 |
+
},
|
| 13693 |
+
{
|
| 13694 |
+
"epoch": 0.0001952,
|
| 13695 |
+
"grad_norm": 10.260855674743652,
|
| 13696 |
+
"learning_rate": 1.951e-05,
|
| 13697 |
+
"loss": 70.0625,
|
| 13698 |
+
"step": 1952
|
| 13699 |
+
},
|
| 13700 |
+
{
|
| 13701 |
+
"epoch": 0.0001953,
|
| 13702 |
+
"grad_norm": 10.306357383728027,
|
| 13703 |
+
"learning_rate": 1.952e-05,
|
| 13704 |
+
"loss": 69.9375,
|
| 13705 |
+
"step": 1953
|
| 13706 |
+
},
|
| 13707 |
+
{
|
| 13708 |
+
"epoch": 0.0001954,
|
| 13709 |
+
"grad_norm": 10.049440383911133,
|
| 13710 |
+
"learning_rate": 1.953e-05,
|
| 13711 |
+
"loss": 72.5625,
|
| 13712 |
+
"step": 1954
|
| 13713 |
+
},
|
| 13714 |
+
{
|
| 13715 |
+
"epoch": 0.0001955,
|
| 13716 |
+
"grad_norm": 10.397737503051758,
|
| 13717 |
+
"learning_rate": 1.9540000000000003e-05,
|
| 13718 |
+
"loss": 70.75,
|
| 13719 |
+
"step": 1955
|
| 13720 |
+
},
|
| 13721 |
+
{
|
| 13722 |
+
"epoch": 0.0001956,
|
| 13723 |
+
"grad_norm": 10.084511756896973,
|
| 13724 |
+
"learning_rate": 1.955e-05,
|
| 13725 |
+
"loss": 70.3125,
|
| 13726 |
+
"step": 1956
|
| 13727 |
+
},
|
| 13728 |
+
{
|
| 13729 |
+
"epoch": 0.0001957,
|
| 13730 |
+
"grad_norm": 10.107562065124512,
|
| 13731 |
+
"learning_rate": 1.956e-05,
|
| 13732 |
+
"loss": 71.9375,
|
| 13733 |
+
"step": 1957
|
| 13734 |
+
},
|
| 13735 |
+
{
|
| 13736 |
+
"epoch": 0.0001958,
|
| 13737 |
+
"grad_norm": 10.259406089782715,
|
| 13738 |
+
"learning_rate": 1.957e-05,
|
| 13739 |
+
"loss": 71.6875,
|
| 13740 |
+
"step": 1958
|
| 13741 |
+
},
|
| 13742 |
+
{
|
| 13743 |
+
"epoch": 0.0001959,
|
| 13744 |
+
"grad_norm": 10.093414306640625,
|
| 13745 |
+
"learning_rate": 1.9580000000000002e-05,
|
| 13746 |
+
"loss": 72.5,
|
| 13747 |
+
"step": 1959
|
| 13748 |
+
},
|
| 13749 |
+
{
|
| 13750 |
+
"epoch": 0.000196,
|
| 13751 |
+
"grad_norm": 10.273123741149902,
|
| 13752 |
+
"learning_rate": 1.959e-05,
|
| 13753 |
+
"loss": 69.75,
|
| 13754 |
+
"step": 1960
|
| 13755 |
+
},
|
| 13756 |
+
{
|
| 13757 |
+
"epoch": 0.0001961,
|
| 13758 |
+
"grad_norm": 10.390420913696289,
|
| 13759 |
+
"learning_rate": 1.9600000000000002e-05,
|
| 13760 |
+
"loss": 70.6875,
|
| 13761 |
+
"step": 1961
|
| 13762 |
+
},
|
| 13763 |
+
{
|
| 13764 |
+
"epoch": 0.0001962,
|
| 13765 |
+
"grad_norm": 10.27889633178711,
|
| 13766 |
+
"learning_rate": 1.961e-05,
|
| 13767 |
+
"loss": 70.4375,
|
| 13768 |
+
"step": 1962
|
| 13769 |
+
},
|
| 13770 |
+
{
|
| 13771 |
+
"epoch": 0.0001963,
|
| 13772 |
+
"grad_norm": 10.058853149414062,
|
| 13773 |
+
"learning_rate": 1.962e-05,
|
| 13774 |
+
"loss": 71.5,
|
| 13775 |
+
"step": 1963
|
| 13776 |
+
},
|
| 13777 |
+
{
|
| 13778 |
+
"epoch": 0.0001964,
|
| 13779 |
+
"grad_norm": 10.128463745117188,
|
| 13780 |
+
"learning_rate": 1.963e-05,
|
| 13781 |
+
"loss": 73.125,
|
| 13782 |
+
"step": 1964
|
| 13783 |
+
},
|
| 13784 |
+
{
|
| 13785 |
+
"epoch": 0.0001965,
|
| 13786 |
+
"grad_norm": 10.429844856262207,
|
| 13787 |
+
"learning_rate": 1.9640000000000002e-05,
|
| 13788 |
+
"loss": 69.5625,
|
| 13789 |
+
"step": 1965
|
| 13790 |
+
},
|
| 13791 |
+
{
|
| 13792 |
+
"epoch": 0.0001966,
|
| 13793 |
+
"grad_norm": 10.319170951843262,
|
| 13794 |
+
"learning_rate": 1.965e-05,
|
| 13795 |
+
"loss": 69.25,
|
| 13796 |
+
"step": 1966
|
| 13797 |
+
},
|
| 13798 |
+
{
|
| 13799 |
+
"epoch": 0.0001967,
|
| 13800 |
+
"grad_norm": 10.247960090637207,
|
| 13801 |
+
"learning_rate": 1.966e-05,
|
| 13802 |
+
"loss": 70.5,
|
| 13803 |
+
"step": 1967
|
| 13804 |
+
},
|
| 13805 |
+
{
|
| 13806 |
+
"epoch": 0.0001968,
|
| 13807 |
+
"grad_norm": 10.223762512207031,
|
| 13808 |
+
"learning_rate": 1.967e-05,
|
| 13809 |
+
"loss": 68.75,
|
| 13810 |
+
"step": 1968
|
| 13811 |
+
},
|
| 13812 |
+
{
|
| 13813 |
+
"epoch": 0.0001969,
|
| 13814 |
+
"grad_norm": 10.190348625183105,
|
| 13815 |
+
"learning_rate": 1.968e-05,
|
| 13816 |
+
"loss": 69.1875,
|
| 13817 |
+
"step": 1969
|
| 13818 |
+
},
|
| 13819 |
+
{
|
| 13820 |
+
"epoch": 0.000197,
|
| 13821 |
+
"grad_norm": 10.337133407592773,
|
| 13822 |
+
"learning_rate": 1.969e-05,
|
| 13823 |
+
"loss": 70.5625,
|
| 13824 |
+
"step": 1970
|
| 13825 |
+
},
|
| 13826 |
+
{
|
| 13827 |
+
"epoch": 0.0001971,
|
| 13828 |
+
"grad_norm": 10.41808032989502,
|
| 13829 |
+
"learning_rate": 1.97e-05,
|
| 13830 |
+
"loss": 69.125,
|
| 13831 |
+
"step": 1971
|
| 13832 |
+
},
|
| 13833 |
+
{
|
| 13834 |
+
"epoch": 0.0001972,
|
| 13835 |
+
"grad_norm": 9.977437019348145,
|
| 13836 |
+
"learning_rate": 1.9710000000000003e-05,
|
| 13837 |
+
"loss": 71.375,
|
| 13838 |
+
"step": 1972
|
| 13839 |
+
},
|
| 13840 |
+
{
|
| 13841 |
+
"epoch": 0.0001973,
|
| 13842 |
+
"grad_norm": 10.139707565307617,
|
| 13843 |
+
"learning_rate": 1.9719999999999998e-05,
|
| 13844 |
+
"loss": 71.4375,
|
| 13845 |
+
"step": 1973
|
| 13846 |
+
},
|
| 13847 |
+
{
|
| 13848 |
+
"epoch": 0.0001974,
|
| 13849 |
+
"grad_norm": 10.059530258178711,
|
| 13850 |
+
"learning_rate": 1.973e-05,
|
| 13851 |
+
"loss": 71.875,
|
| 13852 |
+
"step": 1974
|
| 13853 |
+
},
|
| 13854 |
+
{
|
| 13855 |
+
"epoch": 0.0001975,
|
| 13856 |
+
"grad_norm": 10.154842376708984,
|
| 13857 |
+
"learning_rate": 1.974e-05,
|
| 13858 |
+
"loss": 69.875,
|
| 13859 |
+
"step": 1975
|
| 13860 |
+
},
|
| 13861 |
+
{
|
| 13862 |
+
"epoch": 0.0001976,
|
| 13863 |
+
"grad_norm": 10.15227222442627,
|
| 13864 |
+
"learning_rate": 1.975e-05,
|
| 13865 |
+
"loss": 69.5,
|
| 13866 |
+
"step": 1976
|
| 13867 |
+
},
|
| 13868 |
+
{
|
| 13869 |
+
"epoch": 0.0001977,
|
| 13870 |
+
"grad_norm": 10.316965103149414,
|
| 13871 |
+
"learning_rate": 1.976e-05,
|
| 13872 |
+
"loss": 69.5,
|
| 13873 |
+
"step": 1977
|
| 13874 |
+
},
|
| 13875 |
+
{
|
| 13876 |
+
"epoch": 0.0001978,
|
| 13877 |
+
"grad_norm": 10.136872291564941,
|
| 13878 |
+
"learning_rate": 1.9770000000000002e-05,
|
| 13879 |
+
"loss": 70.5,
|
| 13880 |
+
"step": 1978
|
| 13881 |
+
},
|
| 13882 |
+
{
|
| 13883 |
+
"epoch": 0.0001979,
|
| 13884 |
+
"grad_norm": 10.085726737976074,
|
| 13885 |
+
"learning_rate": 1.978e-05,
|
| 13886 |
+
"loss": 71.0625,
|
| 13887 |
+
"step": 1979
|
| 13888 |
+
},
|
| 13889 |
+
{
|
| 13890 |
+
"epoch": 0.000198,
|
| 13891 |
+
"grad_norm": 10.222604751586914,
|
| 13892 |
+
"learning_rate": 1.979e-05,
|
| 13893 |
+
"loss": 69.75,
|
| 13894 |
+
"step": 1980
|
| 13895 |
+
},
|
| 13896 |
+
{
|
| 13897 |
+
"epoch": 0.0001981,
|
| 13898 |
+
"grad_norm": 10.206459045410156,
|
| 13899 |
+
"learning_rate": 1.98e-05,
|
| 13900 |
+
"loss": 69.9375,
|
| 13901 |
+
"step": 1981
|
| 13902 |
+
},
|
| 13903 |
+
{
|
| 13904 |
+
"epoch": 0.0001982,
|
| 13905 |
+
"grad_norm": 9.955349922180176,
|
| 13906 |
+
"learning_rate": 1.9810000000000002e-05,
|
| 13907 |
+
"loss": 71.5625,
|
| 13908 |
+
"step": 1982
|
| 13909 |
+
},
|
| 13910 |
+
{
|
| 13911 |
+
"epoch": 0.0001983,
|
| 13912 |
+
"grad_norm": 10.17930793762207,
|
| 13913 |
+
"learning_rate": 1.982e-05,
|
| 13914 |
+
"loss": 70.75,
|
| 13915 |
+
"step": 1983
|
| 13916 |
+
},
|
| 13917 |
+
{
|
| 13918 |
+
"epoch": 0.0001984,
|
| 13919 |
+
"grad_norm": 10.088285446166992,
|
| 13920 |
+
"learning_rate": 1.983e-05,
|
| 13921 |
+
"loss": 71.0625,
|
| 13922 |
+
"step": 1984
|
| 13923 |
+
},
|
| 13924 |
+
{
|
| 13925 |
+
"epoch": 0.0001985,
|
| 13926 |
+
"grad_norm": 10.180354118347168,
|
| 13927 |
+
"learning_rate": 1.984e-05,
|
| 13928 |
+
"loss": 70.3125,
|
| 13929 |
+
"step": 1985
|
| 13930 |
+
},
|
| 13931 |
+
{
|
| 13932 |
+
"epoch": 0.0001986,
|
| 13933 |
+
"grad_norm": 10.134940147399902,
|
| 13934 |
+
"learning_rate": 1.985e-05,
|
| 13935 |
+
"loss": 69.625,
|
| 13936 |
+
"step": 1986
|
| 13937 |
+
},
|
| 13938 |
+
{
|
| 13939 |
+
"epoch": 0.0001987,
|
| 13940 |
+
"grad_norm": 10.140948295593262,
|
| 13941 |
+
"learning_rate": 1.986e-05,
|
| 13942 |
+
"loss": 71.25,
|
| 13943 |
+
"step": 1987
|
| 13944 |
+
},
|
| 13945 |
+
{
|
| 13946 |
+
"epoch": 0.0001988,
|
| 13947 |
+
"grad_norm": 10.105887413024902,
|
| 13948 |
+
"learning_rate": 1.987e-05,
|
| 13949 |
+
"loss": 69.125,
|
| 13950 |
+
"step": 1988
|
| 13951 |
+
},
|
| 13952 |
+
{
|
| 13953 |
+
"epoch": 0.0001989,
|
| 13954 |
+
"grad_norm": 10.131355285644531,
|
| 13955 |
+
"learning_rate": 1.9880000000000003e-05,
|
| 13956 |
+
"loss": 70.5,
|
| 13957 |
+
"step": 1989
|
| 13958 |
+
},
|
| 13959 |
+
{
|
| 13960 |
+
"epoch": 0.000199,
|
| 13961 |
+
"grad_norm": 10.018689155578613,
|
| 13962 |
+
"learning_rate": 1.9889999999999998e-05,
|
| 13963 |
+
"loss": 71.4375,
|
| 13964 |
+
"step": 1990
|
| 13965 |
+
},
|
| 13966 |
+
{
|
| 13967 |
+
"epoch": 0.0001991,
|
| 13968 |
+
"grad_norm": 10.111222267150879,
|
| 13969 |
+
"learning_rate": 1.99e-05,
|
| 13970 |
+
"loss": 69.5625,
|
| 13971 |
+
"step": 1991
|
| 13972 |
+
},
|
| 13973 |
+
{
|
| 13974 |
+
"epoch": 0.0001992,
|
| 13975 |
+
"grad_norm": 10.133853912353516,
|
| 13976 |
+
"learning_rate": 1.991e-05,
|
| 13977 |
+
"loss": 69.8125,
|
| 13978 |
+
"step": 1992
|
| 13979 |
+
},
|
| 13980 |
+
{
|
| 13981 |
+
"epoch": 0.0001993,
|
| 13982 |
+
"grad_norm": 10.056241989135742,
|
| 13983 |
+
"learning_rate": 1.992e-05,
|
| 13984 |
+
"loss": 69.0,
|
| 13985 |
+
"step": 1993
|
| 13986 |
+
},
|
| 13987 |
+
{
|
| 13988 |
+
"epoch": 0.0001994,
|
| 13989 |
+
"grad_norm": 10.109450340270996,
|
| 13990 |
+
"learning_rate": 1.993e-05,
|
| 13991 |
+
"loss": 68.75,
|
| 13992 |
+
"step": 1994
|
| 13993 |
+
},
|
| 13994 |
+
{
|
| 13995 |
+
"epoch": 0.0001995,
|
| 13996 |
+
"grad_norm": 10.147754669189453,
|
| 13997 |
+
"learning_rate": 1.9940000000000002e-05,
|
| 13998 |
+
"loss": 69.5625,
|
| 13999 |
+
"step": 1995
|
| 14000 |
+
},
|
| 14001 |
+
{
|
| 14002 |
+
"epoch": 0.0001996,
|
| 14003 |
+
"grad_norm": 10.053937911987305,
|
| 14004 |
+
"learning_rate": 1.995e-05,
|
| 14005 |
+
"loss": 69.625,
|
| 14006 |
+
"step": 1996
|
| 14007 |
+
},
|
| 14008 |
+
{
|
| 14009 |
+
"epoch": 0.0001997,
|
| 14010 |
+
"grad_norm": 10.346978187561035,
|
| 14011 |
+
"learning_rate": 1.996e-05,
|
| 14012 |
+
"loss": 69.125,
|
| 14013 |
+
"step": 1997
|
| 14014 |
+
},
|
| 14015 |
+
{
|
| 14016 |
+
"epoch": 0.0001998,
|
| 14017 |
+
"grad_norm": 10.11376667022705,
|
| 14018 |
+
"learning_rate": 1.997e-05,
|
| 14019 |
+
"loss": 68.6875,
|
| 14020 |
+
"step": 1998
|
| 14021 |
+
},
|
| 14022 |
+
{
|
| 14023 |
+
"epoch": 0.0001999,
|
| 14024 |
+
"grad_norm": 10.165390968322754,
|
| 14025 |
+
"learning_rate": 1.9980000000000002e-05,
|
| 14026 |
+
"loss": 68.25,
|
| 14027 |
+
"step": 1999
|
| 14028 |
+
},
|
| 14029 |
+
{
|
| 14030 |
+
"epoch": 0.0002,
|
| 14031 |
+
"grad_norm": 10.25505256652832,
|
| 14032 |
+
"learning_rate": 1.999e-05,
|
| 14033 |
+
"loss": 70.4375,
|
| 14034 |
+
"step": 2000
|
| 14035 |
+
},
|
| 14036 |
+
{
|
| 14037 |
+
"epoch": 0.0002,
|
| 14038 |
+
"eval_loss": 8.524580001831055,
|
| 14039 |
+
"eval_runtime": 403.2967,
|
| 14040 |
+
"eval_samples_per_second": 24.796,
|
| 14041 |
+
"eval_steps_per_second": 1.55,
|
| 14042 |
+
"step": 2000
|
| 14043 |
}
|
| 14044 |
],
|
| 14045 |
"logging_steps": 1,
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71988dd6ac0602db875ee39071ef74a48a3843fe763623ff411a4aef8f5c27b2
|
| 3 |
+
size 5841
|