Training in progress, step 6500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 91951912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a145a32c48e7dc3bd508dd75fc8e7fe5250a1b6e3a2df7f3535ed82a8d0858e8
|
| 3 |
size 91951912
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 183991627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f8966ca3314d2a85a0fe791bda90892020c09adb3c62302874471fa40e297d3
|
| 3 |
size 183991627
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4a9f217e852f439efa6bd32fde98d6867f11aa6ea13ddc021ba10af6a0b0934
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:930e6cea79b6e3d4bb425cc6e2f12942b48c90126621309aadc0ddfdee9a6918
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -42104,6 +42104,3514 @@
|
|
| 42104 |
"eval_samples_per_second": 27.613,
|
| 42105 |
"eval_steps_per_second": 1.726,
|
| 42106 |
"step": 6000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42107 |
}
|
| 42108 |
],
|
| 42109 |
"logging_steps": 1,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.00065,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 6500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 42104 |
"eval_samples_per_second": 27.613,
|
| 42105 |
"eval_steps_per_second": 1.726,
|
| 42106 |
"step": 6000
|
| 42107 |
+
},
|
| 42108 |
+
{
|
| 42109 |
+
"epoch": 0.0006001,
|
| 42110 |
+
"grad_norm": 1.5553802251815796,
|
| 42111 |
+
"learning_rate": 6e-05,
|
| 42112 |
+
"loss": 0.7573,
|
| 42113 |
+
"step": 6001
|
| 42114 |
+
},
|
| 42115 |
+
{
|
| 42116 |
+
"epoch": 0.0006002,
|
| 42117 |
+
"grad_norm": 1.6512336730957031,
|
| 42118 |
+
"learning_rate": 6.001e-05,
|
| 42119 |
+
"loss": 0.7559,
|
| 42120 |
+
"step": 6002
|
| 42121 |
+
},
|
| 42122 |
+
{
|
| 42123 |
+
"epoch": 0.0006003,
|
| 42124 |
+
"grad_norm": 1.5313918590545654,
|
| 42125 |
+
"learning_rate": 6.002e-05,
|
| 42126 |
+
"loss": 0.7388,
|
| 42127 |
+
"step": 6003
|
| 42128 |
+
},
|
| 42129 |
+
{
|
| 42130 |
+
"epoch": 0.0006004,
|
| 42131 |
+
"grad_norm": 1.7878886461257935,
|
| 42132 |
+
"learning_rate": 6.003e-05,
|
| 42133 |
+
"loss": 0.8364,
|
| 42134 |
+
"step": 6004
|
| 42135 |
+
},
|
| 42136 |
+
{
|
| 42137 |
+
"epoch": 0.0006005,
|
| 42138 |
+
"grad_norm": 1.4012184143066406,
|
| 42139 |
+
"learning_rate": 6.004e-05,
|
| 42140 |
+
"loss": 0.6924,
|
| 42141 |
+
"step": 6005
|
| 42142 |
+
},
|
| 42143 |
+
{
|
| 42144 |
+
"epoch": 0.0006006,
|
| 42145 |
+
"grad_norm": 1.6499422788619995,
|
| 42146 |
+
"learning_rate": 6.0049999999999996e-05,
|
| 42147 |
+
"loss": 0.7358,
|
| 42148 |
+
"step": 6006
|
| 42149 |
+
},
|
| 42150 |
+
{
|
| 42151 |
+
"epoch": 0.0006007,
|
| 42152 |
+
"grad_norm": 1.5204499959945679,
|
| 42153 |
+
"learning_rate": 6.006e-05,
|
| 42154 |
+
"loss": 0.6792,
|
| 42155 |
+
"step": 6007
|
| 42156 |
+
},
|
| 42157 |
+
{
|
| 42158 |
+
"epoch": 0.0006008,
|
| 42159 |
+
"grad_norm": 2.066147565841675,
|
| 42160 |
+
"learning_rate": 6.0070000000000006e-05,
|
| 42161 |
+
"loss": 0.8433,
|
| 42162 |
+
"step": 6008
|
| 42163 |
+
},
|
| 42164 |
+
{
|
| 42165 |
+
"epoch": 0.0006009,
|
| 42166 |
+
"grad_norm": 1.7936888933181763,
|
| 42167 |
+
"learning_rate": 6.008000000000001e-05,
|
| 42168 |
+
"loss": 1.0107,
|
| 42169 |
+
"step": 6009
|
| 42170 |
+
},
|
| 42171 |
+
{
|
| 42172 |
+
"epoch": 0.000601,
|
| 42173 |
+
"grad_norm": 1.4196463823318481,
|
| 42174 |
+
"learning_rate": 6.0089999999999996e-05,
|
| 42175 |
+
"loss": 0.7012,
|
| 42176 |
+
"step": 6010
|
| 42177 |
+
},
|
| 42178 |
+
{
|
| 42179 |
+
"epoch": 0.0006011,
|
| 42180 |
+
"grad_norm": 2.7417123317718506,
|
| 42181 |
+
"learning_rate": 6.0100000000000004e-05,
|
| 42182 |
+
"loss": 0.8823,
|
| 42183 |
+
"step": 6011
|
| 42184 |
+
},
|
| 42185 |
+
{
|
| 42186 |
+
"epoch": 0.0006012,
|
| 42187 |
+
"grad_norm": 2.072298288345337,
|
| 42188 |
+
"learning_rate": 6.0110000000000006e-05,
|
| 42189 |
+
"loss": 0.7476,
|
| 42190 |
+
"step": 6012
|
| 42191 |
+
},
|
| 42192 |
+
{
|
| 42193 |
+
"epoch": 0.0006013,
|
| 42194 |
+
"grad_norm": 1.7735410928726196,
|
| 42195 |
+
"learning_rate": 6.012e-05,
|
| 42196 |
+
"loss": 0.7466,
|
| 42197 |
+
"step": 6013
|
| 42198 |
+
},
|
| 42199 |
+
{
|
| 42200 |
+
"epoch": 0.0006014,
|
| 42201 |
+
"grad_norm": 1.6693915128707886,
|
| 42202 |
+
"learning_rate": 6.013e-05,
|
| 42203 |
+
"loss": 0.7783,
|
| 42204 |
+
"step": 6014
|
| 42205 |
+
},
|
| 42206 |
+
{
|
| 42207 |
+
"epoch": 0.0006015,
|
| 42208 |
+
"grad_norm": 1.4681349992752075,
|
| 42209 |
+
"learning_rate": 6.0140000000000004e-05,
|
| 42210 |
+
"loss": 0.7012,
|
| 42211 |
+
"step": 6015
|
| 42212 |
+
},
|
| 42213 |
+
{
|
| 42214 |
+
"epoch": 0.0006016,
|
| 42215 |
+
"grad_norm": 2.034409761428833,
|
| 42216 |
+
"learning_rate": 6.015e-05,
|
| 42217 |
+
"loss": 0.6836,
|
| 42218 |
+
"step": 6016
|
| 42219 |
+
},
|
| 42220 |
+
{
|
| 42221 |
+
"epoch": 0.0006017,
|
| 42222 |
+
"grad_norm": 1.5247353315353394,
|
| 42223 |
+
"learning_rate": 6.016e-05,
|
| 42224 |
+
"loss": 0.7061,
|
| 42225 |
+
"step": 6017
|
| 42226 |
+
},
|
| 42227 |
+
{
|
| 42228 |
+
"epoch": 0.0006018,
|
| 42229 |
+
"grad_norm": 2.1397507190704346,
|
| 42230 |
+
"learning_rate": 6.017e-05,
|
| 42231 |
+
"loss": 0.7803,
|
| 42232 |
+
"step": 6018
|
| 42233 |
+
},
|
| 42234 |
+
{
|
| 42235 |
+
"epoch": 0.0006019,
|
| 42236 |
+
"grad_norm": 1.5201102495193481,
|
| 42237 |
+
"learning_rate": 6.018e-05,
|
| 42238 |
+
"loss": 0.6997,
|
| 42239 |
+
"step": 6019
|
| 42240 |
+
},
|
| 42241 |
+
{
|
| 42242 |
+
"epoch": 0.000602,
|
| 42243 |
+
"grad_norm": 1.7340844869613647,
|
| 42244 |
+
"learning_rate": 6.019e-05,
|
| 42245 |
+
"loss": 0.8164,
|
| 42246 |
+
"step": 6020
|
| 42247 |
+
},
|
| 42248 |
+
{
|
| 42249 |
+
"epoch": 0.0006021,
|
| 42250 |
+
"grad_norm": 1.4697178602218628,
|
| 42251 |
+
"learning_rate": 6.02e-05,
|
| 42252 |
+
"loss": 0.6431,
|
| 42253 |
+
"step": 6021
|
| 42254 |
+
},
|
| 42255 |
+
{
|
| 42256 |
+
"epoch": 0.0006022,
|
| 42257 |
+
"grad_norm": 1.5053555965423584,
|
| 42258 |
+
"learning_rate": 6.021e-05,
|
| 42259 |
+
"loss": 0.6851,
|
| 42260 |
+
"step": 6022
|
| 42261 |
+
},
|
| 42262 |
+
{
|
| 42263 |
+
"epoch": 0.0006023,
|
| 42264 |
+
"grad_norm": 1.394316554069519,
|
| 42265 |
+
"learning_rate": 6.0219999999999996e-05,
|
| 42266 |
+
"loss": 0.6714,
|
| 42267 |
+
"step": 6023
|
| 42268 |
+
},
|
| 42269 |
+
{
|
| 42270 |
+
"epoch": 0.0006024,
|
| 42271 |
+
"grad_norm": 1.365586519241333,
|
| 42272 |
+
"learning_rate": 6.023e-05,
|
| 42273 |
+
"loss": 0.6343,
|
| 42274 |
+
"step": 6024
|
| 42275 |
+
},
|
| 42276 |
+
{
|
| 42277 |
+
"epoch": 0.0006025,
|
| 42278 |
+
"grad_norm": 1.5944674015045166,
|
| 42279 |
+
"learning_rate": 6.0240000000000006e-05,
|
| 42280 |
+
"loss": 0.7695,
|
| 42281 |
+
"step": 6025
|
| 42282 |
+
},
|
| 42283 |
+
{
|
| 42284 |
+
"epoch": 0.0006026,
|
| 42285 |
+
"grad_norm": 1.3342678546905518,
|
| 42286 |
+
"learning_rate": 6.025000000000001e-05,
|
| 42287 |
+
"loss": 0.6338,
|
| 42288 |
+
"step": 6026
|
| 42289 |
+
},
|
| 42290 |
+
{
|
| 42291 |
+
"epoch": 0.0006027,
|
| 42292 |
+
"grad_norm": 1.4751640558242798,
|
| 42293 |
+
"learning_rate": 6.0259999999999996e-05,
|
| 42294 |
+
"loss": 0.6633,
|
| 42295 |
+
"step": 6027
|
| 42296 |
+
},
|
| 42297 |
+
{
|
| 42298 |
+
"epoch": 0.0006028,
|
| 42299 |
+
"grad_norm": 1.5709500312805176,
|
| 42300 |
+
"learning_rate": 6.0270000000000004e-05,
|
| 42301 |
+
"loss": 0.7354,
|
| 42302 |
+
"step": 6028
|
| 42303 |
+
},
|
| 42304 |
+
{
|
| 42305 |
+
"epoch": 0.0006029,
|
| 42306 |
+
"grad_norm": 1.364405870437622,
|
| 42307 |
+
"learning_rate": 6.0280000000000006e-05,
|
| 42308 |
+
"loss": 0.6846,
|
| 42309 |
+
"step": 6029
|
| 42310 |
+
},
|
| 42311 |
+
{
|
| 42312 |
+
"epoch": 0.000603,
|
| 42313 |
+
"grad_norm": 1.280802845954895,
|
| 42314 |
+
"learning_rate": 6.029e-05,
|
| 42315 |
+
"loss": 0.645,
|
| 42316 |
+
"step": 6030
|
| 42317 |
+
},
|
| 42318 |
+
{
|
| 42319 |
+
"epoch": 0.0006031,
|
| 42320 |
+
"grad_norm": 1.8422123193740845,
|
| 42321 |
+
"learning_rate": 6.03e-05,
|
| 42322 |
+
"loss": 0.7554,
|
| 42323 |
+
"step": 6031
|
| 42324 |
+
},
|
| 42325 |
+
{
|
| 42326 |
+
"epoch": 0.0006032,
|
| 42327 |
+
"grad_norm": 1.5924108028411865,
|
| 42328 |
+
"learning_rate": 6.0310000000000004e-05,
|
| 42329 |
+
"loss": 0.6846,
|
| 42330 |
+
"step": 6032
|
| 42331 |
+
},
|
| 42332 |
+
{
|
| 42333 |
+
"epoch": 0.0006033,
|
| 42334 |
+
"grad_norm": 1.7534675598144531,
|
| 42335 |
+
"learning_rate": 6.032e-05,
|
| 42336 |
+
"loss": 0.8188,
|
| 42337 |
+
"step": 6033
|
| 42338 |
+
},
|
| 42339 |
+
{
|
| 42340 |
+
"epoch": 0.0006034,
|
| 42341 |
+
"grad_norm": 1.363912582397461,
|
| 42342 |
+
"learning_rate": 6.033e-05,
|
| 42343 |
+
"loss": 0.6841,
|
| 42344 |
+
"step": 6034
|
| 42345 |
+
},
|
| 42346 |
+
{
|
| 42347 |
+
"epoch": 0.0006035,
|
| 42348 |
+
"grad_norm": 1.3158562183380127,
|
| 42349 |
+
"learning_rate": 6.034e-05,
|
| 42350 |
+
"loss": 0.687,
|
| 42351 |
+
"step": 6035
|
| 42352 |
+
},
|
| 42353 |
+
{
|
| 42354 |
+
"epoch": 0.0006036,
|
| 42355 |
+
"grad_norm": 1.873213529586792,
|
| 42356 |
+
"learning_rate": 6.035e-05,
|
| 42357 |
+
"loss": 0.7739,
|
| 42358 |
+
"step": 6036
|
| 42359 |
+
},
|
| 42360 |
+
{
|
| 42361 |
+
"epoch": 0.0006037,
|
| 42362 |
+
"grad_norm": 1.4768091440200806,
|
| 42363 |
+
"learning_rate": 6.036e-05,
|
| 42364 |
+
"loss": 0.6709,
|
| 42365 |
+
"step": 6037
|
| 42366 |
+
},
|
| 42367 |
+
{
|
| 42368 |
+
"epoch": 0.0006038,
|
| 42369 |
+
"grad_norm": 1.7859482765197754,
|
| 42370 |
+
"learning_rate": 6.037e-05,
|
| 42371 |
+
"loss": 0.8154,
|
| 42372 |
+
"step": 6038
|
| 42373 |
+
},
|
| 42374 |
+
{
|
| 42375 |
+
"epoch": 0.0006039,
|
| 42376 |
+
"grad_norm": 1.961234211921692,
|
| 42377 |
+
"learning_rate": 6.038e-05,
|
| 42378 |
+
"loss": 0.6943,
|
| 42379 |
+
"step": 6039
|
| 42380 |
+
},
|
| 42381 |
+
{
|
| 42382 |
+
"epoch": 0.000604,
|
| 42383 |
+
"grad_norm": 1.5077617168426514,
|
| 42384 |
+
"learning_rate": 6.0389999999999996e-05,
|
| 42385 |
+
"loss": 0.6821,
|
| 42386 |
+
"step": 6040
|
| 42387 |
+
},
|
| 42388 |
+
{
|
| 42389 |
+
"epoch": 0.0006041,
|
| 42390 |
+
"grad_norm": 1.4009017944335938,
|
| 42391 |
+
"learning_rate": 6.04e-05,
|
| 42392 |
+
"loss": 0.6602,
|
| 42393 |
+
"step": 6041
|
| 42394 |
+
},
|
| 42395 |
+
{
|
| 42396 |
+
"epoch": 0.0006042,
|
| 42397 |
+
"grad_norm": 1.473061203956604,
|
| 42398 |
+
"learning_rate": 6.0410000000000006e-05,
|
| 42399 |
+
"loss": 0.75,
|
| 42400 |
+
"step": 6042
|
| 42401 |
+
},
|
| 42402 |
+
{
|
| 42403 |
+
"epoch": 0.0006043,
|
| 42404 |
+
"grad_norm": 1.442574143409729,
|
| 42405 |
+
"learning_rate": 6.042000000000001e-05,
|
| 42406 |
+
"loss": 0.6836,
|
| 42407 |
+
"step": 6043
|
| 42408 |
+
},
|
| 42409 |
+
{
|
| 42410 |
+
"epoch": 0.0006044,
|
| 42411 |
+
"grad_norm": 1.5931884050369263,
|
| 42412 |
+
"learning_rate": 6.0429999999999996e-05,
|
| 42413 |
+
"loss": 0.688,
|
| 42414 |
+
"step": 6044
|
| 42415 |
+
},
|
| 42416 |
+
{
|
| 42417 |
+
"epoch": 0.0006045,
|
| 42418 |
+
"grad_norm": 1.3145321607589722,
|
| 42419 |
+
"learning_rate": 6.0440000000000004e-05,
|
| 42420 |
+
"loss": 0.626,
|
| 42421 |
+
"step": 6045
|
| 42422 |
+
},
|
| 42423 |
+
{
|
| 42424 |
+
"epoch": 0.0006046,
|
| 42425 |
+
"grad_norm": 1.446225643157959,
|
| 42426 |
+
"learning_rate": 6.0450000000000006e-05,
|
| 42427 |
+
"loss": 0.6655,
|
| 42428 |
+
"step": 6046
|
| 42429 |
+
},
|
| 42430 |
+
{
|
| 42431 |
+
"epoch": 0.0006047,
|
| 42432 |
+
"grad_norm": 1.5582249164581299,
|
| 42433 |
+
"learning_rate": 6.046e-05,
|
| 42434 |
+
"loss": 0.7358,
|
| 42435 |
+
"step": 6047
|
| 42436 |
+
},
|
| 42437 |
+
{
|
| 42438 |
+
"epoch": 0.0006048,
|
| 42439 |
+
"grad_norm": 1.45441734790802,
|
| 42440 |
+
"learning_rate": 6.047e-05,
|
| 42441 |
+
"loss": 0.7012,
|
| 42442 |
+
"step": 6048
|
| 42443 |
+
},
|
| 42444 |
+
{
|
| 42445 |
+
"epoch": 0.0006049,
|
| 42446 |
+
"grad_norm": 2.6049602031707764,
|
| 42447 |
+
"learning_rate": 6.0480000000000004e-05,
|
| 42448 |
+
"loss": 0.8921,
|
| 42449 |
+
"step": 6049
|
| 42450 |
+
},
|
| 42451 |
+
{
|
| 42452 |
+
"epoch": 0.000605,
|
| 42453 |
+
"grad_norm": 1.6523659229278564,
|
| 42454 |
+
"learning_rate": 6.049e-05,
|
| 42455 |
+
"loss": 0.7407,
|
| 42456 |
+
"step": 6050
|
| 42457 |
+
},
|
| 42458 |
+
{
|
| 42459 |
+
"epoch": 0.0006051,
|
| 42460 |
+
"grad_norm": 1.8112012147903442,
|
| 42461 |
+
"learning_rate": 6.05e-05,
|
| 42462 |
+
"loss": 0.8452,
|
| 42463 |
+
"step": 6051
|
| 42464 |
+
},
|
| 42465 |
+
{
|
| 42466 |
+
"epoch": 0.0006052,
|
| 42467 |
+
"grad_norm": 1.4687682390213013,
|
| 42468 |
+
"learning_rate": 6.051e-05,
|
| 42469 |
+
"loss": 0.6528,
|
| 42470 |
+
"step": 6052
|
| 42471 |
+
},
|
| 42472 |
+
{
|
| 42473 |
+
"epoch": 0.0006053,
|
| 42474 |
+
"grad_norm": 1.4037615060806274,
|
| 42475 |
+
"learning_rate": 6.052e-05,
|
| 42476 |
+
"loss": 0.6953,
|
| 42477 |
+
"step": 6053
|
| 42478 |
+
},
|
| 42479 |
+
{
|
| 42480 |
+
"epoch": 0.0006054,
|
| 42481 |
+
"grad_norm": 1.3966478109359741,
|
| 42482 |
+
"learning_rate": 6.053e-05,
|
| 42483 |
+
"loss": 0.6772,
|
| 42484 |
+
"step": 6054
|
| 42485 |
+
},
|
| 42486 |
+
{
|
| 42487 |
+
"epoch": 0.0006055,
|
| 42488 |
+
"grad_norm": 2.193171501159668,
|
| 42489 |
+
"learning_rate": 6.054e-05,
|
| 42490 |
+
"loss": 0.9385,
|
| 42491 |
+
"step": 6055
|
| 42492 |
+
},
|
| 42493 |
+
{
|
| 42494 |
+
"epoch": 0.0006056,
|
| 42495 |
+
"grad_norm": 1.522186517715454,
|
| 42496 |
+
"learning_rate": 6.055e-05,
|
| 42497 |
+
"loss": 0.7227,
|
| 42498 |
+
"step": 6056
|
| 42499 |
+
},
|
| 42500 |
+
{
|
| 42501 |
+
"epoch": 0.0006057,
|
| 42502 |
+
"grad_norm": 1.3285267353057861,
|
| 42503 |
+
"learning_rate": 6.0559999999999996e-05,
|
| 42504 |
+
"loss": 0.6499,
|
| 42505 |
+
"step": 6057
|
| 42506 |
+
},
|
| 42507 |
+
{
|
| 42508 |
+
"epoch": 0.0006058,
|
| 42509 |
+
"grad_norm": 1.491991400718689,
|
| 42510 |
+
"learning_rate": 6.057e-05,
|
| 42511 |
+
"loss": 0.7163,
|
| 42512 |
+
"step": 6058
|
| 42513 |
+
},
|
| 42514 |
+
{
|
| 42515 |
+
"epoch": 0.0006059,
|
| 42516 |
+
"grad_norm": 1.3360713720321655,
|
| 42517 |
+
"learning_rate": 6.0580000000000006e-05,
|
| 42518 |
+
"loss": 0.688,
|
| 42519 |
+
"step": 6059
|
| 42520 |
+
},
|
| 42521 |
+
{
|
| 42522 |
+
"epoch": 0.000606,
|
| 42523 |
+
"grad_norm": 1.608153223991394,
|
| 42524 |
+
"learning_rate": 6.059000000000001e-05,
|
| 42525 |
+
"loss": 0.7534,
|
| 42526 |
+
"step": 6060
|
| 42527 |
+
},
|
| 42528 |
+
{
|
| 42529 |
+
"epoch": 0.0006061,
|
| 42530 |
+
"grad_norm": 1.4696681499481201,
|
| 42531 |
+
"learning_rate": 6.0599999999999996e-05,
|
| 42532 |
+
"loss": 0.6851,
|
| 42533 |
+
"step": 6061
|
| 42534 |
+
},
|
| 42535 |
+
{
|
| 42536 |
+
"epoch": 0.0006062,
|
| 42537 |
+
"grad_norm": 1.2329943180084229,
|
| 42538 |
+
"learning_rate": 6.0610000000000004e-05,
|
| 42539 |
+
"loss": 0.6392,
|
| 42540 |
+
"step": 6062
|
| 42541 |
+
},
|
| 42542 |
+
{
|
| 42543 |
+
"epoch": 0.0006063,
|
| 42544 |
+
"grad_norm": 1.6862188577651978,
|
| 42545 |
+
"learning_rate": 6.0620000000000006e-05,
|
| 42546 |
+
"loss": 0.7778,
|
| 42547 |
+
"step": 6063
|
| 42548 |
+
},
|
| 42549 |
+
{
|
| 42550 |
+
"epoch": 0.0006064,
|
| 42551 |
+
"grad_norm": 1.4250811338424683,
|
| 42552 |
+
"learning_rate": 6.063e-05,
|
| 42553 |
+
"loss": 0.6377,
|
| 42554 |
+
"step": 6064
|
| 42555 |
+
},
|
| 42556 |
+
{
|
| 42557 |
+
"epoch": 0.0006065,
|
| 42558 |
+
"grad_norm": 1.5216045379638672,
|
| 42559 |
+
"learning_rate": 6.064e-05,
|
| 42560 |
+
"loss": 0.7222,
|
| 42561 |
+
"step": 6065
|
| 42562 |
+
},
|
| 42563 |
+
{
|
| 42564 |
+
"epoch": 0.0006066,
|
| 42565 |
+
"grad_norm": 1.3850923776626587,
|
| 42566 |
+
"learning_rate": 6.0650000000000004e-05,
|
| 42567 |
+
"loss": 0.6938,
|
| 42568 |
+
"step": 6066
|
| 42569 |
+
},
|
| 42570 |
+
{
|
| 42571 |
+
"epoch": 0.0006067,
|
| 42572 |
+
"grad_norm": 1.2963297367095947,
|
| 42573 |
+
"learning_rate": 6.066e-05,
|
| 42574 |
+
"loss": 0.6162,
|
| 42575 |
+
"step": 6067
|
| 42576 |
+
},
|
| 42577 |
+
{
|
| 42578 |
+
"epoch": 0.0006068,
|
| 42579 |
+
"grad_norm": 1.714134931564331,
|
| 42580 |
+
"learning_rate": 6.067e-05,
|
| 42581 |
+
"loss": 0.8525,
|
| 42582 |
+
"step": 6068
|
| 42583 |
+
},
|
| 42584 |
+
{
|
| 42585 |
+
"epoch": 0.0006069,
|
| 42586 |
+
"grad_norm": 1.6639752388000488,
|
| 42587 |
+
"learning_rate": 6.068e-05,
|
| 42588 |
+
"loss": 0.7471,
|
| 42589 |
+
"step": 6069
|
| 42590 |
+
},
|
| 42591 |
+
{
|
| 42592 |
+
"epoch": 0.000607,
|
| 42593 |
+
"grad_norm": 1.5290673971176147,
|
| 42594 |
+
"learning_rate": 6.069e-05,
|
| 42595 |
+
"loss": 0.7007,
|
| 42596 |
+
"step": 6070
|
| 42597 |
+
},
|
| 42598 |
+
{
|
| 42599 |
+
"epoch": 0.0006071,
|
| 42600 |
+
"grad_norm": 1.3387386798858643,
|
| 42601 |
+
"learning_rate": 6.07e-05,
|
| 42602 |
+
"loss": 0.644,
|
| 42603 |
+
"step": 6071
|
| 42604 |
+
},
|
| 42605 |
+
{
|
| 42606 |
+
"epoch": 0.0006072,
|
| 42607 |
+
"grad_norm": 1.4449995756149292,
|
| 42608 |
+
"learning_rate": 6.071e-05,
|
| 42609 |
+
"loss": 0.6548,
|
| 42610 |
+
"step": 6072
|
| 42611 |
+
},
|
| 42612 |
+
{
|
| 42613 |
+
"epoch": 0.0006073,
|
| 42614 |
+
"grad_norm": 1.3186904191970825,
|
| 42615 |
+
"learning_rate": 6.072e-05,
|
| 42616 |
+
"loss": 0.6851,
|
| 42617 |
+
"step": 6073
|
| 42618 |
+
},
|
| 42619 |
+
{
|
| 42620 |
+
"epoch": 0.0006074,
|
| 42621 |
+
"grad_norm": 1.5486427545547485,
|
| 42622 |
+
"learning_rate": 6.0729999999999996e-05,
|
| 42623 |
+
"loss": 0.7241,
|
| 42624 |
+
"step": 6074
|
| 42625 |
+
},
|
| 42626 |
+
{
|
| 42627 |
+
"epoch": 0.0006075,
|
| 42628 |
+
"grad_norm": 2.433990716934204,
|
| 42629 |
+
"learning_rate": 6.074e-05,
|
| 42630 |
+
"loss": 0.6904,
|
| 42631 |
+
"step": 6075
|
| 42632 |
+
},
|
| 42633 |
+
{
|
| 42634 |
+
"epoch": 0.0006076,
|
| 42635 |
+
"grad_norm": 1.4973959922790527,
|
| 42636 |
+
"learning_rate": 6.0750000000000006e-05,
|
| 42637 |
+
"loss": 0.6118,
|
| 42638 |
+
"step": 6076
|
| 42639 |
+
},
|
| 42640 |
+
{
|
| 42641 |
+
"epoch": 0.0006077,
|
| 42642 |
+
"grad_norm": 1.5700976848602295,
|
| 42643 |
+
"learning_rate": 6.076000000000001e-05,
|
| 42644 |
+
"loss": 0.6963,
|
| 42645 |
+
"step": 6077
|
| 42646 |
+
},
|
| 42647 |
+
{
|
| 42648 |
+
"epoch": 0.0006078,
|
| 42649 |
+
"grad_norm": 1.791594386100769,
|
| 42650 |
+
"learning_rate": 6.0769999999999996e-05,
|
| 42651 |
+
"loss": 0.751,
|
| 42652 |
+
"step": 6078
|
| 42653 |
+
},
|
| 42654 |
+
{
|
| 42655 |
+
"epoch": 0.0006079,
|
| 42656 |
+
"grad_norm": 2.1504902839660645,
|
| 42657 |
+
"learning_rate": 6.0780000000000004e-05,
|
| 42658 |
+
"loss": 0.8877,
|
| 42659 |
+
"step": 6079
|
| 42660 |
+
},
|
| 42661 |
+
{
|
| 42662 |
+
"epoch": 0.000608,
|
| 42663 |
+
"grad_norm": 1.4483439922332764,
|
| 42664 |
+
"learning_rate": 6.0790000000000006e-05,
|
| 42665 |
+
"loss": 0.623,
|
| 42666 |
+
"step": 6080
|
| 42667 |
+
},
|
| 42668 |
+
{
|
| 42669 |
+
"epoch": 0.0006081,
|
| 42670 |
+
"grad_norm": 1.638651967048645,
|
| 42671 |
+
"learning_rate": 6.08e-05,
|
| 42672 |
+
"loss": 0.6558,
|
| 42673 |
+
"step": 6081
|
| 42674 |
+
},
|
| 42675 |
+
{
|
| 42676 |
+
"epoch": 0.0006082,
|
| 42677 |
+
"grad_norm": 1.3742382526397705,
|
| 42678 |
+
"learning_rate": 6.081e-05,
|
| 42679 |
+
"loss": 0.6445,
|
| 42680 |
+
"step": 6082
|
| 42681 |
+
},
|
| 42682 |
+
{
|
| 42683 |
+
"epoch": 0.0006083,
|
| 42684 |
+
"grad_norm": 1.690085768699646,
|
| 42685 |
+
"learning_rate": 6.0820000000000004e-05,
|
| 42686 |
+
"loss": 0.6929,
|
| 42687 |
+
"step": 6083
|
| 42688 |
+
},
|
| 42689 |
+
{
|
| 42690 |
+
"epoch": 0.0006084,
|
| 42691 |
+
"grad_norm": 1.4449642896652222,
|
| 42692 |
+
"learning_rate": 6.083e-05,
|
| 42693 |
+
"loss": 0.6514,
|
| 42694 |
+
"step": 6084
|
| 42695 |
+
},
|
| 42696 |
+
{
|
| 42697 |
+
"epoch": 0.0006085,
|
| 42698 |
+
"grad_norm": 2.428176164627075,
|
| 42699 |
+
"learning_rate": 6.084e-05,
|
| 42700 |
+
"loss": 0.7739,
|
| 42701 |
+
"step": 6085
|
| 42702 |
+
},
|
| 42703 |
+
{
|
| 42704 |
+
"epoch": 0.0006086,
|
| 42705 |
+
"grad_norm": 1.5959484577178955,
|
| 42706 |
+
"learning_rate": 6.085e-05,
|
| 42707 |
+
"loss": 0.6357,
|
| 42708 |
+
"step": 6086
|
| 42709 |
+
},
|
| 42710 |
+
{
|
| 42711 |
+
"epoch": 0.0006087,
|
| 42712 |
+
"grad_norm": 1.6490490436553955,
|
| 42713 |
+
"learning_rate": 6.0860000000000003e-05,
|
| 42714 |
+
"loss": 0.6699,
|
| 42715 |
+
"step": 6087
|
| 42716 |
+
},
|
| 42717 |
+
{
|
| 42718 |
+
"epoch": 0.0006088,
|
| 42719 |
+
"grad_norm": 1.5487797260284424,
|
| 42720 |
+
"learning_rate": 6.087e-05,
|
| 42721 |
+
"loss": 0.6968,
|
| 42722 |
+
"step": 6088
|
| 42723 |
+
},
|
| 42724 |
+
{
|
| 42725 |
+
"epoch": 0.0006089,
|
| 42726 |
+
"grad_norm": 1.2236945629119873,
|
| 42727 |
+
"learning_rate": 6.088e-05,
|
| 42728 |
+
"loss": 0.6001,
|
| 42729 |
+
"step": 6089
|
| 42730 |
+
},
|
| 42731 |
+
{
|
| 42732 |
+
"epoch": 0.000609,
|
| 42733 |
+
"grad_norm": 1.7166575193405151,
|
| 42734 |
+
"learning_rate": 6.089e-05,
|
| 42735 |
+
"loss": 0.7114,
|
| 42736 |
+
"step": 6090
|
| 42737 |
+
},
|
| 42738 |
+
{
|
| 42739 |
+
"epoch": 0.0006091,
|
| 42740 |
+
"grad_norm": 1.4670919179916382,
|
| 42741 |
+
"learning_rate": 6.0899999999999996e-05,
|
| 42742 |
+
"loss": 0.6343,
|
| 42743 |
+
"step": 6091
|
| 42744 |
+
},
|
| 42745 |
+
{
|
| 42746 |
+
"epoch": 0.0006092,
|
| 42747 |
+
"grad_norm": 1.4445433616638184,
|
| 42748 |
+
"learning_rate": 6.091e-05,
|
| 42749 |
+
"loss": 0.7012,
|
| 42750 |
+
"step": 6092
|
| 42751 |
+
},
|
| 42752 |
+
{
|
| 42753 |
+
"epoch": 0.0006093,
|
| 42754 |
+
"grad_norm": 1.329375982284546,
|
| 42755 |
+
"learning_rate": 6.0920000000000006e-05,
|
| 42756 |
+
"loss": 0.6147,
|
| 42757 |
+
"step": 6093
|
| 42758 |
+
},
|
| 42759 |
+
{
|
| 42760 |
+
"epoch": 0.0006094,
|
| 42761 |
+
"grad_norm": 1.2377262115478516,
|
| 42762 |
+
"learning_rate": 6.093000000000001e-05,
|
| 42763 |
+
"loss": 0.6064,
|
| 42764 |
+
"step": 6094
|
| 42765 |
+
},
|
| 42766 |
+
{
|
| 42767 |
+
"epoch": 0.0006095,
|
| 42768 |
+
"grad_norm": 1.3684781789779663,
|
| 42769 |
+
"learning_rate": 6.0939999999999996e-05,
|
| 42770 |
+
"loss": 0.6797,
|
| 42771 |
+
"step": 6095
|
| 42772 |
+
},
|
| 42773 |
+
{
|
| 42774 |
+
"epoch": 0.0006096,
|
| 42775 |
+
"grad_norm": 1.2610841989517212,
|
| 42776 |
+
"learning_rate": 6.0950000000000004e-05,
|
| 42777 |
+
"loss": 0.6279,
|
| 42778 |
+
"step": 6096
|
| 42779 |
+
},
|
| 42780 |
+
{
|
| 42781 |
+
"epoch": 0.0006097,
|
| 42782 |
+
"grad_norm": 1.3230469226837158,
|
| 42783 |
+
"learning_rate": 6.0960000000000006e-05,
|
| 42784 |
+
"loss": 0.6284,
|
| 42785 |
+
"step": 6097
|
| 42786 |
+
},
|
| 42787 |
+
{
|
| 42788 |
+
"epoch": 0.0006098,
|
| 42789 |
+
"grad_norm": 1.3344494104385376,
|
| 42790 |
+
"learning_rate": 6.097e-05,
|
| 42791 |
+
"loss": 0.6455,
|
| 42792 |
+
"step": 6098
|
| 42793 |
+
},
|
| 42794 |
+
{
|
| 42795 |
+
"epoch": 0.0006099,
|
| 42796 |
+
"grad_norm": 2.1403236389160156,
|
| 42797 |
+
"learning_rate": 6.098e-05,
|
| 42798 |
+
"loss": 1.0176,
|
| 42799 |
+
"step": 6099
|
| 42800 |
+
},
|
| 42801 |
+
{
|
| 42802 |
+
"epoch": 0.00061,
|
| 42803 |
+
"grad_norm": 1.5162384510040283,
|
| 42804 |
+
"learning_rate": 6.0990000000000004e-05,
|
| 42805 |
+
"loss": 0.7207,
|
| 42806 |
+
"step": 6100
|
| 42807 |
+
},
|
| 42808 |
+
{
|
| 42809 |
+
"epoch": 0.0006101,
|
| 42810 |
+
"grad_norm": 2.004774808883667,
|
| 42811 |
+
"learning_rate": 6.1e-05,
|
| 42812 |
+
"loss": 0.8926,
|
| 42813 |
+
"step": 6101
|
| 42814 |
+
},
|
| 42815 |
+
{
|
| 42816 |
+
"epoch": 0.0006102,
|
| 42817 |
+
"grad_norm": 1.3667763471603394,
|
| 42818 |
+
"learning_rate": 6.101e-05,
|
| 42819 |
+
"loss": 0.6323,
|
| 42820 |
+
"step": 6102
|
| 42821 |
+
},
|
| 42822 |
+
{
|
| 42823 |
+
"epoch": 0.0006103,
|
| 42824 |
+
"grad_norm": 1.194966435432434,
|
| 42825 |
+
"learning_rate": 6.102e-05,
|
| 42826 |
+
"loss": 0.583,
|
| 42827 |
+
"step": 6103
|
| 42828 |
+
},
|
| 42829 |
+
{
|
| 42830 |
+
"epoch": 0.0006104,
|
| 42831 |
+
"grad_norm": 1.3216735124588013,
|
| 42832 |
+
"learning_rate": 6.1030000000000004e-05,
|
| 42833 |
+
"loss": 0.6465,
|
| 42834 |
+
"step": 6104
|
| 42835 |
+
},
|
| 42836 |
+
{
|
| 42837 |
+
"epoch": 0.0006105,
|
| 42838 |
+
"grad_norm": 1.486382246017456,
|
| 42839 |
+
"learning_rate": 6.104e-05,
|
| 42840 |
+
"loss": 0.6807,
|
| 42841 |
+
"step": 6105
|
| 42842 |
+
},
|
| 42843 |
+
{
|
| 42844 |
+
"epoch": 0.0006106,
|
| 42845 |
+
"grad_norm": 2.2750372886657715,
|
| 42846 |
+
"learning_rate": 6.105e-05,
|
| 42847 |
+
"loss": 1.0103,
|
| 42848 |
+
"step": 6106
|
| 42849 |
+
},
|
| 42850 |
+
{
|
| 42851 |
+
"epoch": 0.0006107,
|
| 42852 |
+
"grad_norm": 3.482034921646118,
|
| 42853 |
+
"learning_rate": 6.106e-05,
|
| 42854 |
+
"loss": 0.7627,
|
| 42855 |
+
"step": 6107
|
| 42856 |
+
},
|
| 42857 |
+
{
|
| 42858 |
+
"epoch": 0.0006108,
|
| 42859 |
+
"grad_norm": 1.6870607137680054,
|
| 42860 |
+
"learning_rate": 6.107e-05,
|
| 42861 |
+
"loss": 0.6392,
|
| 42862 |
+
"step": 6108
|
| 42863 |
+
},
|
| 42864 |
+
{
|
| 42865 |
+
"epoch": 0.0006109,
|
| 42866 |
+
"grad_norm": 4.1532111167907715,
|
| 42867 |
+
"learning_rate": 6.108e-05,
|
| 42868 |
+
"loss": 1.0767,
|
| 42869 |
+
"step": 6109
|
| 42870 |
+
},
|
| 42871 |
+
{
|
| 42872 |
+
"epoch": 0.000611,
|
| 42873 |
+
"grad_norm": 1.5247021913528442,
|
| 42874 |
+
"learning_rate": 6.109e-05,
|
| 42875 |
+
"loss": 0.6348,
|
| 42876 |
+
"step": 6110
|
| 42877 |
+
},
|
| 42878 |
+
{
|
| 42879 |
+
"epoch": 0.0006111,
|
| 42880 |
+
"grad_norm": 1.549442172050476,
|
| 42881 |
+
"learning_rate": 6.110000000000001e-05,
|
| 42882 |
+
"loss": 0.6675,
|
| 42883 |
+
"step": 6111
|
| 42884 |
+
},
|
| 42885 |
+
{
|
| 42886 |
+
"epoch": 0.0006112,
|
| 42887 |
+
"grad_norm": 1.6295372247695923,
|
| 42888 |
+
"learning_rate": 6.111e-05,
|
| 42889 |
+
"loss": 0.6895,
|
| 42890 |
+
"step": 6112
|
| 42891 |
+
},
|
| 42892 |
+
{
|
| 42893 |
+
"epoch": 0.0006113,
|
| 42894 |
+
"grad_norm": 1.4706110954284668,
|
| 42895 |
+
"learning_rate": 6.112e-05,
|
| 42896 |
+
"loss": 0.6245,
|
| 42897 |
+
"step": 6113
|
| 42898 |
+
},
|
| 42899 |
+
{
|
| 42900 |
+
"epoch": 0.0006114,
|
| 42901 |
+
"grad_norm": 1.739664912223816,
|
| 42902 |
+
"learning_rate": 6.113e-05,
|
| 42903 |
+
"loss": 0.8325,
|
| 42904 |
+
"step": 6114
|
| 42905 |
+
},
|
| 42906 |
+
{
|
| 42907 |
+
"epoch": 0.0006115,
|
| 42908 |
+
"grad_norm": 1.614557147026062,
|
| 42909 |
+
"learning_rate": 6.114e-05,
|
| 42910 |
+
"loss": 0.7148,
|
| 42911 |
+
"step": 6115
|
| 42912 |
+
},
|
| 42913 |
+
{
|
| 42914 |
+
"epoch": 0.0006116,
|
| 42915 |
+
"grad_norm": 1.623680591583252,
|
| 42916 |
+
"learning_rate": 6.115e-05,
|
| 42917 |
+
"loss": 0.6597,
|
| 42918 |
+
"step": 6116
|
| 42919 |
+
},
|
| 42920 |
+
{
|
| 42921 |
+
"epoch": 0.0006117,
|
| 42922 |
+
"grad_norm": 1.3491045236587524,
|
| 42923 |
+
"learning_rate": 6.116e-05,
|
| 42924 |
+
"loss": 0.5996,
|
| 42925 |
+
"step": 6117
|
| 42926 |
+
},
|
| 42927 |
+
{
|
| 42928 |
+
"epoch": 0.0006118,
|
| 42929 |
+
"grad_norm": 1.3005375862121582,
|
| 42930 |
+
"learning_rate": 6.117e-05,
|
| 42931 |
+
"loss": 0.5903,
|
| 42932 |
+
"step": 6118
|
| 42933 |
+
},
|
| 42934 |
+
{
|
| 42935 |
+
"epoch": 0.0006119,
|
| 42936 |
+
"grad_norm": 1.3519699573516846,
|
| 42937 |
+
"learning_rate": 6.118e-05,
|
| 42938 |
+
"loss": 0.6133,
|
| 42939 |
+
"step": 6119
|
| 42940 |
+
},
|
| 42941 |
+
{
|
| 42942 |
+
"epoch": 0.000612,
|
| 42943 |
+
"grad_norm": 2.895958423614502,
|
| 42944 |
+
"learning_rate": 6.119e-05,
|
| 42945 |
+
"loss": 0.9756,
|
| 42946 |
+
"step": 6120
|
| 42947 |
+
},
|
| 42948 |
+
{
|
| 42949 |
+
"epoch": 0.0006121,
|
| 42950 |
+
"grad_norm": 2.477351427078247,
|
| 42951 |
+
"learning_rate": 6.120000000000001e-05,
|
| 42952 |
+
"loss": 0.8291,
|
| 42953 |
+
"step": 6121
|
| 42954 |
+
},
|
| 42955 |
+
{
|
| 42956 |
+
"epoch": 0.0006122,
|
| 42957 |
+
"grad_norm": 2.2054953575134277,
|
| 42958 |
+
"learning_rate": 6.120999999999999e-05,
|
| 42959 |
+
"loss": 0.7183,
|
| 42960 |
+
"step": 6122
|
| 42961 |
+
},
|
| 42962 |
+
{
|
| 42963 |
+
"epoch": 0.0006123,
|
| 42964 |
+
"grad_norm": 3.672135829925537,
|
| 42965 |
+
"learning_rate": 6.122e-05,
|
| 42966 |
+
"loss": 1.187,
|
| 42967 |
+
"step": 6123
|
| 42968 |
+
},
|
| 42969 |
+
{
|
| 42970 |
+
"epoch": 0.0006124,
|
| 42971 |
+
"grad_norm": 2.4219377040863037,
|
| 42972 |
+
"learning_rate": 6.123000000000001e-05,
|
| 42973 |
+
"loss": 0.7812,
|
| 42974 |
+
"step": 6124
|
| 42975 |
+
},
|
| 42976 |
+
{
|
| 42977 |
+
"epoch": 0.0006125,
|
| 42978 |
+
"grad_norm": 5.223512649536133,
|
| 42979 |
+
"learning_rate": 6.124e-05,
|
| 42980 |
+
"loss": 0.9116,
|
| 42981 |
+
"step": 6125
|
| 42982 |
+
},
|
| 42983 |
+
{
|
| 42984 |
+
"epoch": 0.0006126,
|
| 42985 |
+
"grad_norm": 2.252377986907959,
|
| 42986 |
+
"learning_rate": 6.125e-05,
|
| 42987 |
+
"loss": 1.0322,
|
| 42988 |
+
"step": 6126
|
| 42989 |
+
},
|
| 42990 |
+
{
|
| 42991 |
+
"epoch": 0.0006127,
|
| 42992 |
+
"grad_norm": 3.5240578651428223,
|
| 42993 |
+
"learning_rate": 6.126e-05,
|
| 42994 |
+
"loss": 1.2168,
|
| 42995 |
+
"step": 6127
|
| 42996 |
+
},
|
| 42997 |
+
{
|
| 42998 |
+
"epoch": 0.0006128,
|
| 42999 |
+
"grad_norm": 2.5054214000701904,
|
| 43000 |
+
"learning_rate": 6.127e-05,
|
| 43001 |
+
"loss": 0.8188,
|
| 43002 |
+
"step": 6128
|
| 43003 |
+
},
|
| 43004 |
+
{
|
| 43005 |
+
"epoch": 0.0006129,
|
| 43006 |
+
"grad_norm": 1.5566062927246094,
|
| 43007 |
+
"learning_rate": 6.128e-05,
|
| 43008 |
+
"loss": 0.7041,
|
| 43009 |
+
"step": 6129
|
| 43010 |
+
},
|
| 43011 |
+
{
|
| 43012 |
+
"epoch": 0.000613,
|
| 43013 |
+
"grad_norm": 1.4800119400024414,
|
| 43014 |
+
"learning_rate": 6.129e-05,
|
| 43015 |
+
"loss": 0.5854,
|
| 43016 |
+
"step": 6130
|
| 43017 |
+
},
|
| 43018 |
+
{
|
| 43019 |
+
"epoch": 0.0006131,
|
| 43020 |
+
"grad_norm": 1.8147279024124146,
|
| 43021 |
+
"learning_rate": 6.13e-05,
|
| 43022 |
+
"loss": 0.8003,
|
| 43023 |
+
"step": 6131
|
| 43024 |
+
},
|
| 43025 |
+
{
|
| 43026 |
+
"epoch": 0.0006132,
|
| 43027 |
+
"grad_norm": 1.4204871654510498,
|
| 43028 |
+
"learning_rate": 6.131e-05,
|
| 43029 |
+
"loss": 0.6201,
|
| 43030 |
+
"step": 6132
|
| 43031 |
+
},
|
| 43032 |
+
{
|
| 43033 |
+
"epoch": 0.0006133,
|
| 43034 |
+
"grad_norm": 1.4384766817092896,
|
| 43035 |
+
"learning_rate": 6.132e-05,
|
| 43036 |
+
"loss": 0.6345,
|
| 43037 |
+
"step": 6133
|
| 43038 |
+
},
|
| 43039 |
+
{
|
| 43040 |
+
"epoch": 0.0006134,
|
| 43041 |
+
"grad_norm": 1.5662084817886353,
|
| 43042 |
+
"learning_rate": 6.133e-05,
|
| 43043 |
+
"loss": 0.689,
|
| 43044 |
+
"step": 6134
|
| 43045 |
+
},
|
| 43046 |
+
{
|
| 43047 |
+
"epoch": 0.0006135,
|
| 43048 |
+
"grad_norm": 1.5638211965560913,
|
| 43049 |
+
"learning_rate": 6.133999999999999e-05,
|
| 43050 |
+
"loss": 0.6387,
|
| 43051 |
+
"step": 6135
|
| 43052 |
+
},
|
| 43053 |
+
{
|
| 43054 |
+
"epoch": 0.0006136,
|
| 43055 |
+
"grad_norm": 1.7658390998840332,
|
| 43056 |
+
"learning_rate": 6.135e-05,
|
| 43057 |
+
"loss": 0.7488,
|
| 43058 |
+
"step": 6136
|
| 43059 |
+
},
|
| 43060 |
+
{
|
| 43061 |
+
"epoch": 0.0006137,
|
| 43062 |
+
"grad_norm": 1.5555974245071411,
|
| 43063 |
+
"learning_rate": 6.136000000000001e-05,
|
| 43064 |
+
"loss": 0.6445,
|
| 43065 |
+
"step": 6137
|
| 43066 |
+
},
|
| 43067 |
+
{
|
| 43068 |
+
"epoch": 0.0006138,
|
| 43069 |
+
"grad_norm": 1.6092109680175781,
|
| 43070 |
+
"learning_rate": 6.137e-05,
|
| 43071 |
+
"loss": 0.6777,
|
| 43072 |
+
"step": 6138
|
| 43073 |
+
},
|
| 43074 |
+
{
|
| 43075 |
+
"epoch": 0.0006139,
|
| 43076 |
+
"grad_norm": 1.5195221900939941,
|
| 43077 |
+
"learning_rate": 6.138e-05,
|
| 43078 |
+
"loss": 0.6519,
|
| 43079 |
+
"step": 6139
|
| 43080 |
+
},
|
| 43081 |
+
{
|
| 43082 |
+
"epoch": 0.000614,
|
| 43083 |
+
"grad_norm": 4.042060852050781,
|
| 43084 |
+
"learning_rate": 6.139000000000001e-05,
|
| 43085 |
+
"loss": 0.9448,
|
| 43086 |
+
"step": 6140
|
| 43087 |
+
},
|
| 43088 |
+
{
|
| 43089 |
+
"epoch": 0.0006141,
|
| 43090 |
+
"grad_norm": 1.4042551517486572,
|
| 43091 |
+
"learning_rate": 6.14e-05,
|
| 43092 |
+
"loss": 0.603,
|
| 43093 |
+
"step": 6141
|
| 43094 |
+
},
|
| 43095 |
+
{
|
| 43096 |
+
"epoch": 0.0006142,
|
| 43097 |
+
"grad_norm": 1.4371025562286377,
|
| 43098 |
+
"learning_rate": 6.141e-05,
|
| 43099 |
+
"loss": 0.6411,
|
| 43100 |
+
"step": 6142
|
| 43101 |
+
},
|
| 43102 |
+
{
|
| 43103 |
+
"epoch": 0.0006143,
|
| 43104 |
+
"grad_norm": 1.1401251554489136,
|
| 43105 |
+
"learning_rate": 6.142e-05,
|
| 43106 |
+
"loss": 0.5542,
|
| 43107 |
+
"step": 6143
|
| 43108 |
+
},
|
| 43109 |
+
{
|
| 43110 |
+
"epoch": 0.0006144,
|
| 43111 |
+
"grad_norm": 1.6855483055114746,
|
| 43112 |
+
"learning_rate": 6.143e-05,
|
| 43113 |
+
"loss": 0.7407,
|
| 43114 |
+
"step": 6144
|
| 43115 |
+
},
|
| 43116 |
+
{
|
| 43117 |
+
"epoch": 0.0006145,
|
| 43118 |
+
"grad_norm": 1.4168471097946167,
|
| 43119 |
+
"learning_rate": 6.144000000000001e-05,
|
| 43120 |
+
"loss": 0.6001,
|
| 43121 |
+
"step": 6145
|
| 43122 |
+
},
|
| 43123 |
+
{
|
| 43124 |
+
"epoch": 0.0006146,
|
| 43125 |
+
"grad_norm": 2.1707684993743896,
|
| 43126 |
+
"learning_rate": 6.145e-05,
|
| 43127 |
+
"loss": 0.9136,
|
| 43128 |
+
"step": 6146
|
| 43129 |
+
},
|
| 43130 |
+
{
|
| 43131 |
+
"epoch": 0.0006147,
|
| 43132 |
+
"grad_norm": 1.533423662185669,
|
| 43133 |
+
"learning_rate": 6.146e-05,
|
| 43134 |
+
"loss": 0.6665,
|
| 43135 |
+
"step": 6147
|
| 43136 |
+
},
|
| 43137 |
+
{
|
| 43138 |
+
"epoch": 0.0006148,
|
| 43139 |
+
"grad_norm": 1.4535192251205444,
|
| 43140 |
+
"learning_rate": 6.147e-05,
|
| 43141 |
+
"loss": 0.6221,
|
| 43142 |
+
"step": 6148
|
| 43143 |
+
},
|
| 43144 |
+
{
|
| 43145 |
+
"epoch": 0.0006149,
|
| 43146 |
+
"grad_norm": 1.4669100046157837,
|
| 43147 |
+
"learning_rate": 6.148e-05,
|
| 43148 |
+
"loss": 0.7026,
|
| 43149 |
+
"step": 6149
|
| 43150 |
+
},
|
| 43151 |
+
{
|
| 43152 |
+
"epoch": 0.000615,
|
| 43153 |
+
"grad_norm": 1.4489563703536987,
|
| 43154 |
+
"learning_rate": 6.149e-05,
|
| 43155 |
+
"loss": 0.6538,
|
| 43156 |
+
"step": 6150
|
| 43157 |
+
},
|
| 43158 |
+
{
|
| 43159 |
+
"epoch": 0.0006151,
|
| 43160 |
+
"grad_norm": 1.4876471757888794,
|
| 43161 |
+
"learning_rate": 6.15e-05,
|
| 43162 |
+
"loss": 0.6748,
|
| 43163 |
+
"step": 6151
|
| 43164 |
+
},
|
| 43165 |
+
{
|
| 43166 |
+
"epoch": 0.0006152,
|
| 43167 |
+
"grad_norm": 1.736519694328308,
|
| 43168 |
+
"learning_rate": 6.151e-05,
|
| 43169 |
+
"loss": 0.8223,
|
| 43170 |
+
"step": 6152
|
| 43171 |
+
},
|
| 43172 |
+
{
|
| 43173 |
+
"epoch": 0.0006153,
|
| 43174 |
+
"grad_norm": 1.3515825271606445,
|
| 43175 |
+
"learning_rate": 6.152e-05,
|
| 43176 |
+
"loss": 0.5974,
|
| 43177 |
+
"step": 6153
|
| 43178 |
+
},
|
| 43179 |
+
{
|
| 43180 |
+
"epoch": 0.0006154,
|
| 43181 |
+
"grad_norm": 1.3721874952316284,
|
| 43182 |
+
"learning_rate": 6.153e-05,
|
| 43183 |
+
"loss": 0.5972,
|
| 43184 |
+
"step": 6154
|
| 43185 |
+
},
|
| 43186 |
+
{
|
| 43187 |
+
"epoch": 0.0006155,
|
| 43188 |
+
"grad_norm": 1.4386078119277954,
|
| 43189 |
+
"learning_rate": 6.154000000000001e-05,
|
| 43190 |
+
"loss": 0.6619,
|
| 43191 |
+
"step": 6155
|
| 43192 |
+
},
|
| 43193 |
+
{
|
| 43194 |
+
"epoch": 0.0006156,
|
| 43195 |
+
"grad_norm": 1.6986792087554932,
|
| 43196 |
+
"learning_rate": 6.154999999999999e-05,
|
| 43197 |
+
"loss": 0.6836,
|
| 43198 |
+
"step": 6156
|
| 43199 |
+
},
|
| 43200 |
+
{
|
| 43201 |
+
"epoch": 0.0006157,
|
| 43202 |
+
"grad_norm": 2.247748613357544,
|
| 43203 |
+
"learning_rate": 6.156e-05,
|
| 43204 |
+
"loss": 0.6572,
|
| 43205 |
+
"step": 6157
|
| 43206 |
+
},
|
| 43207 |
+
{
|
| 43208 |
+
"epoch": 0.0006158,
|
| 43209 |
+
"grad_norm": 1.9118703603744507,
|
| 43210 |
+
"learning_rate": 6.157000000000001e-05,
|
| 43211 |
+
"loss": 0.7832,
|
| 43212 |
+
"step": 6158
|
| 43213 |
+
},
|
| 43214 |
+
{
|
| 43215 |
+
"epoch": 0.0006159,
|
| 43216 |
+
"grad_norm": 1.3325278759002686,
|
| 43217 |
+
"learning_rate": 6.158e-05,
|
| 43218 |
+
"loss": 0.5972,
|
| 43219 |
+
"step": 6159
|
| 43220 |
+
},
|
| 43221 |
+
{
|
| 43222 |
+
"epoch": 0.000616,
|
| 43223 |
+
"grad_norm": 2.9448111057281494,
|
| 43224 |
+
"learning_rate": 6.159e-05,
|
| 43225 |
+
"loss": 0.9946,
|
| 43226 |
+
"step": 6160
|
| 43227 |
+
},
|
| 43228 |
+
{
|
| 43229 |
+
"epoch": 0.0006161,
|
| 43230 |
+
"grad_norm": 1.3446276187896729,
|
| 43231 |
+
"learning_rate": 6.16e-05,
|
| 43232 |
+
"loss": 0.5762,
|
| 43233 |
+
"step": 6161
|
| 43234 |
+
},
|
| 43235 |
+
{
|
| 43236 |
+
"epoch": 0.0006162,
|
| 43237 |
+
"grad_norm": 1.5767836570739746,
|
| 43238 |
+
"learning_rate": 6.161e-05,
|
| 43239 |
+
"loss": 0.6489,
|
| 43240 |
+
"step": 6162
|
| 43241 |
+
},
|
| 43242 |
+
{
|
| 43243 |
+
"epoch": 0.0006163,
|
| 43244 |
+
"grad_norm": 2.111865282058716,
|
| 43245 |
+
"learning_rate": 6.162e-05,
|
| 43246 |
+
"loss": 0.8223,
|
| 43247 |
+
"step": 6163
|
| 43248 |
+
},
|
| 43249 |
+
{
|
| 43250 |
+
"epoch": 0.0006164,
|
| 43251 |
+
"grad_norm": 2.025707721710205,
|
| 43252 |
+
"learning_rate": 6.163e-05,
|
| 43253 |
+
"loss": 0.7529,
|
| 43254 |
+
"step": 6164
|
| 43255 |
+
},
|
| 43256 |
+
{
|
| 43257 |
+
"epoch": 0.0006165,
|
| 43258 |
+
"grad_norm": 1.5718879699707031,
|
| 43259 |
+
"learning_rate": 6.164e-05,
|
| 43260 |
+
"loss": 0.6123,
|
| 43261 |
+
"step": 6165
|
| 43262 |
+
},
|
| 43263 |
+
{
|
| 43264 |
+
"epoch": 0.0006166,
|
| 43265 |
+
"grad_norm": 1.5455278158187866,
|
| 43266 |
+
"learning_rate": 6.165e-05,
|
| 43267 |
+
"loss": 0.6572,
|
| 43268 |
+
"step": 6166
|
| 43269 |
+
},
|
| 43270 |
+
{
|
| 43271 |
+
"epoch": 0.0006167,
|
| 43272 |
+
"grad_norm": 1.1608492136001587,
|
| 43273 |
+
"learning_rate": 6.166e-05,
|
| 43274 |
+
"loss": 0.5386,
|
| 43275 |
+
"step": 6167
|
| 43276 |
+
},
|
| 43277 |
+
{
|
| 43278 |
+
"epoch": 0.0006168,
|
| 43279 |
+
"grad_norm": 1.698362946510315,
|
| 43280 |
+
"learning_rate": 6.167e-05,
|
| 43281 |
+
"loss": 0.7178,
|
| 43282 |
+
"step": 6168
|
| 43283 |
+
},
|
| 43284 |
+
{
|
| 43285 |
+
"epoch": 0.0006169,
|
| 43286 |
+
"grad_norm": 1.5489366054534912,
|
| 43287 |
+
"learning_rate": 6.167999999999999e-05,
|
| 43288 |
+
"loss": 0.5869,
|
| 43289 |
+
"step": 6169
|
| 43290 |
+
},
|
| 43291 |
+
{
|
| 43292 |
+
"epoch": 0.000617,
|
| 43293 |
+
"grad_norm": 1.7101398706436157,
|
| 43294 |
+
"learning_rate": 6.169e-05,
|
| 43295 |
+
"loss": 0.646,
|
| 43296 |
+
"step": 6170
|
| 43297 |
+
},
|
| 43298 |
+
{
|
| 43299 |
+
"epoch": 0.0006171,
|
| 43300 |
+
"grad_norm": 1.4007785320281982,
|
| 43301 |
+
"learning_rate": 6.170000000000001e-05,
|
| 43302 |
+
"loss": 0.6118,
|
| 43303 |
+
"step": 6171
|
| 43304 |
+
},
|
| 43305 |
+
{
|
| 43306 |
+
"epoch": 0.0006172,
|
| 43307 |
+
"grad_norm": 1.5472906827926636,
|
| 43308 |
+
"learning_rate": 6.171e-05,
|
| 43309 |
+
"loss": 0.6597,
|
| 43310 |
+
"step": 6172
|
| 43311 |
+
},
|
| 43312 |
+
{
|
| 43313 |
+
"epoch": 0.0006173,
|
| 43314 |
+
"grad_norm": 2.241448402404785,
|
| 43315 |
+
"learning_rate": 6.172e-05,
|
| 43316 |
+
"loss": 0.6831,
|
| 43317 |
+
"step": 6173
|
| 43318 |
+
},
|
| 43319 |
+
{
|
| 43320 |
+
"epoch": 0.0006174,
|
| 43321 |
+
"grad_norm": 1.3851557970046997,
|
| 43322 |
+
"learning_rate": 6.173000000000001e-05,
|
| 43323 |
+
"loss": 0.5569,
|
| 43324 |
+
"step": 6174
|
| 43325 |
+
},
|
| 43326 |
+
{
|
| 43327 |
+
"epoch": 0.0006175,
|
| 43328 |
+
"grad_norm": 1.3902335166931152,
|
| 43329 |
+
"learning_rate": 6.174e-05,
|
| 43330 |
+
"loss": 0.5762,
|
| 43331 |
+
"step": 6175
|
| 43332 |
+
},
|
| 43333 |
+
{
|
| 43334 |
+
"epoch": 0.0006176,
|
| 43335 |
+
"grad_norm": 1.2751723527908325,
|
| 43336 |
+
"learning_rate": 6.175e-05,
|
| 43337 |
+
"loss": 0.5654,
|
| 43338 |
+
"step": 6176
|
| 43339 |
+
},
|
| 43340 |
+
{
|
| 43341 |
+
"epoch": 0.0006177,
|
| 43342 |
+
"grad_norm": 1.3091758489608765,
|
| 43343 |
+
"learning_rate": 6.176e-05,
|
| 43344 |
+
"loss": 0.5649,
|
| 43345 |
+
"step": 6177
|
| 43346 |
+
},
|
| 43347 |
+
{
|
| 43348 |
+
"epoch": 0.0006178,
|
| 43349 |
+
"grad_norm": 1.3855472803115845,
|
| 43350 |
+
"learning_rate": 6.177e-05,
|
| 43351 |
+
"loss": 0.5571,
|
| 43352 |
+
"step": 6178
|
| 43353 |
+
},
|
| 43354 |
+
{
|
| 43355 |
+
"epoch": 0.0006179,
|
| 43356 |
+
"grad_norm": 2.753174304962158,
|
| 43357 |
+
"learning_rate": 6.178000000000001e-05,
|
| 43358 |
+
"loss": 0.8467,
|
| 43359 |
+
"step": 6179
|
| 43360 |
+
},
|
| 43361 |
+
{
|
| 43362 |
+
"epoch": 0.000618,
|
| 43363 |
+
"grad_norm": 1.5822502374649048,
|
| 43364 |
+
"learning_rate": 6.179e-05,
|
| 43365 |
+
"loss": 0.646,
|
| 43366 |
+
"step": 6180
|
| 43367 |
+
},
|
| 43368 |
+
{
|
| 43369 |
+
"epoch": 0.0006181,
|
| 43370 |
+
"grad_norm": 1.5361640453338623,
|
| 43371 |
+
"learning_rate": 6.18e-05,
|
| 43372 |
+
"loss": 0.6611,
|
| 43373 |
+
"step": 6181
|
| 43374 |
+
},
|
| 43375 |
+
{
|
| 43376 |
+
"epoch": 0.0006182,
|
| 43377 |
+
"grad_norm": 1.59479558467865,
|
| 43378 |
+
"learning_rate": 6.181e-05,
|
| 43379 |
+
"loss": 0.6191,
|
| 43380 |
+
"step": 6182
|
| 43381 |
+
},
|
| 43382 |
+
{
|
| 43383 |
+
"epoch": 0.0006183,
|
| 43384 |
+
"grad_norm": 1.317305088043213,
|
| 43385 |
+
"learning_rate": 6.182e-05,
|
| 43386 |
+
"loss": 0.5845,
|
| 43387 |
+
"step": 6183
|
| 43388 |
+
},
|
| 43389 |
+
{
|
| 43390 |
+
"epoch": 0.0006184,
|
| 43391 |
+
"grad_norm": 1.5284531116485596,
|
| 43392 |
+
"learning_rate": 6.183e-05,
|
| 43393 |
+
"loss": 0.6328,
|
| 43394 |
+
"step": 6184
|
| 43395 |
+
},
|
| 43396 |
+
{
|
| 43397 |
+
"epoch": 0.0006185,
|
| 43398 |
+
"grad_norm": 1.4130276441574097,
|
| 43399 |
+
"learning_rate": 6.184e-05,
|
| 43400 |
+
"loss": 0.6101,
|
| 43401 |
+
"step": 6185
|
| 43402 |
+
},
|
| 43403 |
+
{
|
| 43404 |
+
"epoch": 0.0006186,
|
| 43405 |
+
"grad_norm": 1.2827122211456299,
|
| 43406 |
+
"learning_rate": 6.185e-05,
|
| 43407 |
+
"loss": 0.5884,
|
| 43408 |
+
"step": 6186
|
| 43409 |
+
},
|
| 43410 |
+
{
|
| 43411 |
+
"epoch": 0.0006187,
|
| 43412 |
+
"grad_norm": 1.1124165058135986,
|
| 43413 |
+
"learning_rate": 6.186e-05,
|
| 43414 |
+
"loss": 0.5239,
|
| 43415 |
+
"step": 6187
|
| 43416 |
+
},
|
| 43417 |
+
{
|
| 43418 |
+
"epoch": 0.0006188,
|
| 43419 |
+
"grad_norm": 1.3330076932907104,
|
| 43420 |
+
"learning_rate": 6.187e-05,
|
| 43421 |
+
"loss": 0.5854,
|
| 43422 |
+
"step": 6188
|
| 43423 |
+
},
|
| 43424 |
+
{
|
| 43425 |
+
"epoch": 0.0006189,
|
| 43426 |
+
"grad_norm": 2.5464541912078857,
|
| 43427 |
+
"learning_rate": 6.188000000000001e-05,
|
| 43428 |
+
"loss": 0.8247,
|
| 43429 |
+
"step": 6189
|
| 43430 |
+
},
|
| 43431 |
+
{
|
| 43432 |
+
"epoch": 0.000619,
|
| 43433 |
+
"grad_norm": 1.4788142442703247,
|
| 43434 |
+
"learning_rate": 6.188999999999999e-05,
|
| 43435 |
+
"loss": 0.6431,
|
| 43436 |
+
"step": 6190
|
| 43437 |
+
},
|
| 43438 |
+
{
|
| 43439 |
+
"epoch": 0.0006191,
|
| 43440 |
+
"grad_norm": 1.3143922090530396,
|
| 43441 |
+
"learning_rate": 6.19e-05,
|
| 43442 |
+
"loss": 0.5547,
|
| 43443 |
+
"step": 6191
|
| 43444 |
+
},
|
| 43445 |
+
{
|
| 43446 |
+
"epoch": 0.0006192,
|
| 43447 |
+
"grad_norm": 1.1701135635375977,
|
| 43448 |
+
"learning_rate": 6.191000000000001e-05,
|
| 43449 |
+
"loss": 0.5308,
|
| 43450 |
+
"step": 6192
|
| 43451 |
+
},
|
| 43452 |
+
{
|
| 43453 |
+
"epoch": 0.0006193,
|
| 43454 |
+
"grad_norm": 1.983077883720398,
|
| 43455 |
+
"learning_rate": 6.192e-05,
|
| 43456 |
+
"loss": 0.7656,
|
| 43457 |
+
"step": 6193
|
| 43458 |
+
},
|
| 43459 |
+
{
|
| 43460 |
+
"epoch": 0.0006194,
|
| 43461 |
+
"grad_norm": 1.4042465686798096,
|
| 43462 |
+
"learning_rate": 6.193e-05,
|
| 43463 |
+
"loss": 0.6089,
|
| 43464 |
+
"step": 6194
|
| 43465 |
+
},
|
| 43466 |
+
{
|
| 43467 |
+
"epoch": 0.0006195,
|
| 43468 |
+
"grad_norm": 1.273660659790039,
|
| 43469 |
+
"learning_rate": 6.194e-05,
|
| 43470 |
+
"loss": 0.605,
|
| 43471 |
+
"step": 6195
|
| 43472 |
+
},
|
| 43473 |
+
{
|
| 43474 |
+
"epoch": 0.0006196,
|
| 43475 |
+
"grad_norm": 1.2940797805786133,
|
| 43476 |
+
"learning_rate": 6.195e-05,
|
| 43477 |
+
"loss": 0.6072,
|
| 43478 |
+
"step": 6196
|
| 43479 |
+
},
|
| 43480 |
+
{
|
| 43481 |
+
"epoch": 0.0006197,
|
| 43482 |
+
"grad_norm": 1.2838053703308105,
|
| 43483 |
+
"learning_rate": 6.196e-05,
|
| 43484 |
+
"loss": 0.5681,
|
| 43485 |
+
"step": 6197
|
| 43486 |
+
},
|
| 43487 |
+
{
|
| 43488 |
+
"epoch": 0.0006198,
|
| 43489 |
+
"grad_norm": 1.2031972408294678,
|
| 43490 |
+
"learning_rate": 6.197e-05,
|
| 43491 |
+
"loss": 0.5559,
|
| 43492 |
+
"step": 6198
|
| 43493 |
+
},
|
| 43494 |
+
{
|
| 43495 |
+
"epoch": 0.0006199,
|
| 43496 |
+
"grad_norm": 2.070176362991333,
|
| 43497 |
+
"learning_rate": 6.198e-05,
|
| 43498 |
+
"loss": 0.7498,
|
| 43499 |
+
"step": 6199
|
| 43500 |
+
},
|
| 43501 |
+
{
|
| 43502 |
+
"epoch": 0.00062,
|
| 43503 |
+
"grad_norm": 1.229961633682251,
|
| 43504 |
+
"learning_rate": 6.199e-05,
|
| 43505 |
+
"loss": 0.5618,
|
| 43506 |
+
"step": 6200
|
| 43507 |
+
},
|
| 43508 |
+
{
|
| 43509 |
+
"epoch": 0.0006201,
|
| 43510 |
+
"grad_norm": 1.6335248947143555,
|
| 43511 |
+
"learning_rate": 6.2e-05,
|
| 43512 |
+
"loss": 0.6592,
|
| 43513 |
+
"step": 6201
|
| 43514 |
+
},
|
| 43515 |
+
{
|
| 43516 |
+
"epoch": 0.0006202,
|
| 43517 |
+
"grad_norm": 1.5217071771621704,
|
| 43518 |
+
"learning_rate": 6.201e-05,
|
| 43519 |
+
"loss": 0.6636,
|
| 43520 |
+
"step": 6202
|
| 43521 |
+
},
|
| 43522 |
+
{
|
| 43523 |
+
"epoch": 0.0006203,
|
| 43524 |
+
"grad_norm": 1.7696077823638916,
|
| 43525 |
+
"learning_rate": 6.201999999999999e-05,
|
| 43526 |
+
"loss": 0.6807,
|
| 43527 |
+
"step": 6203
|
| 43528 |
+
},
|
| 43529 |
+
{
|
| 43530 |
+
"epoch": 0.0006204,
|
| 43531 |
+
"grad_norm": 2.0146279335021973,
|
| 43532 |
+
"learning_rate": 6.203e-05,
|
| 43533 |
+
"loss": 0.9287,
|
| 43534 |
+
"step": 6204
|
| 43535 |
+
},
|
| 43536 |
+
{
|
| 43537 |
+
"epoch": 0.0006205,
|
| 43538 |
+
"grad_norm": 1.5057393312454224,
|
| 43539 |
+
"learning_rate": 6.204000000000001e-05,
|
| 43540 |
+
"loss": 0.6523,
|
| 43541 |
+
"step": 6205
|
| 43542 |
+
},
|
| 43543 |
+
{
|
| 43544 |
+
"epoch": 0.0006206,
|
| 43545 |
+
"grad_norm": 1.3235162496566772,
|
| 43546 |
+
"learning_rate": 6.205e-05,
|
| 43547 |
+
"loss": 0.5752,
|
| 43548 |
+
"step": 6206
|
| 43549 |
+
},
|
| 43550 |
+
{
|
| 43551 |
+
"epoch": 0.0006207,
|
| 43552 |
+
"grad_norm": 1.2800863981246948,
|
| 43553 |
+
"learning_rate": 6.206e-05,
|
| 43554 |
+
"loss": 0.5688,
|
| 43555 |
+
"step": 6207
|
| 43556 |
+
},
|
| 43557 |
+
{
|
| 43558 |
+
"epoch": 0.0006208,
|
| 43559 |
+
"grad_norm": 1.4795315265655518,
|
| 43560 |
+
"learning_rate": 6.207000000000001e-05,
|
| 43561 |
+
"loss": 0.6045,
|
| 43562 |
+
"step": 6208
|
| 43563 |
+
},
|
| 43564 |
+
{
|
| 43565 |
+
"epoch": 0.0006209,
|
| 43566 |
+
"grad_norm": 2.372642755508423,
|
| 43567 |
+
"learning_rate": 6.208e-05,
|
| 43568 |
+
"loss": 0.6807,
|
| 43569 |
+
"step": 6209
|
| 43570 |
+
},
|
| 43571 |
+
{
|
| 43572 |
+
"epoch": 0.000621,
|
| 43573 |
+
"grad_norm": 1.4887479543685913,
|
| 43574 |
+
"learning_rate": 6.209e-05,
|
| 43575 |
+
"loss": 0.5566,
|
| 43576 |
+
"step": 6210
|
| 43577 |
+
},
|
| 43578 |
+
{
|
| 43579 |
+
"epoch": 0.0006211,
|
| 43580 |
+
"grad_norm": 1.4304701089859009,
|
| 43581 |
+
"learning_rate": 6.21e-05,
|
| 43582 |
+
"loss": 0.5859,
|
| 43583 |
+
"step": 6211
|
| 43584 |
+
},
|
| 43585 |
+
{
|
| 43586 |
+
"epoch": 0.0006212,
|
| 43587 |
+
"grad_norm": 1.636297583580017,
|
| 43588 |
+
"learning_rate": 6.211e-05,
|
| 43589 |
+
"loss": 0.6465,
|
| 43590 |
+
"step": 6212
|
| 43591 |
+
},
|
| 43592 |
+
{
|
| 43593 |
+
"epoch": 0.0006213,
|
| 43594 |
+
"grad_norm": 1.3796359300613403,
|
| 43595 |
+
"learning_rate": 6.212000000000001e-05,
|
| 43596 |
+
"loss": 0.5728,
|
| 43597 |
+
"step": 6213
|
| 43598 |
+
},
|
| 43599 |
+
{
|
| 43600 |
+
"epoch": 0.0006214,
|
| 43601 |
+
"grad_norm": 1.2996286153793335,
|
| 43602 |
+
"learning_rate": 6.213e-05,
|
| 43603 |
+
"loss": 0.5398,
|
| 43604 |
+
"step": 6214
|
| 43605 |
+
},
|
| 43606 |
+
{
|
| 43607 |
+
"epoch": 0.0006215,
|
| 43608 |
+
"grad_norm": 1.234860897064209,
|
| 43609 |
+
"learning_rate": 6.214e-05,
|
| 43610 |
+
"loss": 0.5442,
|
| 43611 |
+
"step": 6215
|
| 43612 |
+
},
|
| 43613 |
+
{
|
| 43614 |
+
"epoch": 0.0006216,
|
| 43615 |
+
"grad_norm": 1.4250895977020264,
|
| 43616 |
+
"learning_rate": 6.215e-05,
|
| 43617 |
+
"loss": 0.6006,
|
| 43618 |
+
"step": 6216
|
| 43619 |
+
},
|
| 43620 |
+
{
|
| 43621 |
+
"epoch": 0.0006217,
|
| 43622 |
+
"grad_norm": 1.20131516456604,
|
| 43623 |
+
"learning_rate": 6.216e-05,
|
| 43624 |
+
"loss": 0.5239,
|
| 43625 |
+
"step": 6217
|
| 43626 |
+
},
|
| 43627 |
+
{
|
| 43628 |
+
"epoch": 0.0006218,
|
| 43629 |
+
"grad_norm": 1.3933985233306885,
|
| 43630 |
+
"learning_rate": 6.217e-05,
|
| 43631 |
+
"loss": 0.584,
|
| 43632 |
+
"step": 6218
|
| 43633 |
+
},
|
| 43634 |
+
{
|
| 43635 |
+
"epoch": 0.0006219,
|
| 43636 |
+
"grad_norm": 2.1496052742004395,
|
| 43637 |
+
"learning_rate": 6.218e-05,
|
| 43638 |
+
"loss": 0.7788,
|
| 43639 |
+
"step": 6219
|
| 43640 |
+
},
|
| 43641 |
+
{
|
| 43642 |
+
"epoch": 0.000622,
|
| 43643 |
+
"grad_norm": 1.558510184288025,
|
| 43644 |
+
"learning_rate": 6.219e-05,
|
| 43645 |
+
"loss": 0.6006,
|
| 43646 |
+
"step": 6220
|
| 43647 |
+
},
|
| 43648 |
+
{
|
| 43649 |
+
"epoch": 0.0006221,
|
| 43650 |
+
"grad_norm": 4.112026214599609,
|
| 43651 |
+
"learning_rate": 6.22e-05,
|
| 43652 |
+
"loss": 0.7517,
|
| 43653 |
+
"step": 6221
|
| 43654 |
+
},
|
| 43655 |
+
{
|
| 43656 |
+
"epoch": 0.0006222,
|
| 43657 |
+
"grad_norm": 1.8367027044296265,
|
| 43658 |
+
"learning_rate": 6.221e-05,
|
| 43659 |
+
"loss": 0.6196,
|
| 43660 |
+
"step": 6222
|
| 43661 |
+
},
|
| 43662 |
+
{
|
| 43663 |
+
"epoch": 0.0006223,
|
| 43664 |
+
"grad_norm": 2.0355629920959473,
|
| 43665 |
+
"learning_rate": 6.222000000000001e-05,
|
| 43666 |
+
"loss": 0.8071,
|
| 43667 |
+
"step": 6223
|
| 43668 |
+
},
|
| 43669 |
+
{
|
| 43670 |
+
"epoch": 0.0006224,
|
| 43671 |
+
"grad_norm": 1.5606682300567627,
|
| 43672 |
+
"learning_rate": 6.222999999999999e-05,
|
| 43673 |
+
"loss": 0.6716,
|
| 43674 |
+
"step": 6224
|
| 43675 |
+
},
|
| 43676 |
+
{
|
| 43677 |
+
"epoch": 0.0006225,
|
| 43678 |
+
"grad_norm": 1.454939365386963,
|
| 43679 |
+
"learning_rate": 6.224e-05,
|
| 43680 |
+
"loss": 0.6104,
|
| 43681 |
+
"step": 6225
|
| 43682 |
+
},
|
| 43683 |
+
{
|
| 43684 |
+
"epoch": 0.0006226,
|
| 43685 |
+
"grad_norm": 2.0981926918029785,
|
| 43686 |
+
"learning_rate": 6.225000000000001e-05,
|
| 43687 |
+
"loss": 0.5967,
|
| 43688 |
+
"step": 6226
|
| 43689 |
+
},
|
| 43690 |
+
{
|
| 43691 |
+
"epoch": 0.0006227,
|
| 43692 |
+
"grad_norm": 1.3601634502410889,
|
| 43693 |
+
"learning_rate": 6.226e-05,
|
| 43694 |
+
"loss": 0.543,
|
| 43695 |
+
"step": 6227
|
| 43696 |
+
},
|
| 43697 |
+
{
|
| 43698 |
+
"epoch": 0.0006228,
|
| 43699 |
+
"grad_norm": 1.3056219816207886,
|
| 43700 |
+
"learning_rate": 6.227e-05,
|
| 43701 |
+
"loss": 0.5432,
|
| 43702 |
+
"step": 6228
|
| 43703 |
+
},
|
| 43704 |
+
{
|
| 43705 |
+
"epoch": 0.0006229,
|
| 43706 |
+
"grad_norm": 1.2218716144561768,
|
| 43707 |
+
"learning_rate": 6.228000000000001e-05,
|
| 43708 |
+
"loss": 0.5344,
|
| 43709 |
+
"step": 6229
|
| 43710 |
+
},
|
| 43711 |
+
{
|
| 43712 |
+
"epoch": 0.000623,
|
| 43713 |
+
"grad_norm": 1.4955134391784668,
|
| 43714 |
+
"learning_rate": 6.229e-05,
|
| 43715 |
+
"loss": 0.6226,
|
| 43716 |
+
"step": 6230
|
| 43717 |
+
},
|
| 43718 |
+
{
|
| 43719 |
+
"epoch": 0.0006231,
|
| 43720 |
+
"grad_norm": 1.5450738668441772,
|
| 43721 |
+
"learning_rate": 6.23e-05,
|
| 43722 |
+
"loss": 0.5764,
|
| 43723 |
+
"step": 6231
|
| 43724 |
+
},
|
| 43725 |
+
{
|
| 43726 |
+
"epoch": 0.0006232,
|
| 43727 |
+
"grad_norm": 1.5347802639007568,
|
| 43728 |
+
"learning_rate": 6.231e-05,
|
| 43729 |
+
"loss": 0.6047,
|
| 43730 |
+
"step": 6232
|
| 43731 |
+
},
|
| 43732 |
+
{
|
| 43733 |
+
"epoch": 0.0006233,
|
| 43734 |
+
"grad_norm": 1.5982639789581299,
|
| 43735 |
+
"learning_rate": 6.232e-05,
|
| 43736 |
+
"loss": 0.6714,
|
| 43737 |
+
"step": 6233
|
| 43738 |
+
},
|
| 43739 |
+
{
|
| 43740 |
+
"epoch": 0.0006234,
|
| 43741 |
+
"grad_norm": 2.6113107204437256,
|
| 43742 |
+
"learning_rate": 6.233e-05,
|
| 43743 |
+
"loss": 0.7083,
|
| 43744 |
+
"step": 6234
|
| 43745 |
+
},
|
| 43746 |
+
{
|
| 43747 |
+
"epoch": 0.0006235,
|
| 43748 |
+
"grad_norm": 1.6445200443267822,
|
| 43749 |
+
"learning_rate": 6.234e-05,
|
| 43750 |
+
"loss": 0.6084,
|
| 43751 |
+
"step": 6235
|
| 43752 |
+
},
|
| 43753 |
+
{
|
| 43754 |
+
"epoch": 0.0006236,
|
| 43755 |
+
"grad_norm": 1.4309769868850708,
|
| 43756 |
+
"learning_rate": 6.235e-05,
|
| 43757 |
+
"loss": 0.5503,
|
| 43758 |
+
"step": 6236
|
| 43759 |
+
},
|
| 43760 |
+
{
|
| 43761 |
+
"epoch": 0.0006237,
|
| 43762 |
+
"grad_norm": 2.6508233547210693,
|
| 43763 |
+
"learning_rate": 6.235999999999999e-05,
|
| 43764 |
+
"loss": 0.7554,
|
| 43765 |
+
"step": 6237
|
| 43766 |
+
},
|
| 43767 |
+
{
|
| 43768 |
+
"epoch": 0.0006238,
|
| 43769 |
+
"grad_norm": 1.892777681350708,
|
| 43770 |
+
"learning_rate": 6.237e-05,
|
| 43771 |
+
"loss": 0.731,
|
| 43772 |
+
"step": 6238
|
| 43773 |
+
},
|
| 43774 |
+
{
|
| 43775 |
+
"epoch": 0.0006239,
|
| 43776 |
+
"grad_norm": 1.422659993171692,
|
| 43777 |
+
"learning_rate": 6.238000000000001e-05,
|
| 43778 |
+
"loss": 0.5649,
|
| 43779 |
+
"step": 6239
|
| 43780 |
+
},
|
| 43781 |
+
{
|
| 43782 |
+
"epoch": 0.000624,
|
| 43783 |
+
"grad_norm": 1.8252531290054321,
|
| 43784 |
+
"learning_rate": 6.239e-05,
|
| 43785 |
+
"loss": 0.6064,
|
| 43786 |
+
"step": 6240
|
| 43787 |
+
},
|
| 43788 |
+
{
|
| 43789 |
+
"epoch": 0.0006241,
|
| 43790 |
+
"grad_norm": 2.0503756999969482,
|
| 43791 |
+
"learning_rate": 6.24e-05,
|
| 43792 |
+
"loss": 0.9309,
|
| 43793 |
+
"step": 6241
|
| 43794 |
+
},
|
| 43795 |
+
{
|
| 43796 |
+
"epoch": 0.0006242,
|
| 43797 |
+
"grad_norm": 1.3754830360412598,
|
| 43798 |
+
"learning_rate": 6.241000000000001e-05,
|
| 43799 |
+
"loss": 0.5793,
|
| 43800 |
+
"step": 6242
|
| 43801 |
+
},
|
| 43802 |
+
{
|
| 43803 |
+
"epoch": 0.0006243,
|
| 43804 |
+
"grad_norm": 1.2413182258605957,
|
| 43805 |
+
"learning_rate": 6.242e-05,
|
| 43806 |
+
"loss": 0.5415,
|
| 43807 |
+
"step": 6243
|
| 43808 |
+
},
|
| 43809 |
+
{
|
| 43810 |
+
"epoch": 0.0006244,
|
| 43811 |
+
"grad_norm": 1.157693862915039,
|
| 43812 |
+
"learning_rate": 6.243e-05,
|
| 43813 |
+
"loss": 0.5063,
|
| 43814 |
+
"step": 6244
|
| 43815 |
+
},
|
| 43816 |
+
{
|
| 43817 |
+
"epoch": 0.0006245,
|
| 43818 |
+
"grad_norm": 2.4091522693634033,
|
| 43819 |
+
"learning_rate": 6.244e-05,
|
| 43820 |
+
"loss": 0.717,
|
| 43821 |
+
"step": 6245
|
| 43822 |
+
},
|
| 43823 |
+
{
|
| 43824 |
+
"epoch": 0.0006246,
|
| 43825 |
+
"grad_norm": 1.626430630683899,
|
| 43826 |
+
"learning_rate": 6.245e-05,
|
| 43827 |
+
"loss": 0.5918,
|
| 43828 |
+
"step": 6246
|
| 43829 |
+
},
|
| 43830 |
+
{
|
| 43831 |
+
"epoch": 0.0006247,
|
| 43832 |
+
"grad_norm": 1.3793022632598877,
|
| 43833 |
+
"learning_rate": 6.246000000000001e-05,
|
| 43834 |
+
"loss": 0.5386,
|
| 43835 |
+
"step": 6247
|
| 43836 |
+
},
|
| 43837 |
+
{
|
| 43838 |
+
"epoch": 0.0006248,
|
| 43839 |
+
"grad_norm": 3.696800947189331,
|
| 43840 |
+
"learning_rate": 6.247e-05,
|
| 43841 |
+
"loss": 1.2209,
|
| 43842 |
+
"step": 6248
|
| 43843 |
+
},
|
| 43844 |
+
{
|
| 43845 |
+
"epoch": 0.0006249,
|
| 43846 |
+
"grad_norm": 1.4566258192062378,
|
| 43847 |
+
"learning_rate": 6.248e-05,
|
| 43848 |
+
"loss": 0.6089,
|
| 43849 |
+
"step": 6249
|
| 43850 |
+
},
|
| 43851 |
+
{
|
| 43852 |
+
"epoch": 0.000625,
|
| 43853 |
+
"grad_norm": 1.323639154434204,
|
| 43854 |
+
"learning_rate": 6.249e-05,
|
| 43855 |
+
"loss": 0.5266,
|
| 43856 |
+
"step": 6250
|
| 43857 |
+
},
|
| 43858 |
+
{
|
| 43859 |
+
"epoch": 0.0006251,
|
| 43860 |
+
"grad_norm": 2.112474203109741,
|
| 43861 |
+
"learning_rate": 6.25e-05,
|
| 43862 |
+
"loss": 0.6899,
|
| 43863 |
+
"step": 6251
|
| 43864 |
+
},
|
| 43865 |
+
{
|
| 43866 |
+
"epoch": 0.0006252,
|
| 43867 |
+
"grad_norm": 1.2852239608764648,
|
| 43868 |
+
"learning_rate": 6.251e-05,
|
| 43869 |
+
"loss": 0.5193,
|
| 43870 |
+
"step": 6252
|
| 43871 |
+
},
|
| 43872 |
+
{
|
| 43873 |
+
"epoch": 0.0006253,
|
| 43874 |
+
"grad_norm": 1.354556679725647,
|
| 43875 |
+
"learning_rate": 6.252e-05,
|
| 43876 |
+
"loss": 0.6016,
|
| 43877 |
+
"step": 6253
|
| 43878 |
+
},
|
| 43879 |
+
{
|
| 43880 |
+
"epoch": 0.0006254,
|
| 43881 |
+
"grad_norm": 1.2461886405944824,
|
| 43882 |
+
"learning_rate": 6.253e-05,
|
| 43883 |
+
"loss": 0.5354,
|
| 43884 |
+
"step": 6254
|
| 43885 |
+
},
|
| 43886 |
+
{
|
| 43887 |
+
"epoch": 0.0006255,
|
| 43888 |
+
"grad_norm": 2.013084888458252,
|
| 43889 |
+
"learning_rate": 6.254e-05,
|
| 43890 |
+
"loss": 0.9143,
|
| 43891 |
+
"step": 6255
|
| 43892 |
+
},
|
| 43893 |
+
{
|
| 43894 |
+
"epoch": 0.0006256,
|
| 43895 |
+
"grad_norm": 1.2279975414276123,
|
| 43896 |
+
"learning_rate": 6.255e-05,
|
| 43897 |
+
"loss": 0.5317,
|
| 43898 |
+
"step": 6256
|
| 43899 |
+
},
|
| 43900 |
+
{
|
| 43901 |
+
"epoch": 0.0006257,
|
| 43902 |
+
"grad_norm": 1.3620824813842773,
|
| 43903 |
+
"learning_rate": 6.256000000000001e-05,
|
| 43904 |
+
"loss": 0.6245,
|
| 43905 |
+
"step": 6257
|
| 43906 |
+
},
|
| 43907 |
+
{
|
| 43908 |
+
"epoch": 0.0006258,
|
| 43909 |
+
"grad_norm": 1.3307284116744995,
|
| 43910 |
+
"learning_rate": 6.256999999999999e-05,
|
| 43911 |
+
"loss": 0.5559,
|
| 43912 |
+
"step": 6258
|
| 43913 |
+
},
|
| 43914 |
+
{
|
| 43915 |
+
"epoch": 0.0006259,
|
| 43916 |
+
"grad_norm": 1.2366081476211548,
|
| 43917 |
+
"learning_rate": 6.258e-05,
|
| 43918 |
+
"loss": 0.5493,
|
| 43919 |
+
"step": 6259
|
| 43920 |
+
},
|
| 43921 |
+
{
|
| 43922 |
+
"epoch": 0.000626,
|
| 43923 |
+
"grad_norm": 1.2821134328842163,
|
| 43924 |
+
"learning_rate": 6.259000000000001e-05,
|
| 43925 |
+
"loss": 0.5879,
|
| 43926 |
+
"step": 6260
|
| 43927 |
+
},
|
| 43928 |
+
{
|
| 43929 |
+
"epoch": 0.0006261,
|
| 43930 |
+
"grad_norm": 2.112119197845459,
|
| 43931 |
+
"learning_rate": 6.26e-05,
|
| 43932 |
+
"loss": 0.8433,
|
| 43933 |
+
"step": 6261
|
| 43934 |
+
},
|
| 43935 |
+
{
|
| 43936 |
+
"epoch": 0.0006262,
|
| 43937 |
+
"grad_norm": 1.6771800518035889,
|
| 43938 |
+
"learning_rate": 6.261e-05,
|
| 43939 |
+
"loss": 0.6709,
|
| 43940 |
+
"step": 6262
|
| 43941 |
+
},
|
| 43942 |
+
{
|
| 43943 |
+
"epoch": 0.0006263,
|
| 43944 |
+
"grad_norm": 1.531270146369934,
|
| 43945 |
+
"learning_rate": 6.262000000000001e-05,
|
| 43946 |
+
"loss": 0.5874,
|
| 43947 |
+
"step": 6263
|
| 43948 |
+
},
|
| 43949 |
+
{
|
| 43950 |
+
"epoch": 0.0006264,
|
| 43951 |
+
"grad_norm": 1.2914503812789917,
|
| 43952 |
+
"learning_rate": 6.263e-05,
|
| 43953 |
+
"loss": 0.5488,
|
| 43954 |
+
"step": 6264
|
| 43955 |
+
},
|
| 43956 |
+
{
|
| 43957 |
+
"epoch": 0.0006265,
|
| 43958 |
+
"grad_norm": 1.2997040748596191,
|
| 43959 |
+
"learning_rate": 6.264e-05,
|
| 43960 |
+
"loss": 0.5583,
|
| 43961 |
+
"step": 6265
|
| 43962 |
+
},
|
| 43963 |
+
{
|
| 43964 |
+
"epoch": 0.0006266,
|
| 43965 |
+
"grad_norm": 1.2204697132110596,
|
| 43966 |
+
"learning_rate": 6.265e-05,
|
| 43967 |
+
"loss": 0.499,
|
| 43968 |
+
"step": 6266
|
| 43969 |
+
},
|
| 43970 |
+
{
|
| 43971 |
+
"epoch": 0.0006267,
|
| 43972 |
+
"grad_norm": 1.723503828048706,
|
| 43973 |
+
"learning_rate": 6.266e-05,
|
| 43974 |
+
"loss": 0.6738,
|
| 43975 |
+
"step": 6267
|
| 43976 |
+
},
|
| 43977 |
+
{
|
| 43978 |
+
"epoch": 0.0006268,
|
| 43979 |
+
"grad_norm": 1.158341407775879,
|
| 43980 |
+
"learning_rate": 6.267e-05,
|
| 43981 |
+
"loss": 0.5168,
|
| 43982 |
+
"step": 6268
|
| 43983 |
+
},
|
| 43984 |
+
{
|
| 43985 |
+
"epoch": 0.0006269,
|
| 43986 |
+
"grad_norm": 1.4224638938903809,
|
| 43987 |
+
"learning_rate": 6.268e-05,
|
| 43988 |
+
"loss": 0.5784,
|
| 43989 |
+
"step": 6269
|
| 43990 |
+
},
|
| 43991 |
+
{
|
| 43992 |
+
"epoch": 0.000627,
|
| 43993 |
+
"grad_norm": 1.2845311164855957,
|
| 43994 |
+
"learning_rate": 6.269e-05,
|
| 43995 |
+
"loss": 0.5708,
|
| 43996 |
+
"step": 6270
|
| 43997 |
+
},
|
| 43998 |
+
{
|
| 43999 |
+
"epoch": 0.0006271,
|
| 44000 |
+
"grad_norm": 1.322809100151062,
|
| 44001 |
+
"learning_rate": 6.269999999999999e-05,
|
| 44002 |
+
"loss": 0.5571,
|
| 44003 |
+
"step": 6271
|
| 44004 |
+
},
|
| 44005 |
+
{
|
| 44006 |
+
"epoch": 0.0006272,
|
| 44007 |
+
"grad_norm": 1.2853106260299683,
|
| 44008 |
+
"learning_rate": 6.271e-05,
|
| 44009 |
+
"loss": 0.5225,
|
| 44010 |
+
"step": 6272
|
| 44011 |
+
},
|
| 44012 |
+
{
|
| 44013 |
+
"epoch": 0.0006273,
|
| 44014 |
+
"grad_norm": 1.1261441707611084,
|
| 44015 |
+
"learning_rate": 6.272000000000001e-05,
|
| 44016 |
+
"loss": 0.4937,
|
| 44017 |
+
"step": 6273
|
| 44018 |
+
},
|
| 44019 |
+
{
|
| 44020 |
+
"epoch": 0.0006274,
|
| 44021 |
+
"grad_norm": 1.8010724782943726,
|
| 44022 |
+
"learning_rate": 6.273e-05,
|
| 44023 |
+
"loss": 0.6426,
|
| 44024 |
+
"step": 6274
|
| 44025 |
+
},
|
| 44026 |
+
{
|
| 44027 |
+
"epoch": 0.0006275,
|
| 44028 |
+
"grad_norm": 2.123570203781128,
|
| 44029 |
+
"learning_rate": 6.274e-05,
|
| 44030 |
+
"loss": 0.7507,
|
| 44031 |
+
"step": 6275
|
| 44032 |
+
},
|
| 44033 |
+
{
|
| 44034 |
+
"epoch": 0.0006276,
|
| 44035 |
+
"grad_norm": 1.8357847929000854,
|
| 44036 |
+
"learning_rate": 6.275000000000001e-05,
|
| 44037 |
+
"loss": 0.5884,
|
| 44038 |
+
"step": 6276
|
| 44039 |
+
},
|
| 44040 |
+
{
|
| 44041 |
+
"epoch": 0.0006277,
|
| 44042 |
+
"grad_norm": 1.3460817337036133,
|
| 44043 |
+
"learning_rate": 6.276e-05,
|
| 44044 |
+
"loss": 0.5278,
|
| 44045 |
+
"step": 6277
|
| 44046 |
+
},
|
| 44047 |
+
{
|
| 44048 |
+
"epoch": 0.0006278,
|
| 44049 |
+
"grad_norm": 1.095627784729004,
|
| 44050 |
+
"learning_rate": 6.277e-05,
|
| 44051 |
+
"loss": 0.5063,
|
| 44052 |
+
"step": 6278
|
| 44053 |
+
},
|
| 44054 |
+
{
|
| 44055 |
+
"epoch": 0.0006279,
|
| 44056 |
+
"grad_norm": 1.7269349098205566,
|
| 44057 |
+
"learning_rate": 6.278e-05,
|
| 44058 |
+
"loss": 0.6533,
|
| 44059 |
+
"step": 6279
|
| 44060 |
+
},
|
| 44061 |
+
{
|
| 44062 |
+
"epoch": 0.000628,
|
| 44063 |
+
"grad_norm": 1.422385334968567,
|
| 44064 |
+
"learning_rate": 6.279e-05,
|
| 44065 |
+
"loss": 0.5439,
|
| 44066 |
+
"step": 6280
|
| 44067 |
+
},
|
| 44068 |
+
{
|
| 44069 |
+
"epoch": 0.0006281,
|
| 44070 |
+
"grad_norm": 1.2597748041152954,
|
| 44071 |
+
"learning_rate": 6.280000000000001e-05,
|
| 44072 |
+
"loss": 0.5471,
|
| 44073 |
+
"step": 6281
|
| 44074 |
+
},
|
| 44075 |
+
{
|
| 44076 |
+
"epoch": 0.0006282,
|
| 44077 |
+
"grad_norm": 1.303085446357727,
|
| 44078 |
+
"learning_rate": 6.281e-05,
|
| 44079 |
+
"loss": 0.5774,
|
| 44080 |
+
"step": 6282
|
| 44081 |
+
},
|
| 44082 |
+
{
|
| 44083 |
+
"epoch": 0.0006283,
|
| 44084 |
+
"grad_norm": 1.2224934101104736,
|
| 44085 |
+
"learning_rate": 6.282e-05,
|
| 44086 |
+
"loss": 0.5337,
|
| 44087 |
+
"step": 6283
|
| 44088 |
+
},
|
| 44089 |
+
{
|
| 44090 |
+
"epoch": 0.0006284,
|
| 44091 |
+
"grad_norm": 1.2406748533248901,
|
| 44092 |
+
"learning_rate": 6.283e-05,
|
| 44093 |
+
"loss": 0.4871,
|
| 44094 |
+
"step": 6284
|
| 44095 |
+
},
|
| 44096 |
+
{
|
| 44097 |
+
"epoch": 0.0006285,
|
| 44098 |
+
"grad_norm": 1.114234209060669,
|
| 44099 |
+
"learning_rate": 6.284e-05,
|
| 44100 |
+
"loss": 0.4927,
|
| 44101 |
+
"step": 6285
|
| 44102 |
+
},
|
| 44103 |
+
{
|
| 44104 |
+
"epoch": 0.0006286,
|
| 44105 |
+
"grad_norm": 1.326036810874939,
|
| 44106 |
+
"learning_rate": 6.285e-05,
|
| 44107 |
+
"loss": 0.6033,
|
| 44108 |
+
"step": 6286
|
| 44109 |
+
},
|
| 44110 |
+
{
|
| 44111 |
+
"epoch": 0.0006287,
|
| 44112 |
+
"grad_norm": 1.1210211515426636,
|
| 44113 |
+
"learning_rate": 6.286e-05,
|
| 44114 |
+
"loss": 0.4875,
|
| 44115 |
+
"step": 6287
|
| 44116 |
+
},
|
| 44117 |
+
{
|
| 44118 |
+
"epoch": 0.0006288,
|
| 44119 |
+
"grad_norm": 1.2281023263931274,
|
| 44120 |
+
"learning_rate": 6.287e-05,
|
| 44121 |
+
"loss": 0.5334,
|
| 44122 |
+
"step": 6288
|
| 44123 |
+
},
|
| 44124 |
+
{
|
| 44125 |
+
"epoch": 0.0006289,
|
| 44126 |
+
"grad_norm": 1.2000365257263184,
|
| 44127 |
+
"learning_rate": 6.288e-05,
|
| 44128 |
+
"loss": 0.4956,
|
| 44129 |
+
"step": 6289
|
| 44130 |
+
},
|
| 44131 |
+
{
|
| 44132 |
+
"epoch": 0.000629,
|
| 44133 |
+
"grad_norm": 1.6449037790298462,
|
| 44134 |
+
"learning_rate": 6.289e-05,
|
| 44135 |
+
"loss": 0.5977,
|
| 44136 |
+
"step": 6290
|
| 44137 |
+
},
|
| 44138 |
+
{
|
| 44139 |
+
"epoch": 0.0006291,
|
| 44140 |
+
"grad_norm": 2.3417065143585205,
|
| 44141 |
+
"learning_rate": 6.290000000000001e-05,
|
| 44142 |
+
"loss": 0.8713,
|
| 44143 |
+
"step": 6291
|
| 44144 |
+
},
|
| 44145 |
+
{
|
| 44146 |
+
"epoch": 0.0006292,
|
| 44147 |
+
"grad_norm": 1.3869773149490356,
|
| 44148 |
+
"learning_rate": 6.290999999999999e-05,
|
| 44149 |
+
"loss": 0.5161,
|
| 44150 |
+
"step": 6292
|
| 44151 |
+
},
|
| 44152 |
+
{
|
| 44153 |
+
"epoch": 0.0006293,
|
| 44154 |
+
"grad_norm": 1.1831398010253906,
|
| 44155 |
+
"learning_rate": 6.292e-05,
|
| 44156 |
+
"loss": 0.4766,
|
| 44157 |
+
"step": 6293
|
| 44158 |
+
},
|
| 44159 |
+
{
|
| 44160 |
+
"epoch": 0.0006294,
|
| 44161 |
+
"grad_norm": 1.2068742513656616,
|
| 44162 |
+
"learning_rate": 6.293000000000001e-05,
|
| 44163 |
+
"loss": 0.563,
|
| 44164 |
+
"step": 6294
|
| 44165 |
+
},
|
| 44166 |
+
{
|
| 44167 |
+
"epoch": 0.0006295,
|
| 44168 |
+
"grad_norm": 1.366097331047058,
|
| 44169 |
+
"learning_rate": 6.294e-05,
|
| 44170 |
+
"loss": 0.5474,
|
| 44171 |
+
"step": 6295
|
| 44172 |
+
},
|
| 44173 |
+
{
|
| 44174 |
+
"epoch": 0.0006296,
|
| 44175 |
+
"grad_norm": 1.2809611558914185,
|
| 44176 |
+
"learning_rate": 6.295e-05,
|
| 44177 |
+
"loss": 0.5127,
|
| 44178 |
+
"step": 6296
|
| 44179 |
+
},
|
| 44180 |
+
{
|
| 44181 |
+
"epoch": 0.0006297,
|
| 44182 |
+
"grad_norm": 1.66355562210083,
|
| 44183 |
+
"learning_rate": 6.296000000000001e-05,
|
| 44184 |
+
"loss": 0.5964,
|
| 44185 |
+
"step": 6297
|
| 44186 |
+
},
|
| 44187 |
+
{
|
| 44188 |
+
"epoch": 0.0006298,
|
| 44189 |
+
"grad_norm": 1.2659800052642822,
|
| 44190 |
+
"learning_rate": 6.297e-05,
|
| 44191 |
+
"loss": 0.5327,
|
| 44192 |
+
"step": 6298
|
| 44193 |
+
},
|
| 44194 |
+
{
|
| 44195 |
+
"epoch": 0.0006299,
|
| 44196 |
+
"grad_norm": 1.33529794216156,
|
| 44197 |
+
"learning_rate": 6.298e-05,
|
| 44198 |
+
"loss": 0.5581,
|
| 44199 |
+
"step": 6299
|
| 44200 |
+
},
|
| 44201 |
+
{
|
| 44202 |
+
"epoch": 0.00063,
|
| 44203 |
+
"grad_norm": 1.4095124006271362,
|
| 44204 |
+
"learning_rate": 6.299e-05,
|
| 44205 |
+
"loss": 0.5537,
|
| 44206 |
+
"step": 6300
|
| 44207 |
+
},
|
| 44208 |
+
{
|
| 44209 |
+
"epoch": 0.0006301,
|
| 44210 |
+
"grad_norm": 1.3011655807495117,
|
| 44211 |
+
"learning_rate": 6.3e-05,
|
| 44212 |
+
"loss": 0.4934,
|
| 44213 |
+
"step": 6301
|
| 44214 |
+
},
|
| 44215 |
+
{
|
| 44216 |
+
"epoch": 0.0006302,
|
| 44217 |
+
"grad_norm": 2.5561511516571045,
|
| 44218 |
+
"learning_rate": 6.301e-05,
|
| 44219 |
+
"loss": 0.8005,
|
| 44220 |
+
"step": 6302
|
| 44221 |
+
},
|
| 44222 |
+
{
|
| 44223 |
+
"epoch": 0.0006303,
|
| 44224 |
+
"grad_norm": 1.8032957315444946,
|
| 44225 |
+
"learning_rate": 6.302e-05,
|
| 44226 |
+
"loss": 0.7488,
|
| 44227 |
+
"step": 6303
|
| 44228 |
+
},
|
| 44229 |
+
{
|
| 44230 |
+
"epoch": 0.0006304,
|
| 44231 |
+
"grad_norm": 1.6584831476211548,
|
| 44232 |
+
"learning_rate": 6.303e-05,
|
| 44233 |
+
"loss": 0.5923,
|
| 44234 |
+
"step": 6304
|
| 44235 |
+
},
|
| 44236 |
+
{
|
| 44237 |
+
"epoch": 0.0006305,
|
| 44238 |
+
"grad_norm": 1.9853506088256836,
|
| 44239 |
+
"learning_rate": 6.303999999999999e-05,
|
| 44240 |
+
"loss": 0.9365,
|
| 44241 |
+
"step": 6305
|
| 44242 |
+
},
|
| 44243 |
+
{
|
| 44244 |
+
"epoch": 0.0006306,
|
| 44245 |
+
"grad_norm": 1.3332364559173584,
|
| 44246 |
+
"learning_rate": 6.305e-05,
|
| 44247 |
+
"loss": 0.5188,
|
| 44248 |
+
"step": 6306
|
| 44249 |
+
},
|
| 44250 |
+
{
|
| 44251 |
+
"epoch": 0.0006307,
|
| 44252 |
+
"grad_norm": 1.6622294187545776,
|
| 44253 |
+
"learning_rate": 6.306000000000001e-05,
|
| 44254 |
+
"loss": 0.7083,
|
| 44255 |
+
"step": 6307
|
| 44256 |
+
},
|
| 44257 |
+
{
|
| 44258 |
+
"epoch": 0.0006308,
|
| 44259 |
+
"grad_norm": 2.3817873001098633,
|
| 44260 |
+
"learning_rate": 6.307e-05,
|
| 44261 |
+
"loss": 0.7249,
|
| 44262 |
+
"step": 6308
|
| 44263 |
+
},
|
| 44264 |
+
{
|
| 44265 |
+
"epoch": 0.0006309,
|
| 44266 |
+
"grad_norm": 1.5819065570831299,
|
| 44267 |
+
"learning_rate": 6.308e-05,
|
| 44268 |
+
"loss": 0.7139,
|
| 44269 |
+
"step": 6309
|
| 44270 |
+
},
|
| 44271 |
+
{
|
| 44272 |
+
"epoch": 0.000631,
|
| 44273 |
+
"grad_norm": 1.358562707901001,
|
| 44274 |
+
"learning_rate": 6.309000000000001e-05,
|
| 44275 |
+
"loss": 0.5186,
|
| 44276 |
+
"step": 6310
|
| 44277 |
+
},
|
| 44278 |
+
{
|
| 44279 |
+
"epoch": 0.0006311,
|
| 44280 |
+
"grad_norm": 1.435044527053833,
|
| 44281 |
+
"learning_rate": 6.31e-05,
|
| 44282 |
+
"loss": 0.582,
|
| 44283 |
+
"step": 6311
|
| 44284 |
+
},
|
| 44285 |
+
{
|
| 44286 |
+
"epoch": 0.0006312,
|
| 44287 |
+
"grad_norm": 1.264384150505066,
|
| 44288 |
+
"learning_rate": 6.311e-05,
|
| 44289 |
+
"loss": 0.5269,
|
| 44290 |
+
"step": 6312
|
| 44291 |
+
},
|
| 44292 |
+
{
|
| 44293 |
+
"epoch": 0.0006313,
|
| 44294 |
+
"grad_norm": 2.0423264503479004,
|
| 44295 |
+
"learning_rate": 6.312e-05,
|
| 44296 |
+
"loss": 0.8406,
|
| 44297 |
+
"step": 6313
|
| 44298 |
+
},
|
| 44299 |
+
{
|
| 44300 |
+
"epoch": 0.0006314,
|
| 44301 |
+
"grad_norm": 1.5304577350616455,
|
| 44302 |
+
"learning_rate": 6.313e-05,
|
| 44303 |
+
"loss": 0.563,
|
| 44304 |
+
"step": 6314
|
| 44305 |
+
},
|
| 44306 |
+
{
|
| 44307 |
+
"epoch": 0.0006315,
|
| 44308 |
+
"grad_norm": 2.266411066055298,
|
| 44309 |
+
"learning_rate": 6.314000000000001e-05,
|
| 44310 |
+
"loss": 0.9119,
|
| 44311 |
+
"step": 6315
|
| 44312 |
+
},
|
| 44313 |
+
{
|
| 44314 |
+
"epoch": 0.0006316,
|
| 44315 |
+
"grad_norm": 1.722103476524353,
|
| 44316 |
+
"learning_rate": 6.315e-05,
|
| 44317 |
+
"loss": 0.5452,
|
| 44318 |
+
"step": 6316
|
| 44319 |
+
},
|
| 44320 |
+
{
|
| 44321 |
+
"epoch": 0.0006317,
|
| 44322 |
+
"grad_norm": 1.3955553770065308,
|
| 44323 |
+
"learning_rate": 6.316e-05,
|
| 44324 |
+
"loss": 0.5408,
|
| 44325 |
+
"step": 6317
|
| 44326 |
+
},
|
| 44327 |
+
{
|
| 44328 |
+
"epoch": 0.0006318,
|
| 44329 |
+
"grad_norm": 1.2551302909851074,
|
| 44330 |
+
"learning_rate": 6.317e-05,
|
| 44331 |
+
"loss": 0.5273,
|
| 44332 |
+
"step": 6318
|
| 44333 |
+
},
|
| 44334 |
+
{
|
| 44335 |
+
"epoch": 0.0006319,
|
| 44336 |
+
"grad_norm": 1.8961031436920166,
|
| 44337 |
+
"learning_rate": 6.318e-05,
|
| 44338 |
+
"loss": 0.5591,
|
| 44339 |
+
"step": 6319
|
| 44340 |
+
},
|
| 44341 |
+
{
|
| 44342 |
+
"epoch": 0.000632,
|
| 44343 |
+
"grad_norm": 7.158136367797852,
|
| 44344 |
+
"learning_rate": 6.319e-05,
|
| 44345 |
+
"loss": 0.8918,
|
| 44346 |
+
"step": 6320
|
| 44347 |
+
},
|
| 44348 |
+
{
|
| 44349 |
+
"epoch": 0.0006321,
|
| 44350 |
+
"grad_norm": 2.118948459625244,
|
| 44351 |
+
"learning_rate": 6.32e-05,
|
| 44352 |
+
"loss": 0.6646,
|
| 44353 |
+
"step": 6321
|
| 44354 |
+
},
|
| 44355 |
+
{
|
| 44356 |
+
"epoch": 0.0006322,
|
| 44357 |
+
"grad_norm": 1.848267674446106,
|
| 44358 |
+
"learning_rate": 6.321e-05,
|
| 44359 |
+
"loss": 0.5669,
|
| 44360 |
+
"step": 6322
|
| 44361 |
+
},
|
| 44362 |
+
{
|
| 44363 |
+
"epoch": 0.0006323,
|
| 44364 |
+
"grad_norm": 1.3671910762786865,
|
| 44365 |
+
"learning_rate": 6.322e-05,
|
| 44366 |
+
"loss": 0.5347,
|
| 44367 |
+
"step": 6323
|
| 44368 |
+
},
|
| 44369 |
+
{
|
| 44370 |
+
"epoch": 0.0006324,
|
| 44371 |
+
"grad_norm": 1.8404011726379395,
|
| 44372 |
+
"learning_rate": 6.323e-05,
|
| 44373 |
+
"loss": 0.8181,
|
| 44374 |
+
"step": 6324
|
| 44375 |
+
},
|
| 44376 |
+
{
|
| 44377 |
+
"epoch": 0.0006325,
|
| 44378 |
+
"grad_norm": 1.5496231317520142,
|
| 44379 |
+
"learning_rate": 6.324000000000001e-05,
|
| 44380 |
+
"loss": 0.48,
|
| 44381 |
+
"step": 6325
|
| 44382 |
+
},
|
| 44383 |
+
{
|
| 44384 |
+
"epoch": 0.0006326,
|
| 44385 |
+
"grad_norm": 2.027029514312744,
|
| 44386 |
+
"learning_rate": 6.324999999999999e-05,
|
| 44387 |
+
"loss": 0.7007,
|
| 44388 |
+
"step": 6326
|
| 44389 |
+
},
|
| 44390 |
+
{
|
| 44391 |
+
"epoch": 0.0006327,
|
| 44392 |
+
"grad_norm": 1.5602715015411377,
|
| 44393 |
+
"learning_rate": 6.326e-05,
|
| 44394 |
+
"loss": 0.5342,
|
| 44395 |
+
"step": 6327
|
| 44396 |
+
},
|
| 44397 |
+
{
|
| 44398 |
+
"epoch": 0.0006328,
|
| 44399 |
+
"grad_norm": 1.2844575643539429,
|
| 44400 |
+
"learning_rate": 6.327000000000001e-05,
|
| 44401 |
+
"loss": 0.4761,
|
| 44402 |
+
"step": 6328
|
| 44403 |
+
},
|
| 44404 |
+
{
|
| 44405 |
+
"epoch": 0.0006329,
|
| 44406 |
+
"grad_norm": 1.48810875415802,
|
| 44407 |
+
"learning_rate": 6.328e-05,
|
| 44408 |
+
"loss": 0.5132,
|
| 44409 |
+
"step": 6329
|
| 44410 |
+
},
|
| 44411 |
+
{
|
| 44412 |
+
"epoch": 0.000633,
|
| 44413 |
+
"grad_norm": 1.6981958150863647,
|
| 44414 |
+
"learning_rate": 6.329e-05,
|
| 44415 |
+
"loss": 0.6323,
|
| 44416 |
+
"step": 6330
|
| 44417 |
+
},
|
| 44418 |
+
{
|
| 44419 |
+
"epoch": 0.0006331,
|
| 44420 |
+
"grad_norm": 2.4248063564300537,
|
| 44421 |
+
"learning_rate": 6.330000000000001e-05,
|
| 44422 |
+
"loss": 0.6284,
|
| 44423 |
+
"step": 6331
|
| 44424 |
+
},
|
| 44425 |
+
{
|
| 44426 |
+
"epoch": 0.0006332,
|
| 44427 |
+
"grad_norm": 1.428120493888855,
|
| 44428 |
+
"learning_rate": 6.331e-05,
|
| 44429 |
+
"loss": 0.5042,
|
| 44430 |
+
"step": 6332
|
| 44431 |
+
},
|
| 44432 |
+
{
|
| 44433 |
+
"epoch": 0.0006333,
|
| 44434 |
+
"grad_norm": 1.2944762706756592,
|
| 44435 |
+
"learning_rate": 6.332e-05,
|
| 44436 |
+
"loss": 0.4751,
|
| 44437 |
+
"step": 6333
|
| 44438 |
+
},
|
| 44439 |
+
{
|
| 44440 |
+
"epoch": 0.0006334,
|
| 44441 |
+
"grad_norm": 2.560227394104004,
|
| 44442 |
+
"learning_rate": 6.333e-05,
|
| 44443 |
+
"loss": 0.7188,
|
| 44444 |
+
"step": 6334
|
| 44445 |
+
},
|
| 44446 |
+
{
|
| 44447 |
+
"epoch": 0.0006335,
|
| 44448 |
+
"grad_norm": 1.1113696098327637,
|
| 44449 |
+
"learning_rate": 6.334e-05,
|
| 44450 |
+
"loss": 0.4587,
|
| 44451 |
+
"step": 6335
|
| 44452 |
+
},
|
| 44453 |
+
{
|
| 44454 |
+
"epoch": 0.0006336,
|
| 44455 |
+
"grad_norm": 3.3463480472564697,
|
| 44456 |
+
"learning_rate": 6.335e-05,
|
| 44457 |
+
"loss": 1.0486,
|
| 44458 |
+
"step": 6336
|
| 44459 |
+
},
|
| 44460 |
+
{
|
| 44461 |
+
"epoch": 0.0006337,
|
| 44462 |
+
"grad_norm": 2.7146496772766113,
|
| 44463 |
+
"learning_rate": 6.336e-05,
|
| 44464 |
+
"loss": 0.7905,
|
| 44465 |
+
"step": 6337
|
| 44466 |
+
},
|
| 44467 |
+
{
|
| 44468 |
+
"epoch": 0.0006338,
|
| 44469 |
+
"grad_norm": 1.945244312286377,
|
| 44470 |
+
"learning_rate": 6.337e-05,
|
| 44471 |
+
"loss": 0.6143,
|
| 44472 |
+
"step": 6338
|
| 44473 |
+
},
|
| 44474 |
+
{
|
| 44475 |
+
"epoch": 0.0006339,
|
| 44476 |
+
"grad_norm": 1.4121110439300537,
|
| 44477 |
+
"learning_rate": 6.337999999999999e-05,
|
| 44478 |
+
"loss": 0.5398,
|
| 44479 |
+
"step": 6339
|
| 44480 |
+
},
|
| 44481 |
+
{
|
| 44482 |
+
"epoch": 0.000634,
|
| 44483 |
+
"grad_norm": 1.2575377225875854,
|
| 44484 |
+
"learning_rate": 6.339e-05,
|
| 44485 |
+
"loss": 0.4917,
|
| 44486 |
+
"step": 6340
|
| 44487 |
+
},
|
| 44488 |
+
{
|
| 44489 |
+
"epoch": 0.0006341,
|
| 44490 |
+
"grad_norm": 1.2989206314086914,
|
| 44491 |
+
"learning_rate": 6.34e-05,
|
| 44492 |
+
"loss": 0.4814,
|
| 44493 |
+
"step": 6341
|
| 44494 |
+
},
|
| 44495 |
+
{
|
| 44496 |
+
"epoch": 0.0006342,
|
| 44497 |
+
"grad_norm": 1.2086619138717651,
|
| 44498 |
+
"learning_rate": 6.341e-05,
|
| 44499 |
+
"loss": 0.4756,
|
| 44500 |
+
"step": 6342
|
| 44501 |
+
},
|
| 44502 |
+
{
|
| 44503 |
+
"epoch": 0.0006343,
|
| 44504 |
+
"grad_norm": 2.2826614379882812,
|
| 44505 |
+
"learning_rate": 6.342e-05,
|
| 44506 |
+
"loss": 0.8481,
|
| 44507 |
+
"step": 6343
|
| 44508 |
+
},
|
| 44509 |
+
{
|
| 44510 |
+
"epoch": 0.0006344,
|
| 44511 |
+
"grad_norm": 1.9749172925949097,
|
| 44512 |
+
"learning_rate": 6.343000000000001e-05,
|
| 44513 |
+
"loss": 0.6133,
|
| 44514 |
+
"step": 6344
|
| 44515 |
+
},
|
| 44516 |
+
{
|
| 44517 |
+
"epoch": 0.0006345,
|
| 44518 |
+
"grad_norm": 1.5513535737991333,
|
| 44519 |
+
"learning_rate": 6.344e-05,
|
| 44520 |
+
"loss": 0.6121,
|
| 44521 |
+
"step": 6345
|
| 44522 |
+
},
|
| 44523 |
+
{
|
| 44524 |
+
"epoch": 0.0006346,
|
| 44525 |
+
"grad_norm": 1.4886285066604614,
|
| 44526 |
+
"learning_rate": 6.345e-05,
|
| 44527 |
+
"loss": 0.6157,
|
| 44528 |
+
"step": 6346
|
| 44529 |
+
},
|
| 44530 |
+
{
|
| 44531 |
+
"epoch": 0.0006347,
|
| 44532 |
+
"grad_norm": 1.1188468933105469,
|
| 44533 |
+
"learning_rate": 6.346e-05,
|
| 44534 |
+
"loss": 0.4675,
|
| 44535 |
+
"step": 6347
|
| 44536 |
+
},
|
| 44537 |
+
{
|
| 44538 |
+
"epoch": 0.0006348,
|
| 44539 |
+
"grad_norm": 1.867576003074646,
|
| 44540 |
+
"learning_rate": 6.347e-05,
|
| 44541 |
+
"loss": 0.6309,
|
| 44542 |
+
"step": 6348
|
| 44543 |
+
},
|
| 44544 |
+
{
|
| 44545 |
+
"epoch": 0.0006349,
|
| 44546 |
+
"grad_norm": 1.3053021430969238,
|
| 44547 |
+
"learning_rate": 6.348000000000001e-05,
|
| 44548 |
+
"loss": 0.5254,
|
| 44549 |
+
"step": 6349
|
| 44550 |
+
},
|
| 44551 |
+
{
|
| 44552 |
+
"epoch": 0.000635,
|
| 44553 |
+
"grad_norm": 1.2170910835266113,
|
| 44554 |
+
"learning_rate": 6.349e-05,
|
| 44555 |
+
"loss": 0.4998,
|
| 44556 |
+
"step": 6350
|
| 44557 |
+
},
|
| 44558 |
+
{
|
| 44559 |
+
"epoch": 0.0006351,
|
| 44560 |
+
"grad_norm": 1.4668612480163574,
|
| 44561 |
+
"learning_rate": 6.35e-05,
|
| 44562 |
+
"loss": 0.5396,
|
| 44563 |
+
"step": 6351
|
| 44564 |
+
},
|
| 44565 |
+
{
|
| 44566 |
+
"epoch": 0.0006352,
|
| 44567 |
+
"grad_norm": 1.247548222541809,
|
| 44568 |
+
"learning_rate": 6.351000000000001e-05,
|
| 44569 |
+
"loss": 0.522,
|
| 44570 |
+
"step": 6352
|
| 44571 |
+
},
|
| 44572 |
+
{
|
| 44573 |
+
"epoch": 0.0006353,
|
| 44574 |
+
"grad_norm": 1.343698263168335,
|
| 44575 |
+
"learning_rate": 6.352e-05,
|
| 44576 |
+
"loss": 0.5933,
|
| 44577 |
+
"step": 6353
|
| 44578 |
+
},
|
| 44579 |
+
{
|
| 44580 |
+
"epoch": 0.0006354,
|
| 44581 |
+
"grad_norm": 1.2090539932250977,
|
| 44582 |
+
"learning_rate": 6.353e-05,
|
| 44583 |
+
"loss": 0.4805,
|
| 44584 |
+
"step": 6354
|
| 44585 |
+
},
|
| 44586 |
+
{
|
| 44587 |
+
"epoch": 0.0006355,
|
| 44588 |
+
"grad_norm": 1.5267757177352905,
|
| 44589 |
+
"learning_rate": 6.354e-05,
|
| 44590 |
+
"loss": 0.5281,
|
| 44591 |
+
"step": 6355
|
| 44592 |
+
},
|
| 44593 |
+
{
|
| 44594 |
+
"epoch": 0.0006356,
|
| 44595 |
+
"grad_norm": 1.6579869985580444,
|
| 44596 |
+
"learning_rate": 6.355e-05,
|
| 44597 |
+
"loss": 0.6057,
|
| 44598 |
+
"step": 6356
|
| 44599 |
+
},
|
| 44600 |
+
{
|
| 44601 |
+
"epoch": 0.0006357,
|
| 44602 |
+
"grad_norm": 1.513315200805664,
|
| 44603 |
+
"learning_rate": 6.356e-05,
|
| 44604 |
+
"loss": 0.5649,
|
| 44605 |
+
"step": 6357
|
| 44606 |
+
},
|
| 44607 |
+
{
|
| 44608 |
+
"epoch": 0.0006358,
|
| 44609 |
+
"grad_norm": 1.2920204401016235,
|
| 44610 |
+
"learning_rate": 6.357e-05,
|
| 44611 |
+
"loss": 0.5393,
|
| 44612 |
+
"step": 6358
|
| 44613 |
+
},
|
| 44614 |
+
{
|
| 44615 |
+
"epoch": 0.0006359,
|
| 44616 |
+
"grad_norm": 1.0864078998565674,
|
| 44617 |
+
"learning_rate": 6.358000000000001e-05,
|
| 44618 |
+
"loss": 0.4658,
|
| 44619 |
+
"step": 6359
|
| 44620 |
+
},
|
| 44621 |
+
{
|
| 44622 |
+
"epoch": 0.000636,
|
| 44623 |
+
"grad_norm": 1.2634496688842773,
|
| 44624 |
+
"learning_rate": 6.358999999999999e-05,
|
| 44625 |
+
"loss": 0.5022,
|
| 44626 |
+
"step": 6360
|
| 44627 |
+
},
|
| 44628 |
+
{
|
| 44629 |
+
"epoch": 0.0006361,
|
| 44630 |
+
"grad_norm": 1.105880856513977,
|
| 44631 |
+
"learning_rate": 6.36e-05,
|
| 44632 |
+
"loss": 0.4771,
|
| 44633 |
+
"step": 6361
|
| 44634 |
+
},
|
| 44635 |
+
{
|
| 44636 |
+
"epoch": 0.0006362,
|
| 44637 |
+
"grad_norm": 1.1405361890792847,
|
| 44638 |
+
"learning_rate": 6.361000000000001e-05,
|
| 44639 |
+
"loss": 0.5073,
|
| 44640 |
+
"step": 6362
|
| 44641 |
+
},
|
| 44642 |
+
{
|
| 44643 |
+
"epoch": 0.0006363,
|
| 44644 |
+
"grad_norm": 3.483867645263672,
|
| 44645 |
+
"learning_rate": 6.361999999999999e-05,
|
| 44646 |
+
"loss": 1.0667,
|
| 44647 |
+
"step": 6363
|
| 44648 |
+
},
|
| 44649 |
+
{
|
| 44650 |
+
"epoch": 0.0006364,
|
| 44651 |
+
"grad_norm": 1.7898037433624268,
|
| 44652 |
+
"learning_rate": 6.363e-05,
|
| 44653 |
+
"loss": 0.5388,
|
| 44654 |
+
"step": 6364
|
| 44655 |
+
},
|
| 44656 |
+
{
|
| 44657 |
+
"epoch": 0.0006365,
|
| 44658 |
+
"grad_norm": 1.2391436100006104,
|
| 44659 |
+
"learning_rate": 6.364000000000001e-05,
|
| 44660 |
+
"loss": 0.4507,
|
| 44661 |
+
"step": 6365
|
| 44662 |
+
},
|
| 44663 |
+
{
|
| 44664 |
+
"epoch": 0.0006366,
|
| 44665 |
+
"grad_norm": 1.1045516729354858,
|
| 44666 |
+
"learning_rate": 6.365e-05,
|
| 44667 |
+
"loss": 0.458,
|
| 44668 |
+
"step": 6366
|
| 44669 |
+
},
|
| 44670 |
+
{
|
| 44671 |
+
"epoch": 0.0006367,
|
| 44672 |
+
"grad_norm": 1.046285629272461,
|
| 44673 |
+
"learning_rate": 6.366e-05,
|
| 44674 |
+
"loss": 0.4648,
|
| 44675 |
+
"step": 6367
|
| 44676 |
+
},
|
| 44677 |
+
{
|
| 44678 |
+
"epoch": 0.0006368,
|
| 44679 |
+
"grad_norm": 1.1581072807312012,
|
| 44680 |
+
"learning_rate": 6.367e-05,
|
| 44681 |
+
"loss": 0.4712,
|
| 44682 |
+
"step": 6368
|
| 44683 |
+
},
|
| 44684 |
+
{
|
| 44685 |
+
"epoch": 0.0006369,
|
| 44686 |
+
"grad_norm": 1.6926162242889404,
|
| 44687 |
+
"learning_rate": 6.368e-05,
|
| 44688 |
+
"loss": 0.5728,
|
| 44689 |
+
"step": 6369
|
| 44690 |
+
},
|
| 44691 |
+
{
|
| 44692 |
+
"epoch": 0.000637,
|
| 44693 |
+
"grad_norm": 1.29767644405365,
|
| 44694 |
+
"learning_rate": 6.369e-05,
|
| 44695 |
+
"loss": 0.5198,
|
| 44696 |
+
"step": 6370
|
| 44697 |
+
},
|
| 44698 |
+
{
|
| 44699 |
+
"epoch": 0.0006371,
|
| 44700 |
+
"grad_norm": 1.136470079421997,
|
| 44701 |
+
"learning_rate": 6.37e-05,
|
| 44702 |
+
"loss": 0.4646,
|
| 44703 |
+
"step": 6371
|
| 44704 |
+
},
|
| 44705 |
+
{
|
| 44706 |
+
"epoch": 0.0006372,
|
| 44707 |
+
"grad_norm": 1.0796231031417847,
|
| 44708 |
+
"learning_rate": 6.371e-05,
|
| 44709 |
+
"loss": 0.429,
|
| 44710 |
+
"step": 6372
|
| 44711 |
+
},
|
| 44712 |
+
{
|
| 44713 |
+
"epoch": 0.0006373,
|
| 44714 |
+
"grad_norm": 1.1330997943878174,
|
| 44715 |
+
"learning_rate": 6.371999999999999e-05,
|
| 44716 |
+
"loss": 0.4595,
|
| 44717 |
+
"step": 6373
|
| 44718 |
+
},
|
| 44719 |
+
{
|
| 44720 |
+
"epoch": 0.0006374,
|
| 44721 |
+
"grad_norm": 1.1513479948043823,
|
| 44722 |
+
"learning_rate": 6.373e-05,
|
| 44723 |
+
"loss": 0.4875,
|
| 44724 |
+
"step": 6374
|
| 44725 |
+
},
|
| 44726 |
+
{
|
| 44727 |
+
"epoch": 0.0006375,
|
| 44728 |
+
"grad_norm": 1.2037591934204102,
|
| 44729 |
+
"learning_rate": 6.374e-05,
|
| 44730 |
+
"loss": 0.5459,
|
| 44731 |
+
"step": 6375
|
| 44732 |
+
},
|
| 44733 |
+
{
|
| 44734 |
+
"epoch": 0.0006376,
|
| 44735 |
+
"grad_norm": 1.4171353578567505,
|
| 44736 |
+
"learning_rate": 6.375e-05,
|
| 44737 |
+
"loss": 0.5518,
|
| 44738 |
+
"step": 6376
|
| 44739 |
+
},
|
| 44740 |
+
{
|
| 44741 |
+
"epoch": 0.0006377,
|
| 44742 |
+
"grad_norm": 1.5970433950424194,
|
| 44743 |
+
"learning_rate": 6.376e-05,
|
| 44744 |
+
"loss": 0.5161,
|
| 44745 |
+
"step": 6377
|
| 44746 |
+
},
|
| 44747 |
+
{
|
| 44748 |
+
"epoch": 0.0006378,
|
| 44749 |
+
"grad_norm": 1.1912060976028442,
|
| 44750 |
+
"learning_rate": 6.377000000000001e-05,
|
| 44751 |
+
"loss": 0.4429,
|
| 44752 |
+
"step": 6378
|
| 44753 |
+
},
|
| 44754 |
+
{
|
| 44755 |
+
"epoch": 0.0006379,
|
| 44756 |
+
"grad_norm": 1.2796026468276978,
|
| 44757 |
+
"learning_rate": 6.378e-05,
|
| 44758 |
+
"loss": 0.4963,
|
| 44759 |
+
"step": 6379
|
| 44760 |
+
},
|
| 44761 |
+
{
|
| 44762 |
+
"epoch": 0.000638,
|
| 44763 |
+
"grad_norm": 1.4407531023025513,
|
| 44764 |
+
"learning_rate": 6.379e-05,
|
| 44765 |
+
"loss": 0.5935,
|
| 44766 |
+
"step": 6380
|
| 44767 |
+
},
|
| 44768 |
+
{
|
| 44769 |
+
"epoch": 0.0006381,
|
| 44770 |
+
"grad_norm": 1.348570704460144,
|
| 44771 |
+
"learning_rate": 6.38e-05,
|
| 44772 |
+
"loss": 0.5466,
|
| 44773 |
+
"step": 6381
|
| 44774 |
+
},
|
| 44775 |
+
{
|
| 44776 |
+
"epoch": 0.0006382,
|
| 44777 |
+
"grad_norm": 1.1336405277252197,
|
| 44778 |
+
"learning_rate": 6.381e-05,
|
| 44779 |
+
"loss": 0.4617,
|
| 44780 |
+
"step": 6382
|
| 44781 |
+
},
|
| 44782 |
+
{
|
| 44783 |
+
"epoch": 0.0006383,
|
| 44784 |
+
"grad_norm": 1.2180068492889404,
|
| 44785 |
+
"learning_rate": 6.382000000000001e-05,
|
| 44786 |
+
"loss": 0.4634,
|
| 44787 |
+
"step": 6383
|
| 44788 |
+
},
|
| 44789 |
+
{
|
| 44790 |
+
"epoch": 0.0006384,
|
| 44791 |
+
"grad_norm": 1.0494590997695923,
|
| 44792 |
+
"learning_rate": 6.383e-05,
|
| 44793 |
+
"loss": 0.4595,
|
| 44794 |
+
"step": 6384
|
| 44795 |
+
},
|
| 44796 |
+
{
|
| 44797 |
+
"epoch": 0.0006385,
|
| 44798 |
+
"grad_norm": 1.3279155492782593,
|
| 44799 |
+
"learning_rate": 6.384e-05,
|
| 44800 |
+
"loss": 0.5374,
|
| 44801 |
+
"step": 6385
|
| 44802 |
+
},
|
| 44803 |
+
{
|
| 44804 |
+
"epoch": 0.0006386,
|
| 44805 |
+
"grad_norm": 1.906715989112854,
|
| 44806 |
+
"learning_rate": 6.385000000000001e-05,
|
| 44807 |
+
"loss": 0.5901,
|
| 44808 |
+
"step": 6386
|
| 44809 |
+
},
|
| 44810 |
+
{
|
| 44811 |
+
"epoch": 0.0006387,
|
| 44812 |
+
"grad_norm": 2.3137943744659424,
|
| 44813 |
+
"learning_rate": 6.386e-05,
|
| 44814 |
+
"loss": 0.6785,
|
| 44815 |
+
"step": 6387
|
| 44816 |
+
},
|
| 44817 |
+
{
|
| 44818 |
+
"epoch": 0.0006388,
|
| 44819 |
+
"grad_norm": 3.7279317378997803,
|
| 44820 |
+
"learning_rate": 6.387e-05,
|
| 44821 |
+
"loss": 0.7988,
|
| 44822 |
+
"step": 6388
|
| 44823 |
+
},
|
| 44824 |
+
{
|
| 44825 |
+
"epoch": 0.0006389,
|
| 44826 |
+
"grad_norm": 2.4148833751678467,
|
| 44827 |
+
"learning_rate": 6.388e-05,
|
| 44828 |
+
"loss": 0.7737,
|
| 44829 |
+
"step": 6389
|
| 44830 |
+
},
|
| 44831 |
+
{
|
| 44832 |
+
"epoch": 0.000639,
|
| 44833 |
+
"grad_norm": 1.3121864795684814,
|
| 44834 |
+
"learning_rate": 6.389e-05,
|
| 44835 |
+
"loss": 0.4556,
|
| 44836 |
+
"step": 6390
|
| 44837 |
+
},
|
| 44838 |
+
{
|
| 44839 |
+
"epoch": 0.0006391,
|
| 44840 |
+
"grad_norm": 1.8153197765350342,
|
| 44841 |
+
"learning_rate": 6.39e-05,
|
| 44842 |
+
"loss": 0.6904,
|
| 44843 |
+
"step": 6391
|
| 44844 |
+
},
|
| 44845 |
+
{
|
| 44846 |
+
"epoch": 0.0006392,
|
| 44847 |
+
"grad_norm": 1.1611392498016357,
|
| 44848 |
+
"learning_rate": 6.391e-05,
|
| 44849 |
+
"loss": 0.4839,
|
| 44850 |
+
"step": 6392
|
| 44851 |
+
},
|
| 44852 |
+
{
|
| 44853 |
+
"epoch": 0.0006393,
|
| 44854 |
+
"grad_norm": 1.3582239151000977,
|
| 44855 |
+
"learning_rate": 6.392000000000001e-05,
|
| 44856 |
+
"loss": 0.5037,
|
| 44857 |
+
"step": 6393
|
| 44858 |
+
},
|
| 44859 |
+
{
|
| 44860 |
+
"epoch": 0.0006394,
|
| 44861 |
+
"grad_norm": 1.4697034358978271,
|
| 44862 |
+
"learning_rate": 6.392999999999999e-05,
|
| 44863 |
+
"loss": 0.5293,
|
| 44864 |
+
"step": 6394
|
| 44865 |
+
},
|
| 44866 |
+
{
|
| 44867 |
+
"epoch": 0.0006395,
|
| 44868 |
+
"grad_norm": 1.679388165473938,
|
| 44869 |
+
"learning_rate": 6.394e-05,
|
| 44870 |
+
"loss": 0.728,
|
| 44871 |
+
"step": 6395
|
| 44872 |
+
},
|
| 44873 |
+
{
|
| 44874 |
+
"epoch": 0.0006396,
|
| 44875 |
+
"grad_norm": 1.0928062200546265,
|
| 44876 |
+
"learning_rate": 6.395000000000001e-05,
|
| 44877 |
+
"loss": 0.4265,
|
| 44878 |
+
"step": 6396
|
| 44879 |
+
},
|
| 44880 |
+
{
|
| 44881 |
+
"epoch": 0.0006397,
|
| 44882 |
+
"grad_norm": 8.773086547851562,
|
| 44883 |
+
"learning_rate": 6.395999999999999e-05,
|
| 44884 |
+
"loss": 1.2278,
|
| 44885 |
+
"step": 6397
|
| 44886 |
+
},
|
| 44887 |
+
{
|
| 44888 |
+
"epoch": 0.0006398,
|
| 44889 |
+
"grad_norm": 1.7383633852005005,
|
| 44890 |
+
"learning_rate": 6.397e-05,
|
| 44891 |
+
"loss": 0.5452,
|
| 44892 |
+
"step": 6398
|
| 44893 |
+
},
|
| 44894 |
+
{
|
| 44895 |
+
"epoch": 0.0006399,
|
| 44896 |
+
"grad_norm": 1.2912527322769165,
|
| 44897 |
+
"learning_rate": 6.398000000000001e-05,
|
| 44898 |
+
"loss": 0.4585,
|
| 44899 |
+
"step": 6399
|
| 44900 |
+
},
|
| 44901 |
+
{
|
| 44902 |
+
"epoch": 0.00064,
|
| 44903 |
+
"grad_norm": 1.9075850248336792,
|
| 44904 |
+
"learning_rate": 6.399e-05,
|
| 44905 |
+
"loss": 0.5771,
|
| 44906 |
+
"step": 6400
|
| 44907 |
+
},
|
| 44908 |
+
{
|
| 44909 |
+
"epoch": 0.0006401,
|
| 44910 |
+
"grad_norm": 2.1430504322052,
|
| 44911 |
+
"learning_rate": 6.4e-05,
|
| 44912 |
+
"loss": 0.5281,
|
| 44913 |
+
"step": 6401
|
| 44914 |
+
},
|
| 44915 |
+
{
|
| 44916 |
+
"epoch": 0.0006402,
|
| 44917 |
+
"grad_norm": 1.9719579219818115,
|
| 44918 |
+
"learning_rate": 6.401e-05,
|
| 44919 |
+
"loss": 0.6748,
|
| 44920 |
+
"step": 6402
|
| 44921 |
+
},
|
| 44922 |
+
{
|
| 44923 |
+
"epoch": 0.0006403,
|
| 44924 |
+
"grad_norm": 1.5823168754577637,
|
| 44925 |
+
"learning_rate": 6.402e-05,
|
| 44926 |
+
"loss": 0.4995,
|
| 44927 |
+
"step": 6403
|
| 44928 |
+
},
|
| 44929 |
+
{
|
| 44930 |
+
"epoch": 0.0006404,
|
| 44931 |
+
"grad_norm": 1.2021610736846924,
|
| 44932 |
+
"learning_rate": 6.403e-05,
|
| 44933 |
+
"loss": 0.4573,
|
| 44934 |
+
"step": 6404
|
| 44935 |
+
},
|
| 44936 |
+
{
|
| 44937 |
+
"epoch": 0.0006405,
|
| 44938 |
+
"grad_norm": 1.2292171716690063,
|
| 44939 |
+
"learning_rate": 6.404e-05,
|
| 44940 |
+
"loss": 0.4844,
|
| 44941 |
+
"step": 6405
|
| 44942 |
+
},
|
| 44943 |
+
{
|
| 44944 |
+
"epoch": 0.0006406,
|
| 44945 |
+
"grad_norm": 1.312039852142334,
|
| 44946 |
+
"learning_rate": 6.405e-05,
|
| 44947 |
+
"loss": 0.4619,
|
| 44948 |
+
"step": 6406
|
| 44949 |
+
},
|
| 44950 |
+
{
|
| 44951 |
+
"epoch": 0.0006407,
|
| 44952 |
+
"grad_norm": 1.3954253196716309,
|
| 44953 |
+
"learning_rate": 6.405999999999999e-05,
|
| 44954 |
+
"loss": 0.4961,
|
| 44955 |
+
"step": 6407
|
| 44956 |
+
},
|
| 44957 |
+
{
|
| 44958 |
+
"epoch": 0.0006408,
|
| 44959 |
+
"grad_norm": 2.8254330158233643,
|
| 44960 |
+
"learning_rate": 6.407e-05,
|
| 44961 |
+
"loss": 0.8953,
|
| 44962 |
+
"step": 6408
|
| 44963 |
+
},
|
| 44964 |
+
{
|
| 44965 |
+
"epoch": 0.0006409,
|
| 44966 |
+
"grad_norm": 1.5670243501663208,
|
| 44967 |
+
"learning_rate": 6.408e-05,
|
| 44968 |
+
"loss": 0.554,
|
| 44969 |
+
"step": 6409
|
| 44970 |
+
},
|
| 44971 |
+
{
|
| 44972 |
+
"epoch": 0.000641,
|
| 44973 |
+
"grad_norm": 1.5319308042526245,
|
| 44974 |
+
"learning_rate": 6.409e-05,
|
| 44975 |
+
"loss": 0.4963,
|
| 44976 |
+
"step": 6410
|
| 44977 |
+
},
|
| 44978 |
+
{
|
| 44979 |
+
"epoch": 0.0006411,
|
| 44980 |
+
"grad_norm": 1.168323040008545,
|
| 44981 |
+
"learning_rate": 6.41e-05,
|
| 44982 |
+
"loss": 0.4651,
|
| 44983 |
+
"step": 6411
|
| 44984 |
+
},
|
| 44985 |
+
{
|
| 44986 |
+
"epoch": 0.0006412,
|
| 44987 |
+
"grad_norm": 1.4604196548461914,
|
| 44988 |
+
"learning_rate": 6.411000000000001e-05,
|
| 44989 |
+
"loss": 0.5515,
|
| 44990 |
+
"step": 6412
|
| 44991 |
+
},
|
| 44992 |
+
{
|
| 44993 |
+
"epoch": 0.0006413,
|
| 44994 |
+
"grad_norm": 1.3442469835281372,
|
| 44995 |
+
"learning_rate": 6.412e-05,
|
| 44996 |
+
"loss": 0.51,
|
| 44997 |
+
"step": 6413
|
| 44998 |
+
},
|
| 44999 |
+
{
|
| 45000 |
+
"epoch": 0.0006414,
|
| 45001 |
+
"grad_norm": 1.2407495975494385,
|
| 45002 |
+
"learning_rate": 6.413e-05,
|
| 45003 |
+
"loss": 0.4456,
|
| 45004 |
+
"step": 6414
|
| 45005 |
+
},
|
| 45006 |
+
{
|
| 45007 |
+
"epoch": 0.0006415,
|
| 45008 |
+
"grad_norm": 1.054099202156067,
|
| 45009 |
+
"learning_rate": 6.414e-05,
|
| 45010 |
+
"loss": 0.4641,
|
| 45011 |
+
"step": 6415
|
| 45012 |
+
},
|
| 45013 |
+
{
|
| 45014 |
+
"epoch": 0.0006416,
|
| 45015 |
+
"grad_norm": 1.4334583282470703,
|
| 45016 |
+
"learning_rate": 6.415e-05,
|
| 45017 |
+
"loss": 0.5798,
|
| 45018 |
+
"step": 6416
|
| 45019 |
+
},
|
| 45020 |
+
{
|
| 45021 |
+
"epoch": 0.0006417,
|
| 45022 |
+
"grad_norm": 1.3165981769561768,
|
| 45023 |
+
"learning_rate": 6.416e-05,
|
| 45024 |
+
"loss": 0.4995,
|
| 45025 |
+
"step": 6417
|
| 45026 |
+
},
|
| 45027 |
+
{
|
| 45028 |
+
"epoch": 0.0006418,
|
| 45029 |
+
"grad_norm": 1.318405270576477,
|
| 45030 |
+
"learning_rate": 6.417e-05,
|
| 45031 |
+
"loss": 0.47,
|
| 45032 |
+
"step": 6418
|
| 45033 |
+
},
|
| 45034 |
+
{
|
| 45035 |
+
"epoch": 0.0006419,
|
| 45036 |
+
"grad_norm": 1.5474642515182495,
|
| 45037 |
+
"learning_rate": 6.418e-05,
|
| 45038 |
+
"loss": 0.6069,
|
| 45039 |
+
"step": 6419
|
| 45040 |
+
},
|
| 45041 |
+
{
|
| 45042 |
+
"epoch": 0.000642,
|
| 45043 |
+
"grad_norm": 1.4654685258865356,
|
| 45044 |
+
"learning_rate": 6.419000000000001e-05,
|
| 45045 |
+
"loss": 0.467,
|
| 45046 |
+
"step": 6420
|
| 45047 |
+
},
|
| 45048 |
+
{
|
| 45049 |
+
"epoch": 0.0006421,
|
| 45050 |
+
"grad_norm": 1.2679288387298584,
|
| 45051 |
+
"learning_rate": 6.42e-05,
|
| 45052 |
+
"loss": 0.4907,
|
| 45053 |
+
"step": 6421
|
| 45054 |
+
},
|
| 45055 |
+
{
|
| 45056 |
+
"epoch": 0.0006422,
|
| 45057 |
+
"grad_norm": 1.0179003477096558,
|
| 45058 |
+
"learning_rate": 6.421e-05,
|
| 45059 |
+
"loss": 0.4478,
|
| 45060 |
+
"step": 6422
|
| 45061 |
+
},
|
| 45062 |
+
{
|
| 45063 |
+
"epoch": 0.0006423,
|
| 45064 |
+
"grad_norm": 1.70924973487854,
|
| 45065 |
+
"learning_rate": 6.422e-05,
|
| 45066 |
+
"loss": 0.5159,
|
| 45067 |
+
"step": 6423
|
| 45068 |
+
},
|
| 45069 |
+
{
|
| 45070 |
+
"epoch": 0.0006424,
|
| 45071 |
+
"grad_norm": 1.2784104347229004,
|
| 45072 |
+
"learning_rate": 6.423e-05,
|
| 45073 |
+
"loss": 0.4739,
|
| 45074 |
+
"step": 6424
|
| 45075 |
+
},
|
| 45076 |
+
{
|
| 45077 |
+
"epoch": 0.0006425,
|
| 45078 |
+
"grad_norm": 3.122347831726074,
|
| 45079 |
+
"learning_rate": 6.424e-05,
|
| 45080 |
+
"loss": 0.5659,
|
| 45081 |
+
"step": 6425
|
| 45082 |
+
},
|
| 45083 |
+
{
|
| 45084 |
+
"epoch": 0.0006426,
|
| 45085 |
+
"grad_norm": 1.4162781238555908,
|
| 45086 |
+
"learning_rate": 6.425e-05,
|
| 45087 |
+
"loss": 0.5107,
|
| 45088 |
+
"step": 6426
|
| 45089 |
+
},
|
| 45090 |
+
{
|
| 45091 |
+
"epoch": 0.0006427,
|
| 45092 |
+
"grad_norm": 1.7903881072998047,
|
| 45093 |
+
"learning_rate": 6.426000000000001e-05,
|
| 45094 |
+
"loss": 0.574,
|
| 45095 |
+
"step": 6427
|
| 45096 |
+
},
|
| 45097 |
+
{
|
| 45098 |
+
"epoch": 0.0006428,
|
| 45099 |
+
"grad_norm": 1.3344473838806152,
|
| 45100 |
+
"learning_rate": 6.426999999999999e-05,
|
| 45101 |
+
"loss": 0.4871,
|
| 45102 |
+
"step": 6428
|
| 45103 |
+
},
|
| 45104 |
+
{
|
| 45105 |
+
"epoch": 0.0006429,
|
| 45106 |
+
"grad_norm": 1.2928576469421387,
|
| 45107 |
+
"learning_rate": 6.428e-05,
|
| 45108 |
+
"loss": 0.4675,
|
| 45109 |
+
"step": 6429
|
| 45110 |
+
},
|
| 45111 |
+
{
|
| 45112 |
+
"epoch": 0.000643,
|
| 45113 |
+
"grad_norm": 1.1451905965805054,
|
| 45114 |
+
"learning_rate": 6.429000000000001e-05,
|
| 45115 |
+
"loss": 0.4661,
|
| 45116 |
+
"step": 6430
|
| 45117 |
+
},
|
| 45118 |
+
{
|
| 45119 |
+
"epoch": 0.0006431,
|
| 45120 |
+
"grad_norm": 1.119084358215332,
|
| 45121 |
+
"learning_rate": 6.429999999999999e-05,
|
| 45122 |
+
"loss": 0.4546,
|
| 45123 |
+
"step": 6431
|
| 45124 |
+
},
|
| 45125 |
+
{
|
| 45126 |
+
"epoch": 0.0006432,
|
| 45127 |
+
"grad_norm": 1.051802158355713,
|
| 45128 |
+
"learning_rate": 6.431e-05,
|
| 45129 |
+
"loss": 0.4294,
|
| 45130 |
+
"step": 6432
|
| 45131 |
+
},
|
| 45132 |
+
{
|
| 45133 |
+
"epoch": 0.0006433,
|
| 45134 |
+
"grad_norm": 2.0395543575286865,
|
| 45135 |
+
"learning_rate": 6.432000000000001e-05,
|
| 45136 |
+
"loss": 0.77,
|
| 45137 |
+
"step": 6433
|
| 45138 |
+
},
|
| 45139 |
+
{
|
| 45140 |
+
"epoch": 0.0006434,
|
| 45141 |
+
"grad_norm": 2.3523051738739014,
|
| 45142 |
+
"learning_rate": 6.433e-05,
|
| 45143 |
+
"loss": 0.8062,
|
| 45144 |
+
"step": 6434
|
| 45145 |
+
},
|
| 45146 |
+
{
|
| 45147 |
+
"epoch": 0.0006435,
|
| 45148 |
+
"grad_norm": 1.3502811193466187,
|
| 45149 |
+
"learning_rate": 6.434e-05,
|
| 45150 |
+
"loss": 0.4778,
|
| 45151 |
+
"step": 6435
|
| 45152 |
+
},
|
| 45153 |
+
{
|
| 45154 |
+
"epoch": 0.0006436,
|
| 45155 |
+
"grad_norm": 1.2692770957946777,
|
| 45156 |
+
"learning_rate": 6.435e-05,
|
| 45157 |
+
"loss": 0.498,
|
| 45158 |
+
"step": 6436
|
| 45159 |
+
},
|
| 45160 |
+
{
|
| 45161 |
+
"epoch": 0.0006437,
|
| 45162 |
+
"grad_norm": 2.3005588054656982,
|
| 45163 |
+
"learning_rate": 6.436e-05,
|
| 45164 |
+
"loss": 0.668,
|
| 45165 |
+
"step": 6437
|
| 45166 |
+
},
|
| 45167 |
+
{
|
| 45168 |
+
"epoch": 0.0006438,
|
| 45169 |
+
"grad_norm": 2.2760753631591797,
|
| 45170 |
+
"learning_rate": 6.437e-05,
|
| 45171 |
+
"loss": 0.6902,
|
| 45172 |
+
"step": 6438
|
| 45173 |
+
},
|
| 45174 |
+
{
|
| 45175 |
+
"epoch": 0.0006439,
|
| 45176 |
+
"grad_norm": 1.4470828771591187,
|
| 45177 |
+
"learning_rate": 6.438e-05,
|
| 45178 |
+
"loss": 0.5142,
|
| 45179 |
+
"step": 6439
|
| 45180 |
+
},
|
| 45181 |
+
{
|
| 45182 |
+
"epoch": 0.000644,
|
| 45183 |
+
"grad_norm": 1.0976805686950684,
|
| 45184 |
+
"learning_rate": 6.439e-05,
|
| 45185 |
+
"loss": 0.4333,
|
| 45186 |
+
"step": 6440
|
| 45187 |
+
},
|
| 45188 |
+
{
|
| 45189 |
+
"epoch": 0.0006441,
|
| 45190 |
+
"grad_norm": 1.3559536933898926,
|
| 45191 |
+
"learning_rate": 6.44e-05,
|
| 45192 |
+
"loss": 0.5139,
|
| 45193 |
+
"step": 6441
|
| 45194 |
+
},
|
| 45195 |
+
{
|
| 45196 |
+
"epoch": 0.0006442,
|
| 45197 |
+
"grad_norm": 1.2253655195236206,
|
| 45198 |
+
"learning_rate": 6.441e-05,
|
| 45199 |
+
"loss": 0.447,
|
| 45200 |
+
"step": 6442
|
| 45201 |
+
},
|
| 45202 |
+
{
|
| 45203 |
+
"epoch": 0.0006443,
|
| 45204 |
+
"grad_norm": 1.7331396341323853,
|
| 45205 |
+
"learning_rate": 6.442e-05,
|
| 45206 |
+
"loss": 0.6284,
|
| 45207 |
+
"step": 6443
|
| 45208 |
+
},
|
| 45209 |
+
{
|
| 45210 |
+
"epoch": 0.0006444,
|
| 45211 |
+
"grad_norm": 1.949229121208191,
|
| 45212 |
+
"learning_rate": 6.443e-05,
|
| 45213 |
+
"loss": 0.7109,
|
| 45214 |
+
"step": 6444
|
| 45215 |
+
},
|
| 45216 |
+
{
|
| 45217 |
+
"epoch": 0.0006445,
|
| 45218 |
+
"grad_norm": 1.2023966312408447,
|
| 45219 |
+
"learning_rate": 6.444e-05,
|
| 45220 |
+
"loss": 0.4602,
|
| 45221 |
+
"step": 6445
|
| 45222 |
+
},
|
| 45223 |
+
{
|
| 45224 |
+
"epoch": 0.0006446,
|
| 45225 |
+
"grad_norm": 1.1662184000015259,
|
| 45226 |
+
"learning_rate": 6.445000000000001e-05,
|
| 45227 |
+
"loss": 0.4561,
|
| 45228 |
+
"step": 6446
|
| 45229 |
+
},
|
| 45230 |
+
{
|
| 45231 |
+
"epoch": 0.0006447,
|
| 45232 |
+
"grad_norm": 1.2656923532485962,
|
| 45233 |
+
"learning_rate": 6.446e-05,
|
| 45234 |
+
"loss": 0.498,
|
| 45235 |
+
"step": 6447
|
| 45236 |
+
},
|
| 45237 |
+
{
|
| 45238 |
+
"epoch": 0.0006448,
|
| 45239 |
+
"grad_norm": 1.1124790906906128,
|
| 45240 |
+
"learning_rate": 6.447e-05,
|
| 45241 |
+
"loss": 0.4751,
|
| 45242 |
+
"step": 6448
|
| 45243 |
+
},
|
| 45244 |
+
{
|
| 45245 |
+
"epoch": 0.0006449,
|
| 45246 |
+
"grad_norm": 1.4062652587890625,
|
| 45247 |
+
"learning_rate": 6.448e-05,
|
| 45248 |
+
"loss": 0.5117,
|
| 45249 |
+
"step": 6449
|
| 45250 |
+
},
|
| 45251 |
+
{
|
| 45252 |
+
"epoch": 0.000645,
|
| 45253 |
+
"grad_norm": 1.2406326532363892,
|
| 45254 |
+
"learning_rate": 6.449e-05,
|
| 45255 |
+
"loss": 0.4729,
|
| 45256 |
+
"step": 6450
|
| 45257 |
+
},
|
| 45258 |
+
{
|
| 45259 |
+
"epoch": 0.0006451,
|
| 45260 |
+
"grad_norm": 1.0493435859680176,
|
| 45261 |
+
"learning_rate": 6.45e-05,
|
| 45262 |
+
"loss": 0.4402,
|
| 45263 |
+
"step": 6451
|
| 45264 |
+
},
|
| 45265 |
+
{
|
| 45266 |
+
"epoch": 0.0006452,
|
| 45267 |
+
"grad_norm": 1.0537282228469849,
|
| 45268 |
+
"learning_rate": 6.451e-05,
|
| 45269 |
+
"loss": 0.4534,
|
| 45270 |
+
"step": 6452
|
| 45271 |
+
},
|
| 45272 |
+
{
|
| 45273 |
+
"epoch": 0.0006453,
|
| 45274 |
+
"grad_norm": 1.1457775831222534,
|
| 45275 |
+
"learning_rate": 6.452e-05,
|
| 45276 |
+
"loss": 0.4487,
|
| 45277 |
+
"step": 6453
|
| 45278 |
+
},
|
| 45279 |
+
{
|
| 45280 |
+
"epoch": 0.0006454,
|
| 45281 |
+
"grad_norm": 1.4109300374984741,
|
| 45282 |
+
"learning_rate": 6.453000000000001e-05,
|
| 45283 |
+
"loss": 0.5303,
|
| 45284 |
+
"step": 6454
|
| 45285 |
+
},
|
| 45286 |
+
{
|
| 45287 |
+
"epoch": 0.0006455,
|
| 45288 |
+
"grad_norm": 1.0803672075271606,
|
| 45289 |
+
"learning_rate": 6.454e-05,
|
| 45290 |
+
"loss": 0.4465,
|
| 45291 |
+
"step": 6455
|
| 45292 |
+
},
|
| 45293 |
+
{
|
| 45294 |
+
"epoch": 0.0006456,
|
| 45295 |
+
"grad_norm": 1.101141333580017,
|
| 45296 |
+
"learning_rate": 6.455e-05,
|
| 45297 |
+
"loss": 0.4792,
|
| 45298 |
+
"step": 6456
|
| 45299 |
+
},
|
| 45300 |
+
{
|
| 45301 |
+
"epoch": 0.0006457,
|
| 45302 |
+
"grad_norm": 2.4582433700561523,
|
| 45303 |
+
"learning_rate": 6.456e-05,
|
| 45304 |
+
"loss": 0.7312,
|
| 45305 |
+
"step": 6457
|
| 45306 |
+
},
|
| 45307 |
+
{
|
| 45308 |
+
"epoch": 0.0006458,
|
| 45309 |
+
"grad_norm": 1.2731608152389526,
|
| 45310 |
+
"learning_rate": 6.457e-05,
|
| 45311 |
+
"loss": 0.4822,
|
| 45312 |
+
"step": 6458
|
| 45313 |
+
},
|
| 45314 |
+
{
|
| 45315 |
+
"epoch": 0.0006459,
|
| 45316 |
+
"grad_norm": 1.3148726224899292,
|
| 45317 |
+
"learning_rate": 6.458e-05,
|
| 45318 |
+
"loss": 0.4658,
|
| 45319 |
+
"step": 6459
|
| 45320 |
+
},
|
| 45321 |
+
{
|
| 45322 |
+
"epoch": 0.000646,
|
| 45323 |
+
"grad_norm": 1.7852345705032349,
|
| 45324 |
+
"learning_rate": 6.459e-05,
|
| 45325 |
+
"loss": 0.5452,
|
| 45326 |
+
"step": 6460
|
| 45327 |
+
},
|
| 45328 |
+
{
|
| 45329 |
+
"epoch": 0.0006461,
|
| 45330 |
+
"grad_norm": 1.274753212928772,
|
| 45331 |
+
"learning_rate": 6.460000000000001e-05,
|
| 45332 |
+
"loss": 0.5164,
|
| 45333 |
+
"step": 6461
|
| 45334 |
+
},
|
| 45335 |
+
{
|
| 45336 |
+
"epoch": 0.0006462,
|
| 45337 |
+
"grad_norm": 1.173985481262207,
|
| 45338 |
+
"learning_rate": 6.460999999999999e-05,
|
| 45339 |
+
"loss": 0.4419,
|
| 45340 |
+
"step": 6462
|
| 45341 |
+
},
|
| 45342 |
+
{
|
| 45343 |
+
"epoch": 0.0006463,
|
| 45344 |
+
"grad_norm": 1.1694120168685913,
|
| 45345 |
+
"learning_rate": 6.462e-05,
|
| 45346 |
+
"loss": 0.4819,
|
| 45347 |
+
"step": 6463
|
| 45348 |
+
},
|
| 45349 |
+
{
|
| 45350 |
+
"epoch": 0.0006464,
|
| 45351 |
+
"grad_norm": 1.2963957786560059,
|
| 45352 |
+
"learning_rate": 6.463000000000001e-05,
|
| 45353 |
+
"loss": 0.4668,
|
| 45354 |
+
"step": 6464
|
| 45355 |
+
},
|
| 45356 |
+
{
|
| 45357 |
+
"epoch": 0.0006465,
|
| 45358 |
+
"grad_norm": 1.2287315130233765,
|
| 45359 |
+
"learning_rate": 6.463999999999999e-05,
|
| 45360 |
+
"loss": 0.4695,
|
| 45361 |
+
"step": 6465
|
| 45362 |
+
},
|
| 45363 |
+
{
|
| 45364 |
+
"epoch": 0.0006466,
|
| 45365 |
+
"grad_norm": 1.3719747066497803,
|
| 45366 |
+
"learning_rate": 6.465e-05,
|
| 45367 |
+
"loss": 0.5833,
|
| 45368 |
+
"step": 6466
|
| 45369 |
+
},
|
| 45370 |
+
{
|
| 45371 |
+
"epoch": 0.0006467,
|
| 45372 |
+
"grad_norm": 2.0178167819976807,
|
| 45373 |
+
"learning_rate": 6.466000000000001e-05,
|
| 45374 |
+
"loss": 0.5288,
|
| 45375 |
+
"step": 6467
|
| 45376 |
+
},
|
| 45377 |
+
{
|
| 45378 |
+
"epoch": 0.0006468,
|
| 45379 |
+
"grad_norm": 1.0148391723632812,
|
| 45380 |
+
"learning_rate": 6.467e-05,
|
| 45381 |
+
"loss": 0.4104,
|
| 45382 |
+
"step": 6468
|
| 45383 |
+
},
|
| 45384 |
+
{
|
| 45385 |
+
"epoch": 0.0006469,
|
| 45386 |
+
"grad_norm": 1.1594983339309692,
|
| 45387 |
+
"learning_rate": 6.468e-05,
|
| 45388 |
+
"loss": 0.4709,
|
| 45389 |
+
"step": 6469
|
| 45390 |
+
},
|
| 45391 |
+
{
|
| 45392 |
+
"epoch": 0.000647,
|
| 45393 |
+
"grad_norm": 1.2085261344909668,
|
| 45394 |
+
"learning_rate": 6.469e-05,
|
| 45395 |
+
"loss": 0.5552,
|
| 45396 |
+
"step": 6470
|
| 45397 |
+
},
|
| 45398 |
+
{
|
| 45399 |
+
"epoch": 0.0006471,
|
| 45400 |
+
"grad_norm": 1.0414901971817017,
|
| 45401 |
+
"learning_rate": 6.47e-05,
|
| 45402 |
+
"loss": 0.4329,
|
| 45403 |
+
"step": 6471
|
| 45404 |
+
},
|
| 45405 |
+
{
|
| 45406 |
+
"epoch": 0.0006472,
|
| 45407 |
+
"grad_norm": 1.1136866807937622,
|
| 45408 |
+
"learning_rate": 6.471e-05,
|
| 45409 |
+
"loss": 0.4314,
|
| 45410 |
+
"step": 6472
|
| 45411 |
+
},
|
| 45412 |
+
{
|
| 45413 |
+
"epoch": 0.0006473,
|
| 45414 |
+
"grad_norm": 1.0286751985549927,
|
| 45415 |
+
"learning_rate": 6.472e-05,
|
| 45416 |
+
"loss": 0.4463,
|
| 45417 |
+
"step": 6473
|
| 45418 |
+
},
|
| 45419 |
+
{
|
| 45420 |
+
"epoch": 0.0006474,
|
| 45421 |
+
"grad_norm": 1.04264235496521,
|
| 45422 |
+
"learning_rate": 6.473e-05,
|
| 45423 |
+
"loss": 0.4739,
|
| 45424 |
+
"step": 6474
|
| 45425 |
+
},
|
| 45426 |
+
{
|
| 45427 |
+
"epoch": 0.0006475,
|
| 45428 |
+
"grad_norm": 1.2967795133590698,
|
| 45429 |
+
"learning_rate": 6.474e-05,
|
| 45430 |
+
"loss": 0.4812,
|
| 45431 |
+
"step": 6475
|
| 45432 |
+
},
|
| 45433 |
+
{
|
| 45434 |
+
"epoch": 0.0006476,
|
| 45435 |
+
"grad_norm": 1.0789908170700073,
|
| 45436 |
+
"learning_rate": 6.475e-05,
|
| 45437 |
+
"loss": 0.4585,
|
| 45438 |
+
"step": 6476
|
| 45439 |
+
},
|
| 45440 |
+
{
|
| 45441 |
+
"epoch": 0.0006477,
|
| 45442 |
+
"grad_norm": 1.106982707977295,
|
| 45443 |
+
"learning_rate": 6.476e-05,
|
| 45444 |
+
"loss": 0.4736,
|
| 45445 |
+
"step": 6477
|
| 45446 |
+
},
|
| 45447 |
+
{
|
| 45448 |
+
"epoch": 0.0006478,
|
| 45449 |
+
"grad_norm": 1.0327823162078857,
|
| 45450 |
+
"learning_rate": 6.477e-05,
|
| 45451 |
+
"loss": 0.4155,
|
| 45452 |
+
"step": 6478
|
| 45453 |
+
},
|
| 45454 |
+
{
|
| 45455 |
+
"epoch": 0.0006479,
|
| 45456 |
+
"grad_norm": 1.0931192636489868,
|
| 45457 |
+
"learning_rate": 6.478e-05,
|
| 45458 |
+
"loss": 0.4358,
|
| 45459 |
+
"step": 6479
|
| 45460 |
+
},
|
| 45461 |
+
{
|
| 45462 |
+
"epoch": 0.000648,
|
| 45463 |
+
"grad_norm": 1.0536428689956665,
|
| 45464 |
+
"learning_rate": 6.479000000000001e-05,
|
| 45465 |
+
"loss": 0.4285,
|
| 45466 |
+
"step": 6480
|
| 45467 |
+
},
|
| 45468 |
+
{
|
| 45469 |
+
"epoch": 0.0006481,
|
| 45470 |
+
"grad_norm": 1.1148128509521484,
|
| 45471 |
+
"learning_rate": 6.48e-05,
|
| 45472 |
+
"loss": 0.4421,
|
| 45473 |
+
"step": 6481
|
| 45474 |
+
},
|
| 45475 |
+
{
|
| 45476 |
+
"epoch": 0.0006482,
|
| 45477 |
+
"grad_norm": 0.9446101784706116,
|
| 45478 |
+
"learning_rate": 6.481e-05,
|
| 45479 |
+
"loss": 0.3987,
|
| 45480 |
+
"step": 6482
|
| 45481 |
+
},
|
| 45482 |
+
{
|
| 45483 |
+
"epoch": 0.0006483,
|
| 45484 |
+
"grad_norm": 1.0021119117736816,
|
| 45485 |
+
"learning_rate": 6.482e-05,
|
| 45486 |
+
"loss": 0.4248,
|
| 45487 |
+
"step": 6483
|
| 45488 |
+
},
|
| 45489 |
+
{
|
| 45490 |
+
"epoch": 0.0006484,
|
| 45491 |
+
"grad_norm": 0.9024091958999634,
|
| 45492 |
+
"learning_rate": 6.483e-05,
|
| 45493 |
+
"loss": 0.4197,
|
| 45494 |
+
"step": 6484
|
| 45495 |
+
},
|
| 45496 |
+
{
|
| 45497 |
+
"epoch": 0.0006485,
|
| 45498 |
+
"grad_norm": 2.6724512577056885,
|
| 45499 |
+
"learning_rate": 6.484e-05,
|
| 45500 |
+
"loss": 0.7097,
|
| 45501 |
+
"step": 6485
|
| 45502 |
+
},
|
| 45503 |
+
{
|
| 45504 |
+
"epoch": 0.0006486,
|
| 45505 |
+
"grad_norm": 2.107272148132324,
|
| 45506 |
+
"learning_rate": 6.485e-05,
|
| 45507 |
+
"loss": 0.656,
|
| 45508 |
+
"step": 6486
|
| 45509 |
+
},
|
| 45510 |
+
{
|
| 45511 |
+
"epoch": 0.0006487,
|
| 45512 |
+
"grad_norm": 1.1949630975723267,
|
| 45513 |
+
"learning_rate": 6.486e-05,
|
| 45514 |
+
"loss": 0.4294,
|
| 45515 |
+
"step": 6487
|
| 45516 |
+
},
|
| 45517 |
+
{
|
| 45518 |
+
"epoch": 0.0006488,
|
| 45519 |
+
"grad_norm": 1.190617322921753,
|
| 45520 |
+
"learning_rate": 6.487000000000001e-05,
|
| 45521 |
+
"loss": 0.4436,
|
| 45522 |
+
"step": 6488
|
| 45523 |
+
},
|
| 45524 |
+
{
|
| 45525 |
+
"epoch": 0.0006489,
|
| 45526 |
+
"grad_norm": 0.9928703904151917,
|
| 45527 |
+
"learning_rate": 6.488e-05,
|
| 45528 |
+
"loss": 0.4324,
|
| 45529 |
+
"step": 6489
|
| 45530 |
+
},
|
| 45531 |
+
{
|
| 45532 |
+
"epoch": 0.000649,
|
| 45533 |
+
"grad_norm": 3.6930248737335205,
|
| 45534 |
+
"learning_rate": 6.489e-05,
|
| 45535 |
+
"loss": 0.7405,
|
| 45536 |
+
"step": 6490
|
| 45537 |
+
},
|
| 45538 |
+
{
|
| 45539 |
+
"epoch": 0.0006491,
|
| 45540 |
+
"grad_norm": 1.3998894691467285,
|
| 45541 |
+
"learning_rate": 6.49e-05,
|
| 45542 |
+
"loss": 0.4187,
|
| 45543 |
+
"step": 6491
|
| 45544 |
+
},
|
| 45545 |
+
{
|
| 45546 |
+
"epoch": 0.0006492,
|
| 45547 |
+
"grad_norm": 4.68524694442749,
|
| 45548 |
+
"learning_rate": 6.491e-05,
|
| 45549 |
+
"loss": 1.042,
|
| 45550 |
+
"step": 6492
|
| 45551 |
+
},
|
| 45552 |
+
{
|
| 45553 |
+
"epoch": 0.0006493,
|
| 45554 |
+
"grad_norm": 1.2329721450805664,
|
| 45555 |
+
"learning_rate": 6.492e-05,
|
| 45556 |
+
"loss": 0.407,
|
| 45557 |
+
"step": 6493
|
| 45558 |
+
},
|
| 45559 |
+
{
|
| 45560 |
+
"epoch": 0.0006494,
|
| 45561 |
+
"grad_norm": 1.0657070875167847,
|
| 45562 |
+
"learning_rate": 6.493e-05,
|
| 45563 |
+
"loss": 0.4189,
|
| 45564 |
+
"step": 6494
|
| 45565 |
+
},
|
| 45566 |
+
{
|
| 45567 |
+
"epoch": 0.0006495,
|
| 45568 |
+
"grad_norm": 1.1602028608322144,
|
| 45569 |
+
"learning_rate": 6.494000000000001e-05,
|
| 45570 |
+
"loss": 0.4355,
|
| 45571 |
+
"step": 6495
|
| 45572 |
+
},
|
| 45573 |
+
{
|
| 45574 |
+
"epoch": 0.0006496,
|
| 45575 |
+
"grad_norm": 1.4874273538589478,
|
| 45576 |
+
"learning_rate": 6.494999999999999e-05,
|
| 45577 |
+
"loss": 0.4944,
|
| 45578 |
+
"step": 6496
|
| 45579 |
+
},
|
| 45580 |
+
{
|
| 45581 |
+
"epoch": 0.0006497,
|
| 45582 |
+
"grad_norm": 4.198200702667236,
|
| 45583 |
+
"learning_rate": 6.496e-05,
|
| 45584 |
+
"loss": 0.5972,
|
| 45585 |
+
"step": 6497
|
| 45586 |
+
},
|
| 45587 |
+
{
|
| 45588 |
+
"epoch": 0.0006498,
|
| 45589 |
+
"grad_norm": 1.6802000999450684,
|
| 45590 |
+
"learning_rate": 6.497000000000001e-05,
|
| 45591 |
+
"loss": 0.4402,
|
| 45592 |
+
"step": 6498
|
| 45593 |
+
},
|
| 45594 |
+
{
|
| 45595 |
+
"epoch": 0.0006499,
|
| 45596 |
+
"grad_norm": 1.3226237297058105,
|
| 45597 |
+
"learning_rate": 6.497999999999999e-05,
|
| 45598 |
+
"loss": 0.4204,
|
| 45599 |
+
"step": 6499
|
| 45600 |
+
},
|
| 45601 |
+
{
|
| 45602 |
+
"epoch": 0.00065,
|
| 45603 |
+
"grad_norm": 1.4562503099441528,
|
| 45604 |
+
"learning_rate": 6.499e-05,
|
| 45605 |
+
"loss": 0.448,
|
| 45606 |
+
"step": 6500
|
| 45607 |
+
},
|
| 45608 |
+
{
|
| 45609 |
+
"epoch": 0.00065,
|
| 45610 |
+
"eval_loss": 0.04971218481659889,
|
| 45611 |
+
"eval_runtime": 364.9703,
|
| 45612 |
+
"eval_samples_per_second": 27.399,
|
| 45613 |
+
"eval_steps_per_second": 1.712,
|
| 45614 |
+
"step": 6500
|
| 45615 |
}
|
| 45616 |
],
|
| 45617 |
"logging_steps": 1,
|