Training in progress, step 8000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 91951912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0eda40429900d56f55f042468f1b11de473dc4182c8d69dca60d6b3658ad190d
|
| 3 |
size 91951912
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 183991627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33f7c2c0908b552c11eb2acab462162b6bb0f0663a7ef7c97c1bd61852fe5d91
|
| 3 |
size 183991627
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01f9a0f7843a37be87edd23f4e88aa93b38b95cc2c07503eeb1cf2e4632453a2
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cad367c6dc56640de6e3b3c0a59eb77ef5a020b8573336e64cdea8452f4bc66
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -52628,6 +52628,3514 @@
|
|
| 52628 |
"eval_samples_per_second": 27.568,
|
| 52629 |
"eval_steps_per_second": 1.723,
|
| 52630 |
"step": 7500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52631 |
}
|
| 52632 |
],
|
| 52633 |
"logging_steps": 1,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.0008,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 8000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 52628 |
"eval_samples_per_second": 27.568,
|
| 52629 |
"eval_steps_per_second": 1.723,
|
| 52630 |
"step": 7500
|
| 52631 |
+
},
|
| 52632 |
+
{
|
| 52633 |
+
"epoch": 0.0007501,
|
| 52634 |
+
"grad_norm": 0.9165650606155396,
|
| 52635 |
+
"learning_rate": 7.500000000000001e-05,
|
| 52636 |
+
"loss": 0.1976,
|
| 52637 |
+
"step": 7501
|
| 52638 |
+
},
|
| 52639 |
+
{
|
| 52640 |
+
"epoch": 0.0007502,
|
| 52641 |
+
"grad_norm": 0.4532630443572998,
|
| 52642 |
+
"learning_rate": 7.501e-05,
|
| 52643 |
+
"loss": 0.1943,
|
| 52644 |
+
"step": 7502
|
| 52645 |
+
},
|
| 52646 |
+
{
|
| 52647 |
+
"epoch": 0.0007503,
|
| 52648 |
+
"grad_norm": 0.4622610807418823,
|
| 52649 |
+
"learning_rate": 7.502e-05,
|
| 52650 |
+
"loss": 0.1764,
|
| 52651 |
+
"step": 7503
|
| 52652 |
+
},
|
| 52653 |
+
{
|
| 52654 |
+
"epoch": 0.0007504,
|
| 52655 |
+
"grad_norm": 0.4768509268760681,
|
| 52656 |
+
"learning_rate": 7.503e-05,
|
| 52657 |
+
"loss": 0.1836,
|
| 52658 |
+
"step": 7504
|
| 52659 |
+
},
|
| 52660 |
+
{
|
| 52661 |
+
"epoch": 0.0007505,
|
| 52662 |
+
"grad_norm": 0.4220264256000519,
|
| 52663 |
+
"learning_rate": 7.504e-05,
|
| 52664 |
+
"loss": 0.1754,
|
| 52665 |
+
"step": 7505
|
| 52666 |
+
},
|
| 52667 |
+
{
|
| 52668 |
+
"epoch": 0.0007506,
|
| 52669 |
+
"grad_norm": 0.4648164212703705,
|
| 52670 |
+
"learning_rate": 7.505e-05,
|
| 52671 |
+
"loss": 0.1864,
|
| 52672 |
+
"step": 7506
|
| 52673 |
+
},
|
| 52674 |
+
{
|
| 52675 |
+
"epoch": 0.0007507,
|
| 52676 |
+
"grad_norm": 0.42253103852272034,
|
| 52677 |
+
"learning_rate": 7.506e-05,
|
| 52678 |
+
"loss": 0.1842,
|
| 52679 |
+
"step": 7507
|
| 52680 |
+
},
|
| 52681 |
+
{
|
| 52682 |
+
"epoch": 0.0007508,
|
| 52683 |
+
"grad_norm": 0.41251248121261597,
|
| 52684 |
+
"learning_rate": 7.507000000000001e-05,
|
| 52685 |
+
"loss": 0.1821,
|
| 52686 |
+
"step": 7508
|
| 52687 |
+
},
|
| 52688 |
+
{
|
| 52689 |
+
"epoch": 0.0007509,
|
| 52690 |
+
"grad_norm": 0.41563406586647034,
|
| 52691 |
+
"learning_rate": 7.507999999999999e-05,
|
| 52692 |
+
"loss": 0.1869,
|
| 52693 |
+
"step": 7509
|
| 52694 |
+
},
|
| 52695 |
+
{
|
| 52696 |
+
"epoch": 0.000751,
|
| 52697 |
+
"grad_norm": 0.40142396092414856,
|
| 52698 |
+
"learning_rate": 7.509e-05,
|
| 52699 |
+
"loss": 0.1851,
|
| 52700 |
+
"step": 7510
|
| 52701 |
+
},
|
| 52702 |
+
{
|
| 52703 |
+
"epoch": 0.0007511,
|
| 52704 |
+
"grad_norm": 0.43007296323776245,
|
| 52705 |
+
"learning_rate": 7.510000000000001e-05,
|
| 52706 |
+
"loss": 0.1853,
|
| 52707 |
+
"step": 7511
|
| 52708 |
+
},
|
| 52709 |
+
{
|
| 52710 |
+
"epoch": 0.0007512,
|
| 52711 |
+
"grad_norm": 1.2085107564926147,
|
| 52712 |
+
"learning_rate": 7.511e-05,
|
| 52713 |
+
"loss": 0.25,
|
| 52714 |
+
"step": 7512
|
| 52715 |
+
},
|
| 52716 |
+
{
|
| 52717 |
+
"epoch": 0.0007513,
|
| 52718 |
+
"grad_norm": 0.6795465350151062,
|
| 52719 |
+
"learning_rate": 7.512e-05,
|
| 52720 |
+
"loss": 0.226,
|
| 52721 |
+
"step": 7513
|
| 52722 |
+
},
|
| 52723 |
+
{
|
| 52724 |
+
"epoch": 0.0007514,
|
| 52725 |
+
"grad_norm": 0.475106805562973,
|
| 52726 |
+
"learning_rate": 7.513000000000001e-05,
|
| 52727 |
+
"loss": 0.1952,
|
| 52728 |
+
"step": 7514
|
| 52729 |
+
},
|
| 52730 |
+
{
|
| 52731 |
+
"epoch": 0.0007515,
|
| 52732 |
+
"grad_norm": 0.4340527057647705,
|
| 52733 |
+
"learning_rate": 7.514e-05,
|
| 52734 |
+
"loss": 0.1792,
|
| 52735 |
+
"step": 7515
|
| 52736 |
+
},
|
| 52737 |
+
{
|
| 52738 |
+
"epoch": 0.0007516,
|
| 52739 |
+
"grad_norm": 0.4670300781726837,
|
| 52740 |
+
"learning_rate": 7.515e-05,
|
| 52741 |
+
"loss": 0.1843,
|
| 52742 |
+
"step": 7516
|
| 52743 |
+
},
|
| 52744 |
+
{
|
| 52745 |
+
"epoch": 0.0007517,
|
| 52746 |
+
"grad_norm": 0.7026162147521973,
|
| 52747 |
+
"learning_rate": 7.516e-05,
|
| 52748 |
+
"loss": 0.1896,
|
| 52749 |
+
"step": 7517
|
| 52750 |
+
},
|
| 52751 |
+
{
|
| 52752 |
+
"epoch": 0.0007518,
|
| 52753 |
+
"grad_norm": 0.4725876748561859,
|
| 52754 |
+
"learning_rate": 7.517e-05,
|
| 52755 |
+
"loss": 0.1976,
|
| 52756 |
+
"step": 7518
|
| 52757 |
+
},
|
| 52758 |
+
{
|
| 52759 |
+
"epoch": 0.0007519,
|
| 52760 |
+
"grad_norm": 0.5276523232460022,
|
| 52761 |
+
"learning_rate": 7.518e-05,
|
| 52762 |
+
"loss": 0.1975,
|
| 52763 |
+
"step": 7519
|
| 52764 |
+
},
|
| 52765 |
+
{
|
| 52766 |
+
"epoch": 0.000752,
|
| 52767 |
+
"grad_norm": 0.38987573981285095,
|
| 52768 |
+
"learning_rate": 7.519e-05,
|
| 52769 |
+
"loss": 0.1737,
|
| 52770 |
+
"step": 7520
|
| 52771 |
+
},
|
| 52772 |
+
{
|
| 52773 |
+
"epoch": 0.0007521,
|
| 52774 |
+
"grad_norm": 0.6679162383079529,
|
| 52775 |
+
"learning_rate": 7.52e-05,
|
| 52776 |
+
"loss": 0.2024,
|
| 52777 |
+
"step": 7521
|
| 52778 |
+
},
|
| 52779 |
+
{
|
| 52780 |
+
"epoch": 0.0007522,
|
| 52781 |
+
"grad_norm": 0.6624200940132141,
|
| 52782 |
+
"learning_rate": 7.520999999999999e-05,
|
| 52783 |
+
"loss": 0.2177,
|
| 52784 |
+
"step": 7522
|
| 52785 |
+
},
|
| 52786 |
+
{
|
| 52787 |
+
"epoch": 0.0007523,
|
| 52788 |
+
"grad_norm": 1.9751900434494019,
|
| 52789 |
+
"learning_rate": 7.522e-05,
|
| 52790 |
+
"loss": 0.2249,
|
| 52791 |
+
"step": 7523
|
| 52792 |
+
},
|
| 52793 |
+
{
|
| 52794 |
+
"epoch": 0.0007524,
|
| 52795 |
+
"grad_norm": 0.5174213647842407,
|
| 52796 |
+
"learning_rate": 7.523000000000001e-05,
|
| 52797 |
+
"loss": 0.1787,
|
| 52798 |
+
"step": 7524
|
| 52799 |
+
},
|
| 52800 |
+
{
|
| 52801 |
+
"epoch": 0.0007525,
|
| 52802 |
+
"grad_norm": 0.5586757659912109,
|
| 52803 |
+
"learning_rate": 7.524e-05,
|
| 52804 |
+
"loss": 0.1896,
|
| 52805 |
+
"step": 7525
|
| 52806 |
+
},
|
| 52807 |
+
{
|
| 52808 |
+
"epoch": 0.0007526,
|
| 52809 |
+
"grad_norm": 0.6579071879386902,
|
| 52810 |
+
"learning_rate": 7.525e-05,
|
| 52811 |
+
"loss": 0.2024,
|
| 52812 |
+
"step": 7526
|
| 52813 |
+
},
|
| 52814 |
+
{
|
| 52815 |
+
"epoch": 0.0007527,
|
| 52816 |
+
"grad_norm": 0.4693479835987091,
|
| 52817 |
+
"learning_rate": 7.526000000000001e-05,
|
| 52818 |
+
"loss": 0.1692,
|
| 52819 |
+
"step": 7527
|
| 52820 |
+
},
|
| 52821 |
+
{
|
| 52822 |
+
"epoch": 0.0007528,
|
| 52823 |
+
"grad_norm": 0.4994659423828125,
|
| 52824 |
+
"learning_rate": 7.527e-05,
|
| 52825 |
+
"loss": 0.1989,
|
| 52826 |
+
"step": 7528
|
| 52827 |
+
},
|
| 52828 |
+
{
|
| 52829 |
+
"epoch": 0.0007529,
|
| 52830 |
+
"grad_norm": 0.476550430059433,
|
| 52831 |
+
"learning_rate": 7.528e-05,
|
| 52832 |
+
"loss": 0.1885,
|
| 52833 |
+
"step": 7529
|
| 52834 |
+
},
|
| 52835 |
+
{
|
| 52836 |
+
"epoch": 0.000753,
|
| 52837 |
+
"grad_norm": 0.48724666237831116,
|
| 52838 |
+
"learning_rate": 7.529e-05,
|
| 52839 |
+
"loss": 0.1892,
|
| 52840 |
+
"step": 7530
|
| 52841 |
+
},
|
| 52842 |
+
{
|
| 52843 |
+
"epoch": 0.0007531,
|
| 52844 |
+
"grad_norm": 0.4618290066719055,
|
| 52845 |
+
"learning_rate": 7.53e-05,
|
| 52846 |
+
"loss": 0.1797,
|
| 52847 |
+
"step": 7531
|
| 52848 |
+
},
|
| 52849 |
+
{
|
| 52850 |
+
"epoch": 0.0007532,
|
| 52851 |
+
"grad_norm": 1.1197789907455444,
|
| 52852 |
+
"learning_rate": 7.531e-05,
|
| 52853 |
+
"loss": 0.2489,
|
| 52854 |
+
"step": 7532
|
| 52855 |
+
},
|
| 52856 |
+
{
|
| 52857 |
+
"epoch": 0.0007533,
|
| 52858 |
+
"grad_norm": 0.6072105169296265,
|
| 52859 |
+
"learning_rate": 7.532e-05,
|
| 52860 |
+
"loss": 0.1869,
|
| 52861 |
+
"step": 7533
|
| 52862 |
+
},
|
| 52863 |
+
{
|
| 52864 |
+
"epoch": 0.0007534,
|
| 52865 |
+
"grad_norm": 0.7256792187690735,
|
| 52866 |
+
"learning_rate": 7.533e-05,
|
| 52867 |
+
"loss": 0.2135,
|
| 52868 |
+
"step": 7534
|
| 52869 |
+
},
|
| 52870 |
+
{
|
| 52871 |
+
"epoch": 0.0007535,
|
| 52872 |
+
"grad_norm": 0.5450584888458252,
|
| 52873 |
+
"learning_rate": 7.534000000000001e-05,
|
| 52874 |
+
"loss": 0.1846,
|
| 52875 |
+
"step": 7535
|
| 52876 |
+
},
|
| 52877 |
+
{
|
| 52878 |
+
"epoch": 0.0007536,
|
| 52879 |
+
"grad_norm": 0.738315224647522,
|
| 52880 |
+
"learning_rate": 7.535e-05,
|
| 52881 |
+
"loss": 0.207,
|
| 52882 |
+
"step": 7536
|
| 52883 |
+
},
|
| 52884 |
+
{
|
| 52885 |
+
"epoch": 0.0007537,
|
| 52886 |
+
"grad_norm": 0.41732147336006165,
|
| 52887 |
+
"learning_rate": 7.536e-05,
|
| 52888 |
+
"loss": 0.1733,
|
| 52889 |
+
"step": 7537
|
| 52890 |
+
},
|
| 52891 |
+
{
|
| 52892 |
+
"epoch": 0.0007538,
|
| 52893 |
+
"grad_norm": 0.6369668841362,
|
| 52894 |
+
"learning_rate": 7.537e-05,
|
| 52895 |
+
"loss": 0.1918,
|
| 52896 |
+
"step": 7538
|
| 52897 |
+
},
|
| 52898 |
+
{
|
| 52899 |
+
"epoch": 0.0007539,
|
| 52900 |
+
"grad_norm": 0.6139333844184875,
|
| 52901 |
+
"learning_rate": 7.538e-05,
|
| 52902 |
+
"loss": 0.1976,
|
| 52903 |
+
"step": 7539
|
| 52904 |
+
},
|
| 52905 |
+
{
|
| 52906 |
+
"epoch": 0.000754,
|
| 52907 |
+
"grad_norm": 0.6071346998214722,
|
| 52908 |
+
"learning_rate": 7.539e-05,
|
| 52909 |
+
"loss": 0.2021,
|
| 52910 |
+
"step": 7540
|
| 52911 |
+
},
|
| 52912 |
+
{
|
| 52913 |
+
"epoch": 0.0007541,
|
| 52914 |
+
"grad_norm": 0.9224357008934021,
|
| 52915 |
+
"learning_rate": 7.54e-05,
|
| 52916 |
+
"loss": 0.2198,
|
| 52917 |
+
"step": 7541
|
| 52918 |
+
},
|
| 52919 |
+
{
|
| 52920 |
+
"epoch": 0.0007542,
|
| 52921 |
+
"grad_norm": 0.6226947903633118,
|
| 52922 |
+
"learning_rate": 7.541000000000001e-05,
|
| 52923 |
+
"loss": 0.2031,
|
| 52924 |
+
"step": 7542
|
| 52925 |
+
},
|
| 52926 |
+
{
|
| 52927 |
+
"epoch": 0.0007543,
|
| 52928 |
+
"grad_norm": 0.44756942987442017,
|
| 52929 |
+
"learning_rate": 7.541999999999999e-05,
|
| 52930 |
+
"loss": 0.1685,
|
| 52931 |
+
"step": 7543
|
| 52932 |
+
},
|
| 52933 |
+
{
|
| 52934 |
+
"epoch": 0.0007544,
|
| 52935 |
+
"grad_norm": 0.551895797252655,
|
| 52936 |
+
"learning_rate": 7.543e-05,
|
| 52937 |
+
"loss": 0.1893,
|
| 52938 |
+
"step": 7544
|
| 52939 |
+
},
|
| 52940 |
+
{
|
| 52941 |
+
"epoch": 0.0007545,
|
| 52942 |
+
"grad_norm": 2.0797204971313477,
|
| 52943 |
+
"learning_rate": 7.544000000000001e-05,
|
| 52944 |
+
"loss": 0.3489,
|
| 52945 |
+
"step": 7545
|
| 52946 |
+
},
|
| 52947 |
+
{
|
| 52948 |
+
"epoch": 0.0007546,
|
| 52949 |
+
"grad_norm": 0.49439364671707153,
|
| 52950 |
+
"learning_rate": 7.545e-05,
|
| 52951 |
+
"loss": 0.1821,
|
| 52952 |
+
"step": 7546
|
| 52953 |
+
},
|
| 52954 |
+
{
|
| 52955 |
+
"epoch": 0.0007547,
|
| 52956 |
+
"grad_norm": 0.6312146186828613,
|
| 52957 |
+
"learning_rate": 7.546e-05,
|
| 52958 |
+
"loss": 0.2114,
|
| 52959 |
+
"step": 7547
|
| 52960 |
+
},
|
| 52961 |
+
{
|
| 52962 |
+
"epoch": 0.0007548,
|
| 52963 |
+
"grad_norm": 0.4911887049674988,
|
| 52964 |
+
"learning_rate": 7.547000000000001e-05,
|
| 52965 |
+
"loss": 0.1764,
|
| 52966 |
+
"step": 7548
|
| 52967 |
+
},
|
| 52968 |
+
{
|
| 52969 |
+
"epoch": 0.0007549,
|
| 52970 |
+
"grad_norm": 0.45326417684555054,
|
| 52971 |
+
"learning_rate": 7.548e-05,
|
| 52972 |
+
"loss": 0.1736,
|
| 52973 |
+
"step": 7549
|
| 52974 |
+
},
|
| 52975 |
+
{
|
| 52976 |
+
"epoch": 0.000755,
|
| 52977 |
+
"grad_norm": 0.46612992882728577,
|
| 52978 |
+
"learning_rate": 7.549e-05,
|
| 52979 |
+
"loss": 0.1879,
|
| 52980 |
+
"step": 7550
|
| 52981 |
+
},
|
| 52982 |
+
{
|
| 52983 |
+
"epoch": 0.0007551,
|
| 52984 |
+
"grad_norm": 0.5469991564750671,
|
| 52985 |
+
"learning_rate": 7.55e-05,
|
| 52986 |
+
"loss": 0.1855,
|
| 52987 |
+
"step": 7551
|
| 52988 |
+
},
|
| 52989 |
+
{
|
| 52990 |
+
"epoch": 0.0007552,
|
| 52991 |
+
"grad_norm": 0.48811817169189453,
|
| 52992 |
+
"learning_rate": 7.551e-05,
|
| 52993 |
+
"loss": 0.1913,
|
| 52994 |
+
"step": 7552
|
| 52995 |
+
},
|
| 52996 |
+
{
|
| 52997 |
+
"epoch": 0.0007553,
|
| 52998 |
+
"grad_norm": 0.4290868639945984,
|
| 52999 |
+
"learning_rate": 7.552e-05,
|
| 53000 |
+
"loss": 0.172,
|
| 53001 |
+
"step": 7553
|
| 53002 |
+
},
|
| 53003 |
+
{
|
| 53004 |
+
"epoch": 0.0007554,
|
| 53005 |
+
"grad_norm": 0.4164026975631714,
|
| 53006 |
+
"learning_rate": 7.553e-05,
|
| 53007 |
+
"loss": 0.1748,
|
| 53008 |
+
"step": 7554
|
| 53009 |
+
},
|
| 53010 |
+
{
|
| 53011 |
+
"epoch": 0.0007555,
|
| 53012 |
+
"grad_norm": 0.5739708542823792,
|
| 53013 |
+
"learning_rate": 7.554e-05,
|
| 53014 |
+
"loss": 0.1913,
|
| 53015 |
+
"step": 7555
|
| 53016 |
+
},
|
| 53017 |
+
{
|
| 53018 |
+
"epoch": 0.0007556,
|
| 53019 |
+
"grad_norm": 0.584821343421936,
|
| 53020 |
+
"learning_rate": 7.555e-05,
|
| 53021 |
+
"loss": 0.204,
|
| 53022 |
+
"step": 7556
|
| 53023 |
+
},
|
| 53024 |
+
{
|
| 53025 |
+
"epoch": 0.0007557,
|
| 53026 |
+
"grad_norm": 0.40070706605911255,
|
| 53027 |
+
"learning_rate": 7.556e-05,
|
| 53028 |
+
"loss": 0.1747,
|
| 53029 |
+
"step": 7557
|
| 53030 |
+
},
|
| 53031 |
+
{
|
| 53032 |
+
"epoch": 0.0007558,
|
| 53033 |
+
"grad_norm": 0.4085039496421814,
|
| 53034 |
+
"learning_rate": 7.557000000000001e-05,
|
| 53035 |
+
"loss": 0.1798,
|
| 53036 |
+
"step": 7558
|
| 53037 |
+
},
|
| 53038 |
+
{
|
| 53039 |
+
"epoch": 0.0007559,
|
| 53040 |
+
"grad_norm": 0.41738244891166687,
|
| 53041 |
+
"learning_rate": 7.558e-05,
|
| 53042 |
+
"loss": 0.1814,
|
| 53043 |
+
"step": 7559
|
| 53044 |
+
},
|
| 53045 |
+
{
|
| 53046 |
+
"epoch": 0.000756,
|
| 53047 |
+
"grad_norm": 0.40071919560432434,
|
| 53048 |
+
"learning_rate": 7.559e-05,
|
| 53049 |
+
"loss": 0.1765,
|
| 53050 |
+
"step": 7560
|
| 53051 |
+
},
|
| 53052 |
+
{
|
| 53053 |
+
"epoch": 0.0007561,
|
| 53054 |
+
"grad_norm": 0.4130774736404419,
|
| 53055 |
+
"learning_rate": 7.560000000000001e-05,
|
| 53056 |
+
"loss": 0.1772,
|
| 53057 |
+
"step": 7561
|
| 53058 |
+
},
|
| 53059 |
+
{
|
| 53060 |
+
"epoch": 0.0007562,
|
| 53061 |
+
"grad_norm": 0.386495441198349,
|
| 53062 |
+
"learning_rate": 7.561e-05,
|
| 53063 |
+
"loss": 0.1761,
|
| 53064 |
+
"step": 7562
|
| 53065 |
+
},
|
| 53066 |
+
{
|
| 53067 |
+
"epoch": 0.0007563,
|
| 53068 |
+
"grad_norm": 0.3618008494377136,
|
| 53069 |
+
"learning_rate": 7.562e-05,
|
| 53070 |
+
"loss": 0.167,
|
| 53071 |
+
"step": 7563
|
| 53072 |
+
},
|
| 53073 |
+
{
|
| 53074 |
+
"epoch": 0.0007564,
|
| 53075 |
+
"grad_norm": 0.5081895589828491,
|
| 53076 |
+
"learning_rate": 7.563e-05,
|
| 53077 |
+
"loss": 0.1785,
|
| 53078 |
+
"step": 7564
|
| 53079 |
+
},
|
| 53080 |
+
{
|
| 53081 |
+
"epoch": 0.0007565,
|
| 53082 |
+
"grad_norm": 0.36260002851486206,
|
| 53083 |
+
"learning_rate": 7.564e-05,
|
| 53084 |
+
"loss": 0.1639,
|
| 53085 |
+
"step": 7565
|
| 53086 |
+
},
|
| 53087 |
+
{
|
| 53088 |
+
"epoch": 0.0007566,
|
| 53089 |
+
"grad_norm": 0.43156561255455017,
|
| 53090 |
+
"learning_rate": 7.565e-05,
|
| 53091 |
+
"loss": 0.1884,
|
| 53092 |
+
"step": 7566
|
| 53093 |
+
},
|
| 53094 |
+
{
|
| 53095 |
+
"epoch": 0.0007567,
|
| 53096 |
+
"grad_norm": 0.4242669641971588,
|
| 53097 |
+
"learning_rate": 7.566e-05,
|
| 53098 |
+
"loss": 0.1725,
|
| 53099 |
+
"step": 7567
|
| 53100 |
+
},
|
| 53101 |
+
{
|
| 53102 |
+
"epoch": 0.0007568,
|
| 53103 |
+
"grad_norm": 0.38370412588119507,
|
| 53104 |
+
"learning_rate": 7.567e-05,
|
| 53105 |
+
"loss": 0.1597,
|
| 53106 |
+
"step": 7568
|
| 53107 |
+
},
|
| 53108 |
+
{
|
| 53109 |
+
"epoch": 0.0007569,
|
| 53110 |
+
"grad_norm": 0.4324035942554474,
|
| 53111 |
+
"learning_rate": 7.568000000000001e-05,
|
| 53112 |
+
"loss": 0.1891,
|
| 53113 |
+
"step": 7569
|
| 53114 |
+
},
|
| 53115 |
+
{
|
| 53116 |
+
"epoch": 0.000757,
|
| 53117 |
+
"grad_norm": 1.4893518686294556,
|
| 53118 |
+
"learning_rate": 7.569e-05,
|
| 53119 |
+
"loss": 0.275,
|
| 53120 |
+
"step": 7570
|
| 53121 |
+
},
|
| 53122 |
+
{
|
| 53123 |
+
"epoch": 0.0007571,
|
| 53124 |
+
"grad_norm": 0.41903969645500183,
|
| 53125 |
+
"learning_rate": 7.57e-05,
|
| 53126 |
+
"loss": 0.176,
|
| 53127 |
+
"step": 7571
|
| 53128 |
+
},
|
| 53129 |
+
{
|
| 53130 |
+
"epoch": 0.0007572,
|
| 53131 |
+
"grad_norm": 0.6930134296417236,
|
| 53132 |
+
"learning_rate": 7.571e-05,
|
| 53133 |
+
"loss": 0.2087,
|
| 53134 |
+
"step": 7572
|
| 53135 |
+
},
|
| 53136 |
+
{
|
| 53137 |
+
"epoch": 0.0007573,
|
| 53138 |
+
"grad_norm": 0.4721878170967102,
|
| 53139 |
+
"learning_rate": 7.572e-05,
|
| 53140 |
+
"loss": 0.1761,
|
| 53141 |
+
"step": 7573
|
| 53142 |
+
},
|
| 53143 |
+
{
|
| 53144 |
+
"epoch": 0.0007574,
|
| 53145 |
+
"grad_norm": 0.5028660893440247,
|
| 53146 |
+
"learning_rate": 7.573e-05,
|
| 53147 |
+
"loss": 0.1757,
|
| 53148 |
+
"step": 7574
|
| 53149 |
+
},
|
| 53150 |
+
{
|
| 53151 |
+
"epoch": 0.0007575,
|
| 53152 |
+
"grad_norm": 0.45845597982406616,
|
| 53153 |
+
"learning_rate": 7.574e-05,
|
| 53154 |
+
"loss": 0.1774,
|
| 53155 |
+
"step": 7575
|
| 53156 |
+
},
|
| 53157 |
+
{
|
| 53158 |
+
"epoch": 0.0007576,
|
| 53159 |
+
"grad_norm": 0.45461151003837585,
|
| 53160 |
+
"learning_rate": 7.575000000000001e-05,
|
| 53161 |
+
"loss": 0.1665,
|
| 53162 |
+
"step": 7576
|
| 53163 |
+
},
|
| 53164 |
+
{
|
| 53165 |
+
"epoch": 0.0007577,
|
| 53166 |
+
"grad_norm": 0.6267446279525757,
|
| 53167 |
+
"learning_rate": 7.575999999999999e-05,
|
| 53168 |
+
"loss": 0.1907,
|
| 53169 |
+
"step": 7577
|
| 53170 |
+
},
|
| 53171 |
+
{
|
| 53172 |
+
"epoch": 0.0007578,
|
| 53173 |
+
"grad_norm": 0.39918413758277893,
|
| 53174 |
+
"learning_rate": 7.577e-05,
|
| 53175 |
+
"loss": 0.172,
|
| 53176 |
+
"step": 7578
|
| 53177 |
+
},
|
| 53178 |
+
{
|
| 53179 |
+
"epoch": 0.0007579,
|
| 53180 |
+
"grad_norm": 0.46049246191978455,
|
| 53181 |
+
"learning_rate": 7.578000000000001e-05,
|
| 53182 |
+
"loss": 0.1713,
|
| 53183 |
+
"step": 7579
|
| 53184 |
+
},
|
| 53185 |
+
{
|
| 53186 |
+
"epoch": 0.000758,
|
| 53187 |
+
"grad_norm": 0.4430519938468933,
|
| 53188 |
+
"learning_rate": 7.579e-05,
|
| 53189 |
+
"loss": 0.179,
|
| 53190 |
+
"step": 7580
|
| 53191 |
+
},
|
| 53192 |
+
{
|
| 53193 |
+
"epoch": 0.0007581,
|
| 53194 |
+
"grad_norm": 0.686094343662262,
|
| 53195 |
+
"learning_rate": 7.58e-05,
|
| 53196 |
+
"loss": 0.1864,
|
| 53197 |
+
"step": 7581
|
| 53198 |
+
},
|
| 53199 |
+
{
|
| 53200 |
+
"epoch": 0.0007582,
|
| 53201 |
+
"grad_norm": 0.5256968140602112,
|
| 53202 |
+
"learning_rate": 7.581000000000001e-05,
|
| 53203 |
+
"loss": 0.1952,
|
| 53204 |
+
"step": 7582
|
| 53205 |
+
},
|
| 53206 |
+
{
|
| 53207 |
+
"epoch": 0.0007583,
|
| 53208 |
+
"grad_norm": 0.4756493866443634,
|
| 53209 |
+
"learning_rate": 7.582e-05,
|
| 53210 |
+
"loss": 0.1783,
|
| 53211 |
+
"step": 7583
|
| 53212 |
+
},
|
| 53213 |
+
{
|
| 53214 |
+
"epoch": 0.0007584,
|
| 53215 |
+
"grad_norm": 0.5038774013519287,
|
| 53216 |
+
"learning_rate": 7.583e-05,
|
| 53217 |
+
"loss": 0.1887,
|
| 53218 |
+
"step": 7584
|
| 53219 |
+
},
|
| 53220 |
+
{
|
| 53221 |
+
"epoch": 0.0007585,
|
| 53222 |
+
"grad_norm": 0.3784870207309723,
|
| 53223 |
+
"learning_rate": 7.584e-05,
|
| 53224 |
+
"loss": 0.158,
|
| 53225 |
+
"step": 7585
|
| 53226 |
+
},
|
| 53227 |
+
{
|
| 53228 |
+
"epoch": 0.0007586,
|
| 53229 |
+
"grad_norm": 0.3814629018306732,
|
| 53230 |
+
"learning_rate": 7.585e-05,
|
| 53231 |
+
"loss": 0.1688,
|
| 53232 |
+
"step": 7586
|
| 53233 |
+
},
|
| 53234 |
+
{
|
| 53235 |
+
"epoch": 0.0007587,
|
| 53236 |
+
"grad_norm": 0.3632339835166931,
|
| 53237 |
+
"learning_rate": 7.586e-05,
|
| 53238 |
+
"loss": 0.1689,
|
| 53239 |
+
"step": 7587
|
| 53240 |
+
},
|
| 53241 |
+
{
|
| 53242 |
+
"epoch": 0.0007588,
|
| 53243 |
+
"grad_norm": 0.3821631669998169,
|
| 53244 |
+
"learning_rate": 7.587e-05,
|
| 53245 |
+
"loss": 0.1665,
|
| 53246 |
+
"step": 7588
|
| 53247 |
+
},
|
| 53248 |
+
{
|
| 53249 |
+
"epoch": 0.0007589,
|
| 53250 |
+
"grad_norm": 0.40399041771888733,
|
| 53251 |
+
"learning_rate": 7.588e-05,
|
| 53252 |
+
"loss": 0.1812,
|
| 53253 |
+
"step": 7589
|
| 53254 |
+
},
|
| 53255 |
+
{
|
| 53256 |
+
"epoch": 0.000759,
|
| 53257 |
+
"grad_norm": 0.49214446544647217,
|
| 53258 |
+
"learning_rate": 7.589e-05,
|
| 53259 |
+
"loss": 0.1893,
|
| 53260 |
+
"step": 7590
|
| 53261 |
+
},
|
| 53262 |
+
{
|
| 53263 |
+
"epoch": 0.0007591,
|
| 53264 |
+
"grad_norm": 0.3859829306602478,
|
| 53265 |
+
"learning_rate": 7.59e-05,
|
| 53266 |
+
"loss": 0.1659,
|
| 53267 |
+
"step": 7591
|
| 53268 |
+
},
|
| 53269 |
+
{
|
| 53270 |
+
"epoch": 0.0007592,
|
| 53271 |
+
"grad_norm": 0.40475544333457947,
|
| 53272 |
+
"learning_rate": 7.591000000000001e-05,
|
| 53273 |
+
"loss": 0.173,
|
| 53274 |
+
"step": 7592
|
| 53275 |
+
},
|
| 53276 |
+
{
|
| 53277 |
+
"epoch": 0.0007593,
|
| 53278 |
+
"grad_norm": 0.3380837142467499,
|
| 53279 |
+
"learning_rate": 7.592e-05,
|
| 53280 |
+
"loss": 0.1613,
|
| 53281 |
+
"step": 7593
|
| 53282 |
+
},
|
| 53283 |
+
{
|
| 53284 |
+
"epoch": 0.0007594,
|
| 53285 |
+
"grad_norm": 0.4463665783405304,
|
| 53286 |
+
"learning_rate": 7.593e-05,
|
| 53287 |
+
"loss": 0.1709,
|
| 53288 |
+
"step": 7594
|
| 53289 |
+
},
|
| 53290 |
+
{
|
| 53291 |
+
"epoch": 0.0007595,
|
| 53292 |
+
"grad_norm": 0.3347475826740265,
|
| 53293 |
+
"learning_rate": 7.594000000000001e-05,
|
| 53294 |
+
"loss": 0.162,
|
| 53295 |
+
"step": 7595
|
| 53296 |
+
},
|
| 53297 |
+
{
|
| 53298 |
+
"epoch": 0.0007596,
|
| 53299 |
+
"grad_norm": 0.39011281728744507,
|
| 53300 |
+
"learning_rate": 7.595e-05,
|
| 53301 |
+
"loss": 0.175,
|
| 53302 |
+
"step": 7596
|
| 53303 |
+
},
|
| 53304 |
+
{
|
| 53305 |
+
"epoch": 0.0007597,
|
| 53306 |
+
"grad_norm": 0.424954891204834,
|
| 53307 |
+
"learning_rate": 7.596e-05,
|
| 53308 |
+
"loss": 0.1672,
|
| 53309 |
+
"step": 7597
|
| 53310 |
+
},
|
| 53311 |
+
{
|
| 53312 |
+
"epoch": 0.0007598,
|
| 53313 |
+
"grad_norm": 0.38835087418556213,
|
| 53314 |
+
"learning_rate": 7.597e-05,
|
| 53315 |
+
"loss": 0.1725,
|
| 53316 |
+
"step": 7598
|
| 53317 |
+
},
|
| 53318 |
+
{
|
| 53319 |
+
"epoch": 0.0007599,
|
| 53320 |
+
"grad_norm": 0.37602484226226807,
|
| 53321 |
+
"learning_rate": 7.598e-05,
|
| 53322 |
+
"loss": 0.1693,
|
| 53323 |
+
"step": 7599
|
| 53324 |
+
},
|
| 53325 |
+
{
|
| 53326 |
+
"epoch": 0.00076,
|
| 53327 |
+
"grad_norm": 0.3643645942211151,
|
| 53328 |
+
"learning_rate": 7.599e-05,
|
| 53329 |
+
"loss": 0.1647,
|
| 53330 |
+
"step": 7600
|
| 53331 |
+
},
|
| 53332 |
+
{
|
| 53333 |
+
"epoch": 0.0007601,
|
| 53334 |
+
"grad_norm": 0.36501967906951904,
|
| 53335 |
+
"learning_rate": 7.6e-05,
|
| 53336 |
+
"loss": 0.1669,
|
| 53337 |
+
"step": 7601
|
| 53338 |
+
},
|
| 53339 |
+
{
|
| 53340 |
+
"epoch": 0.0007602,
|
| 53341 |
+
"grad_norm": 0.34811708331108093,
|
| 53342 |
+
"learning_rate": 7.601e-05,
|
| 53343 |
+
"loss": 0.1586,
|
| 53344 |
+
"step": 7602
|
| 53345 |
+
},
|
| 53346 |
+
{
|
| 53347 |
+
"epoch": 0.0007603,
|
| 53348 |
+
"grad_norm": 0.3570595383644104,
|
| 53349 |
+
"learning_rate": 7.602000000000001e-05,
|
| 53350 |
+
"loss": 0.1688,
|
| 53351 |
+
"step": 7603
|
| 53352 |
+
},
|
| 53353 |
+
{
|
| 53354 |
+
"epoch": 0.0007604,
|
| 53355 |
+
"grad_norm": 0.411408394575119,
|
| 53356 |
+
"learning_rate": 7.603e-05,
|
| 53357 |
+
"loss": 0.1757,
|
| 53358 |
+
"step": 7604
|
| 53359 |
+
},
|
| 53360 |
+
{
|
| 53361 |
+
"epoch": 0.0007605,
|
| 53362 |
+
"grad_norm": 0.45920392870903015,
|
| 53363 |
+
"learning_rate": 7.604e-05,
|
| 53364 |
+
"loss": 0.1887,
|
| 53365 |
+
"step": 7605
|
| 53366 |
+
},
|
| 53367 |
+
{
|
| 53368 |
+
"epoch": 0.0007606,
|
| 53369 |
+
"grad_norm": 0.39549022912979126,
|
| 53370 |
+
"learning_rate": 7.605e-05,
|
| 53371 |
+
"loss": 0.1738,
|
| 53372 |
+
"step": 7606
|
| 53373 |
+
},
|
| 53374 |
+
{
|
| 53375 |
+
"epoch": 0.0007607,
|
| 53376 |
+
"grad_norm": 0.3656753599643707,
|
| 53377 |
+
"learning_rate": 7.606e-05,
|
| 53378 |
+
"loss": 0.1666,
|
| 53379 |
+
"step": 7607
|
| 53380 |
+
},
|
| 53381 |
+
{
|
| 53382 |
+
"epoch": 0.0007608,
|
| 53383 |
+
"grad_norm": 0.32445859909057617,
|
| 53384 |
+
"learning_rate": 7.607e-05,
|
| 53385 |
+
"loss": 0.1559,
|
| 53386 |
+
"step": 7608
|
| 53387 |
+
},
|
| 53388 |
+
{
|
| 53389 |
+
"epoch": 0.0007609,
|
| 53390 |
+
"grad_norm": 0.5102970600128174,
|
| 53391 |
+
"learning_rate": 7.608e-05,
|
| 53392 |
+
"loss": 0.1697,
|
| 53393 |
+
"step": 7609
|
| 53394 |
+
},
|
| 53395 |
+
{
|
| 53396 |
+
"epoch": 0.000761,
|
| 53397 |
+
"grad_norm": 0.5840592384338379,
|
| 53398 |
+
"learning_rate": 7.609000000000001e-05,
|
| 53399 |
+
"loss": 0.192,
|
| 53400 |
+
"step": 7610
|
| 53401 |
+
},
|
| 53402 |
+
{
|
| 53403 |
+
"epoch": 0.0007611,
|
| 53404 |
+
"grad_norm": 0.8516383171081543,
|
| 53405 |
+
"learning_rate": 7.609999999999999e-05,
|
| 53406 |
+
"loss": 0.1871,
|
| 53407 |
+
"step": 7611
|
| 53408 |
+
},
|
| 53409 |
+
{
|
| 53410 |
+
"epoch": 0.0007612,
|
| 53411 |
+
"grad_norm": 0.4764192998409271,
|
| 53412 |
+
"learning_rate": 7.611e-05,
|
| 53413 |
+
"loss": 0.1598,
|
| 53414 |
+
"step": 7612
|
| 53415 |
+
},
|
| 53416 |
+
{
|
| 53417 |
+
"epoch": 0.0007613,
|
| 53418 |
+
"grad_norm": 0.34317582845687866,
|
| 53419 |
+
"learning_rate": 7.612000000000001e-05,
|
| 53420 |
+
"loss": 0.1567,
|
| 53421 |
+
"step": 7613
|
| 53422 |
+
},
|
| 53423 |
+
{
|
| 53424 |
+
"epoch": 0.0007614,
|
| 53425 |
+
"grad_norm": 0.46695294976234436,
|
| 53426 |
+
"learning_rate": 7.613e-05,
|
| 53427 |
+
"loss": 0.173,
|
| 53428 |
+
"step": 7614
|
| 53429 |
+
},
|
| 53430 |
+
{
|
| 53431 |
+
"epoch": 0.0007615,
|
| 53432 |
+
"grad_norm": 0.6321865320205688,
|
| 53433 |
+
"learning_rate": 7.614e-05,
|
| 53434 |
+
"loss": 0.2097,
|
| 53435 |
+
"step": 7615
|
| 53436 |
+
},
|
| 53437 |
+
{
|
| 53438 |
+
"epoch": 0.0007616,
|
| 53439 |
+
"grad_norm": 0.4267997145652771,
|
| 53440 |
+
"learning_rate": 7.615000000000001e-05,
|
| 53441 |
+
"loss": 0.1639,
|
| 53442 |
+
"step": 7616
|
| 53443 |
+
},
|
| 53444 |
+
{
|
| 53445 |
+
"epoch": 0.0007617,
|
| 53446 |
+
"grad_norm": 0.40251919627189636,
|
| 53447 |
+
"learning_rate": 7.616e-05,
|
| 53448 |
+
"loss": 0.168,
|
| 53449 |
+
"step": 7617
|
| 53450 |
+
},
|
| 53451 |
+
{
|
| 53452 |
+
"epoch": 0.0007618,
|
| 53453 |
+
"grad_norm": 0.5825067162513733,
|
| 53454 |
+
"learning_rate": 7.617e-05,
|
| 53455 |
+
"loss": 0.1808,
|
| 53456 |
+
"step": 7618
|
| 53457 |
+
},
|
| 53458 |
+
{
|
| 53459 |
+
"epoch": 0.0007619,
|
| 53460 |
+
"grad_norm": 0.37340980768203735,
|
| 53461 |
+
"learning_rate": 7.618e-05,
|
| 53462 |
+
"loss": 0.1584,
|
| 53463 |
+
"step": 7619
|
| 53464 |
+
},
|
| 53465 |
+
{
|
| 53466 |
+
"epoch": 0.000762,
|
| 53467 |
+
"grad_norm": 0.3515920639038086,
|
| 53468 |
+
"learning_rate": 7.619e-05,
|
| 53469 |
+
"loss": 0.1608,
|
| 53470 |
+
"step": 7620
|
| 53471 |
+
},
|
| 53472 |
+
{
|
| 53473 |
+
"epoch": 0.0007621,
|
| 53474 |
+
"grad_norm": 0.48071399331092834,
|
| 53475 |
+
"learning_rate": 7.62e-05,
|
| 53476 |
+
"loss": 0.1793,
|
| 53477 |
+
"step": 7621
|
| 53478 |
+
},
|
| 53479 |
+
{
|
| 53480 |
+
"epoch": 0.0007622,
|
| 53481 |
+
"grad_norm": 0.39304035902023315,
|
| 53482 |
+
"learning_rate": 7.621e-05,
|
| 53483 |
+
"loss": 0.1621,
|
| 53484 |
+
"step": 7622
|
| 53485 |
+
},
|
| 53486 |
+
{
|
| 53487 |
+
"epoch": 0.0007623,
|
| 53488 |
+
"grad_norm": 0.40765902400016785,
|
| 53489 |
+
"learning_rate": 7.622e-05,
|
| 53490 |
+
"loss": 0.1689,
|
| 53491 |
+
"step": 7623
|
| 53492 |
+
},
|
| 53493 |
+
{
|
| 53494 |
+
"epoch": 0.0007624,
|
| 53495 |
+
"grad_norm": 0.41402965784072876,
|
| 53496 |
+
"learning_rate": 7.623e-05,
|
| 53497 |
+
"loss": 0.1735,
|
| 53498 |
+
"step": 7624
|
| 53499 |
+
},
|
| 53500 |
+
{
|
| 53501 |
+
"epoch": 0.0007625,
|
| 53502 |
+
"grad_norm": 0.3803972899913788,
|
| 53503 |
+
"learning_rate": 7.624e-05,
|
| 53504 |
+
"loss": 0.1638,
|
| 53505 |
+
"step": 7625
|
| 53506 |
+
},
|
| 53507 |
+
{
|
| 53508 |
+
"epoch": 0.0007626,
|
| 53509 |
+
"grad_norm": 0.6322067975997925,
|
| 53510 |
+
"learning_rate": 7.625e-05,
|
| 53511 |
+
"loss": 0.2078,
|
| 53512 |
+
"step": 7626
|
| 53513 |
+
},
|
| 53514 |
+
{
|
| 53515 |
+
"epoch": 0.0007627,
|
| 53516 |
+
"grad_norm": 0.3621191084384918,
|
| 53517 |
+
"learning_rate": 7.626e-05,
|
| 53518 |
+
"loss": 0.1599,
|
| 53519 |
+
"step": 7627
|
| 53520 |
+
},
|
| 53521 |
+
{
|
| 53522 |
+
"epoch": 0.0007628,
|
| 53523 |
+
"grad_norm": 0.3360922634601593,
|
| 53524 |
+
"learning_rate": 7.627e-05,
|
| 53525 |
+
"loss": 0.1519,
|
| 53526 |
+
"step": 7628
|
| 53527 |
+
},
|
| 53528 |
+
{
|
| 53529 |
+
"epoch": 0.0007629,
|
| 53530 |
+
"grad_norm": 0.37620165944099426,
|
| 53531 |
+
"learning_rate": 7.628000000000001e-05,
|
| 53532 |
+
"loss": 0.1726,
|
| 53533 |
+
"step": 7629
|
| 53534 |
+
},
|
| 53535 |
+
{
|
| 53536 |
+
"epoch": 0.000763,
|
| 53537 |
+
"grad_norm": 0.44851481914520264,
|
| 53538 |
+
"learning_rate": 7.629e-05,
|
| 53539 |
+
"loss": 0.1827,
|
| 53540 |
+
"step": 7630
|
| 53541 |
+
},
|
| 53542 |
+
{
|
| 53543 |
+
"epoch": 0.0007631,
|
| 53544 |
+
"grad_norm": 0.5561351776123047,
|
| 53545 |
+
"learning_rate": 7.63e-05,
|
| 53546 |
+
"loss": 0.1719,
|
| 53547 |
+
"step": 7631
|
| 53548 |
+
},
|
| 53549 |
+
{
|
| 53550 |
+
"epoch": 0.0007632,
|
| 53551 |
+
"grad_norm": 0.6119112372398376,
|
| 53552 |
+
"learning_rate": 7.631e-05,
|
| 53553 |
+
"loss": 0.1886,
|
| 53554 |
+
"step": 7632
|
| 53555 |
+
},
|
| 53556 |
+
{
|
| 53557 |
+
"epoch": 0.0007633,
|
| 53558 |
+
"grad_norm": 1.0111428499221802,
|
| 53559 |
+
"learning_rate": 7.632e-05,
|
| 53560 |
+
"loss": 0.2168,
|
| 53561 |
+
"step": 7633
|
| 53562 |
+
},
|
| 53563 |
+
{
|
| 53564 |
+
"epoch": 0.0007634,
|
| 53565 |
+
"grad_norm": 0.42068493366241455,
|
| 53566 |
+
"learning_rate": 7.633e-05,
|
| 53567 |
+
"loss": 0.1674,
|
| 53568 |
+
"step": 7634
|
| 53569 |
+
},
|
| 53570 |
+
{
|
| 53571 |
+
"epoch": 0.0007635,
|
| 53572 |
+
"grad_norm": 0.365709513425827,
|
| 53573 |
+
"learning_rate": 7.634e-05,
|
| 53574 |
+
"loss": 0.1561,
|
| 53575 |
+
"step": 7635
|
| 53576 |
+
},
|
| 53577 |
+
{
|
| 53578 |
+
"epoch": 0.0007636,
|
| 53579 |
+
"grad_norm": 0.5593344569206238,
|
| 53580 |
+
"learning_rate": 7.635e-05,
|
| 53581 |
+
"loss": 0.1716,
|
| 53582 |
+
"step": 7636
|
| 53583 |
+
},
|
| 53584 |
+
{
|
| 53585 |
+
"epoch": 0.0007637,
|
| 53586 |
+
"grad_norm": 0.37984171509742737,
|
| 53587 |
+
"learning_rate": 7.636000000000001e-05,
|
| 53588 |
+
"loss": 0.1577,
|
| 53589 |
+
"step": 7637
|
| 53590 |
+
},
|
| 53591 |
+
{
|
| 53592 |
+
"epoch": 0.0007638,
|
| 53593 |
+
"grad_norm": 0.38292455673217773,
|
| 53594 |
+
"learning_rate": 7.637e-05,
|
| 53595 |
+
"loss": 0.1676,
|
| 53596 |
+
"step": 7638
|
| 53597 |
+
},
|
| 53598 |
+
{
|
| 53599 |
+
"epoch": 0.0007639,
|
| 53600 |
+
"grad_norm": 0.36735305190086365,
|
| 53601 |
+
"learning_rate": 7.638e-05,
|
| 53602 |
+
"loss": 0.1593,
|
| 53603 |
+
"step": 7639
|
| 53604 |
+
},
|
| 53605 |
+
{
|
| 53606 |
+
"epoch": 0.000764,
|
| 53607 |
+
"grad_norm": 0.47483325004577637,
|
| 53608 |
+
"learning_rate": 7.639e-05,
|
| 53609 |
+
"loss": 0.1794,
|
| 53610 |
+
"step": 7640
|
| 53611 |
+
},
|
| 53612 |
+
{
|
| 53613 |
+
"epoch": 0.0007641,
|
| 53614 |
+
"grad_norm": 0.4931349456310272,
|
| 53615 |
+
"learning_rate": 7.64e-05,
|
| 53616 |
+
"loss": 0.1638,
|
| 53617 |
+
"step": 7641
|
| 53618 |
+
},
|
| 53619 |
+
{
|
| 53620 |
+
"epoch": 0.0007642,
|
| 53621 |
+
"grad_norm": 0.5691468119621277,
|
| 53622 |
+
"learning_rate": 7.641e-05,
|
| 53623 |
+
"loss": 0.1766,
|
| 53624 |
+
"step": 7642
|
| 53625 |
+
},
|
| 53626 |
+
{
|
| 53627 |
+
"epoch": 0.0007643,
|
| 53628 |
+
"grad_norm": 0.39042598009109497,
|
| 53629 |
+
"learning_rate": 7.642e-05,
|
| 53630 |
+
"loss": 0.1659,
|
| 53631 |
+
"step": 7643
|
| 53632 |
+
},
|
| 53633 |
+
{
|
| 53634 |
+
"epoch": 0.0007644,
|
| 53635 |
+
"grad_norm": 0.8049102425575256,
|
| 53636 |
+
"learning_rate": 7.643000000000001e-05,
|
| 53637 |
+
"loss": 0.2017,
|
| 53638 |
+
"step": 7644
|
| 53639 |
+
},
|
| 53640 |
+
{
|
| 53641 |
+
"epoch": 0.0007645,
|
| 53642 |
+
"grad_norm": 0.34670665860176086,
|
| 53643 |
+
"learning_rate": 7.643999999999999e-05,
|
| 53644 |
+
"loss": 0.1509,
|
| 53645 |
+
"step": 7645
|
| 53646 |
+
},
|
| 53647 |
+
{
|
| 53648 |
+
"epoch": 0.0007646,
|
| 53649 |
+
"grad_norm": 0.3409683108329773,
|
| 53650 |
+
"learning_rate": 7.645e-05,
|
| 53651 |
+
"loss": 0.1525,
|
| 53652 |
+
"step": 7646
|
| 53653 |
+
},
|
| 53654 |
+
{
|
| 53655 |
+
"epoch": 0.0007647,
|
| 53656 |
+
"grad_norm": 0.38340339064598083,
|
| 53657 |
+
"learning_rate": 7.646000000000001e-05,
|
| 53658 |
+
"loss": 0.1576,
|
| 53659 |
+
"step": 7647
|
| 53660 |
+
},
|
| 53661 |
+
{
|
| 53662 |
+
"epoch": 0.0007648,
|
| 53663 |
+
"grad_norm": 0.8329829573631287,
|
| 53664 |
+
"learning_rate": 7.646999999999999e-05,
|
| 53665 |
+
"loss": 0.223,
|
| 53666 |
+
"step": 7648
|
| 53667 |
+
},
|
| 53668 |
+
{
|
| 53669 |
+
"epoch": 0.0007649,
|
| 53670 |
+
"grad_norm": 0.7235862016677856,
|
| 53671 |
+
"learning_rate": 7.648e-05,
|
| 53672 |
+
"loss": 0.2101,
|
| 53673 |
+
"step": 7649
|
| 53674 |
+
},
|
| 53675 |
+
{
|
| 53676 |
+
"epoch": 0.000765,
|
| 53677 |
+
"grad_norm": 0.35512569546699524,
|
| 53678 |
+
"learning_rate": 7.649000000000001e-05,
|
| 53679 |
+
"loss": 0.1611,
|
| 53680 |
+
"step": 7650
|
| 53681 |
+
},
|
| 53682 |
+
{
|
| 53683 |
+
"epoch": 0.0007651,
|
| 53684 |
+
"grad_norm": 0.39725664258003235,
|
| 53685 |
+
"learning_rate": 7.65e-05,
|
| 53686 |
+
"loss": 0.1606,
|
| 53687 |
+
"step": 7651
|
| 53688 |
+
},
|
| 53689 |
+
{
|
| 53690 |
+
"epoch": 0.0007652,
|
| 53691 |
+
"grad_norm": 0.47114476561546326,
|
| 53692 |
+
"learning_rate": 7.651e-05,
|
| 53693 |
+
"loss": 0.1812,
|
| 53694 |
+
"step": 7652
|
| 53695 |
+
},
|
| 53696 |
+
{
|
| 53697 |
+
"epoch": 0.0007653,
|
| 53698 |
+
"grad_norm": 0.3914787471294403,
|
| 53699 |
+
"learning_rate": 7.652e-05,
|
| 53700 |
+
"loss": 0.1664,
|
| 53701 |
+
"step": 7653
|
| 53702 |
+
},
|
| 53703 |
+
{
|
| 53704 |
+
"epoch": 0.0007654,
|
| 53705 |
+
"grad_norm": 0.4580937623977661,
|
| 53706 |
+
"learning_rate": 7.653e-05,
|
| 53707 |
+
"loss": 0.1665,
|
| 53708 |
+
"step": 7654
|
| 53709 |
+
},
|
| 53710 |
+
{
|
| 53711 |
+
"epoch": 0.0007655,
|
| 53712 |
+
"grad_norm": 0.3929605782032013,
|
| 53713 |
+
"learning_rate": 7.654e-05,
|
| 53714 |
+
"loss": 0.1534,
|
| 53715 |
+
"step": 7655
|
| 53716 |
+
},
|
| 53717 |
+
{
|
| 53718 |
+
"epoch": 0.0007656,
|
| 53719 |
+
"grad_norm": 0.4484695792198181,
|
| 53720 |
+
"learning_rate": 7.655e-05,
|
| 53721 |
+
"loss": 0.1812,
|
| 53722 |
+
"step": 7656
|
| 53723 |
+
},
|
| 53724 |
+
{
|
| 53725 |
+
"epoch": 0.0007657,
|
| 53726 |
+
"grad_norm": 0.43384209275245667,
|
| 53727 |
+
"learning_rate": 7.656e-05,
|
| 53728 |
+
"loss": 0.1854,
|
| 53729 |
+
"step": 7657
|
| 53730 |
+
},
|
| 53731 |
+
{
|
| 53732 |
+
"epoch": 0.0007658,
|
| 53733 |
+
"grad_norm": 0.38437792658805847,
|
| 53734 |
+
"learning_rate": 7.657e-05,
|
| 53735 |
+
"loss": 0.1582,
|
| 53736 |
+
"step": 7658
|
| 53737 |
+
},
|
| 53738 |
+
{
|
| 53739 |
+
"epoch": 0.0007659,
|
| 53740 |
+
"grad_norm": 0.33982738852500916,
|
| 53741 |
+
"learning_rate": 7.658e-05,
|
| 53742 |
+
"loss": 0.1604,
|
| 53743 |
+
"step": 7659
|
| 53744 |
+
},
|
| 53745 |
+
{
|
| 53746 |
+
"epoch": 0.000766,
|
| 53747 |
+
"grad_norm": 0.8198331594467163,
|
| 53748 |
+
"learning_rate": 7.659e-05,
|
| 53749 |
+
"loss": 0.2065,
|
| 53750 |
+
"step": 7660
|
| 53751 |
+
},
|
| 53752 |
+
{
|
| 53753 |
+
"epoch": 0.0007661,
|
| 53754 |
+
"grad_norm": 0.4300197660923004,
|
| 53755 |
+
"learning_rate": 7.66e-05,
|
| 53756 |
+
"loss": 0.161,
|
| 53757 |
+
"step": 7661
|
| 53758 |
+
},
|
| 53759 |
+
{
|
| 53760 |
+
"epoch": 0.0007662,
|
| 53761 |
+
"grad_norm": 0.40067654848098755,
|
| 53762 |
+
"learning_rate": 7.661e-05,
|
| 53763 |
+
"loss": 0.1666,
|
| 53764 |
+
"step": 7662
|
| 53765 |
+
},
|
| 53766 |
+
{
|
| 53767 |
+
"epoch": 0.0007663,
|
| 53768 |
+
"grad_norm": 0.36768773198127747,
|
| 53769 |
+
"learning_rate": 7.662000000000001e-05,
|
| 53770 |
+
"loss": 0.1655,
|
| 53771 |
+
"step": 7663
|
| 53772 |
+
},
|
| 53773 |
+
{
|
| 53774 |
+
"epoch": 0.0007664,
|
| 53775 |
+
"grad_norm": 0.3558520972728729,
|
| 53776 |
+
"learning_rate": 7.663e-05,
|
| 53777 |
+
"loss": 0.1475,
|
| 53778 |
+
"step": 7664
|
| 53779 |
+
},
|
| 53780 |
+
{
|
| 53781 |
+
"epoch": 0.0007665,
|
| 53782 |
+
"grad_norm": 0.5428350567817688,
|
| 53783 |
+
"learning_rate": 7.664e-05,
|
| 53784 |
+
"loss": 0.1729,
|
| 53785 |
+
"step": 7665
|
| 53786 |
+
},
|
| 53787 |
+
{
|
| 53788 |
+
"epoch": 0.0007666,
|
| 53789 |
+
"grad_norm": 0.48063817620277405,
|
| 53790 |
+
"learning_rate": 7.665e-05,
|
| 53791 |
+
"loss": 0.1938,
|
| 53792 |
+
"step": 7666
|
| 53793 |
+
},
|
| 53794 |
+
{
|
| 53795 |
+
"epoch": 0.0007667,
|
| 53796 |
+
"grad_norm": 0.3305480182170868,
|
| 53797 |
+
"learning_rate": 7.666e-05,
|
| 53798 |
+
"loss": 0.1467,
|
| 53799 |
+
"step": 7667
|
| 53800 |
+
},
|
| 53801 |
+
{
|
| 53802 |
+
"epoch": 0.0007668,
|
| 53803 |
+
"grad_norm": 0.7609458565711975,
|
| 53804 |
+
"learning_rate": 7.667e-05,
|
| 53805 |
+
"loss": 0.1814,
|
| 53806 |
+
"step": 7668
|
| 53807 |
+
},
|
| 53808 |
+
{
|
| 53809 |
+
"epoch": 0.0007669,
|
| 53810 |
+
"grad_norm": 0.3762044608592987,
|
| 53811 |
+
"learning_rate": 7.668e-05,
|
| 53812 |
+
"loss": 0.1622,
|
| 53813 |
+
"step": 7669
|
| 53814 |
+
},
|
| 53815 |
+
{
|
| 53816 |
+
"epoch": 0.000767,
|
| 53817 |
+
"grad_norm": 0.3550243079662323,
|
| 53818 |
+
"learning_rate": 7.669e-05,
|
| 53819 |
+
"loss": 0.1553,
|
| 53820 |
+
"step": 7670
|
| 53821 |
+
},
|
| 53822 |
+
{
|
| 53823 |
+
"epoch": 0.0007671,
|
| 53824 |
+
"grad_norm": 0.3320472240447998,
|
| 53825 |
+
"learning_rate": 7.670000000000001e-05,
|
| 53826 |
+
"loss": 0.1483,
|
| 53827 |
+
"step": 7671
|
| 53828 |
+
},
|
| 53829 |
+
{
|
| 53830 |
+
"epoch": 0.0007672,
|
| 53831 |
+
"grad_norm": 0.5858722925186157,
|
| 53832 |
+
"learning_rate": 7.671e-05,
|
| 53833 |
+
"loss": 0.1892,
|
| 53834 |
+
"step": 7672
|
| 53835 |
+
},
|
| 53836 |
+
{
|
| 53837 |
+
"epoch": 0.0007673,
|
| 53838 |
+
"grad_norm": 1.368833065032959,
|
| 53839 |
+
"learning_rate": 7.672e-05,
|
| 53840 |
+
"loss": 0.2322,
|
| 53841 |
+
"step": 7673
|
| 53842 |
+
},
|
| 53843 |
+
{
|
| 53844 |
+
"epoch": 0.0007674,
|
| 53845 |
+
"grad_norm": 0.532004714012146,
|
| 53846 |
+
"learning_rate": 7.673e-05,
|
| 53847 |
+
"loss": 0.1707,
|
| 53848 |
+
"step": 7674
|
| 53849 |
+
},
|
| 53850 |
+
{
|
| 53851 |
+
"epoch": 0.0007675,
|
| 53852 |
+
"grad_norm": 0.550662100315094,
|
| 53853 |
+
"learning_rate": 7.674e-05,
|
| 53854 |
+
"loss": 0.1891,
|
| 53855 |
+
"step": 7675
|
| 53856 |
+
},
|
| 53857 |
+
{
|
| 53858 |
+
"epoch": 0.0007676,
|
| 53859 |
+
"grad_norm": 0.8571083545684814,
|
| 53860 |
+
"learning_rate": 7.675e-05,
|
| 53861 |
+
"loss": 0.1967,
|
| 53862 |
+
"step": 7676
|
| 53863 |
+
},
|
| 53864 |
+
{
|
| 53865 |
+
"epoch": 0.0007677,
|
| 53866 |
+
"grad_norm": 0.5520954728126526,
|
| 53867 |
+
"learning_rate": 7.676e-05,
|
| 53868 |
+
"loss": 0.172,
|
| 53869 |
+
"step": 7677
|
| 53870 |
+
},
|
| 53871 |
+
{
|
| 53872 |
+
"epoch": 0.0007678,
|
| 53873 |
+
"grad_norm": 0.4285428524017334,
|
| 53874 |
+
"learning_rate": 7.677000000000001e-05,
|
| 53875 |
+
"loss": 0.1538,
|
| 53876 |
+
"step": 7678
|
| 53877 |
+
},
|
| 53878 |
+
{
|
| 53879 |
+
"epoch": 0.0007679,
|
| 53880 |
+
"grad_norm": 0.38580891489982605,
|
| 53881 |
+
"learning_rate": 7.678e-05,
|
| 53882 |
+
"loss": 0.147,
|
| 53883 |
+
"step": 7679
|
| 53884 |
+
},
|
| 53885 |
+
{
|
| 53886 |
+
"epoch": 0.000768,
|
| 53887 |
+
"grad_norm": 0.37791067361831665,
|
| 53888 |
+
"learning_rate": 7.679e-05,
|
| 53889 |
+
"loss": 0.1521,
|
| 53890 |
+
"step": 7680
|
| 53891 |
+
},
|
| 53892 |
+
{
|
| 53893 |
+
"epoch": 0.0007681,
|
| 53894 |
+
"grad_norm": 0.3506149351596832,
|
| 53895 |
+
"learning_rate": 7.680000000000001e-05,
|
| 53896 |
+
"loss": 0.1484,
|
| 53897 |
+
"step": 7681
|
| 53898 |
+
},
|
| 53899 |
+
{
|
| 53900 |
+
"epoch": 0.0007682,
|
| 53901 |
+
"grad_norm": 0.4228236973285675,
|
| 53902 |
+
"learning_rate": 7.680999999999999e-05,
|
| 53903 |
+
"loss": 0.1617,
|
| 53904 |
+
"step": 7682
|
| 53905 |
+
},
|
| 53906 |
+
{
|
| 53907 |
+
"epoch": 0.0007683,
|
| 53908 |
+
"grad_norm": 0.4166804850101471,
|
| 53909 |
+
"learning_rate": 7.682e-05,
|
| 53910 |
+
"loss": 0.1674,
|
| 53911 |
+
"step": 7683
|
| 53912 |
+
},
|
| 53913 |
+
{
|
| 53914 |
+
"epoch": 0.0007684,
|
| 53915 |
+
"grad_norm": 0.4382307529449463,
|
| 53916 |
+
"learning_rate": 7.683000000000001e-05,
|
| 53917 |
+
"loss": 0.1726,
|
| 53918 |
+
"step": 7684
|
| 53919 |
+
},
|
| 53920 |
+
{
|
| 53921 |
+
"epoch": 0.0007685,
|
| 53922 |
+
"grad_norm": 0.4771483838558197,
|
| 53923 |
+
"learning_rate": 7.684e-05,
|
| 53924 |
+
"loss": 0.1758,
|
| 53925 |
+
"step": 7685
|
| 53926 |
+
},
|
| 53927 |
+
{
|
| 53928 |
+
"epoch": 0.0007686,
|
| 53929 |
+
"grad_norm": 0.3528839945793152,
|
| 53930 |
+
"learning_rate": 7.685e-05,
|
| 53931 |
+
"loss": 0.1581,
|
| 53932 |
+
"step": 7686
|
| 53933 |
+
},
|
| 53934 |
+
{
|
| 53935 |
+
"epoch": 0.0007687,
|
| 53936 |
+
"grad_norm": 0.7994784116744995,
|
| 53937 |
+
"learning_rate": 7.686e-05,
|
| 53938 |
+
"loss": 0.1545,
|
| 53939 |
+
"step": 7687
|
| 53940 |
+
},
|
| 53941 |
+
{
|
| 53942 |
+
"epoch": 0.0007688,
|
| 53943 |
+
"grad_norm": 0.4543117582798004,
|
| 53944 |
+
"learning_rate": 7.687e-05,
|
| 53945 |
+
"loss": 0.1691,
|
| 53946 |
+
"step": 7688
|
| 53947 |
+
},
|
| 53948 |
+
{
|
| 53949 |
+
"epoch": 0.0007689,
|
| 53950 |
+
"grad_norm": 0.45544180274009705,
|
| 53951 |
+
"learning_rate": 7.688e-05,
|
| 53952 |
+
"loss": 0.1576,
|
| 53953 |
+
"step": 7689
|
| 53954 |
+
},
|
| 53955 |
+
{
|
| 53956 |
+
"epoch": 0.000769,
|
| 53957 |
+
"grad_norm": 0.432519793510437,
|
| 53958 |
+
"learning_rate": 7.689e-05,
|
| 53959 |
+
"loss": 0.1637,
|
| 53960 |
+
"step": 7690
|
| 53961 |
+
},
|
| 53962 |
+
{
|
| 53963 |
+
"epoch": 0.0007691,
|
| 53964 |
+
"grad_norm": 0.4242666959762573,
|
| 53965 |
+
"learning_rate": 7.69e-05,
|
| 53966 |
+
"loss": 0.163,
|
| 53967 |
+
"step": 7691
|
| 53968 |
+
},
|
| 53969 |
+
{
|
| 53970 |
+
"epoch": 0.0007692,
|
| 53971 |
+
"grad_norm": 0.38810381293296814,
|
| 53972 |
+
"learning_rate": 7.691e-05,
|
| 53973 |
+
"loss": 0.1603,
|
| 53974 |
+
"step": 7692
|
| 53975 |
+
},
|
| 53976 |
+
{
|
| 53977 |
+
"epoch": 0.0007693,
|
| 53978 |
+
"grad_norm": 0.3763027489185333,
|
| 53979 |
+
"learning_rate": 7.692e-05,
|
| 53980 |
+
"loss": 0.1527,
|
| 53981 |
+
"step": 7693
|
| 53982 |
+
},
|
| 53983 |
+
{
|
| 53984 |
+
"epoch": 0.0007694,
|
| 53985 |
+
"grad_norm": 0.39184290170669556,
|
| 53986 |
+
"learning_rate": 7.693e-05,
|
| 53987 |
+
"loss": 0.1674,
|
| 53988 |
+
"step": 7694
|
| 53989 |
+
},
|
| 53990 |
+
{
|
| 53991 |
+
"epoch": 0.0007695,
|
| 53992 |
+
"grad_norm": 0.35056430101394653,
|
| 53993 |
+
"learning_rate": 7.694e-05,
|
| 53994 |
+
"loss": 0.1562,
|
| 53995 |
+
"step": 7695
|
| 53996 |
+
},
|
| 53997 |
+
{
|
| 53998 |
+
"epoch": 0.0007696,
|
| 53999 |
+
"grad_norm": 0.3862243592739105,
|
| 54000 |
+
"learning_rate": 7.695e-05,
|
| 54001 |
+
"loss": 0.1631,
|
| 54002 |
+
"step": 7696
|
| 54003 |
+
},
|
| 54004 |
+
{
|
| 54005 |
+
"epoch": 0.0007697,
|
| 54006 |
+
"grad_norm": 0.3677322268486023,
|
| 54007 |
+
"learning_rate": 7.696000000000001e-05,
|
| 54008 |
+
"loss": 0.1675,
|
| 54009 |
+
"step": 7697
|
| 54010 |
+
},
|
| 54011 |
+
{
|
| 54012 |
+
"epoch": 0.0007698,
|
| 54013 |
+
"grad_norm": 0.37952372431755066,
|
| 54014 |
+
"learning_rate": 7.697e-05,
|
| 54015 |
+
"loss": 0.1625,
|
| 54016 |
+
"step": 7698
|
| 54017 |
+
},
|
| 54018 |
+
{
|
| 54019 |
+
"epoch": 0.0007699,
|
| 54020 |
+
"grad_norm": 0.6752346158027649,
|
| 54021 |
+
"learning_rate": 7.698e-05,
|
| 54022 |
+
"loss": 0.1904,
|
| 54023 |
+
"step": 7699
|
| 54024 |
+
},
|
| 54025 |
+
{
|
| 54026 |
+
"epoch": 0.00077,
|
| 54027 |
+
"grad_norm": 0.4253908097743988,
|
| 54028 |
+
"learning_rate": 7.699e-05,
|
| 54029 |
+
"loss": 0.168,
|
| 54030 |
+
"step": 7700
|
| 54031 |
+
},
|
| 54032 |
+
{
|
| 54033 |
+
"epoch": 0.0007701,
|
| 54034 |
+
"grad_norm": 0.3445245921611786,
|
| 54035 |
+
"learning_rate": 7.7e-05,
|
| 54036 |
+
"loss": 0.1594,
|
| 54037 |
+
"step": 7701
|
| 54038 |
+
},
|
| 54039 |
+
{
|
| 54040 |
+
"epoch": 0.0007702,
|
| 54041 |
+
"grad_norm": 0.37778040766716003,
|
| 54042 |
+
"learning_rate": 7.701e-05,
|
| 54043 |
+
"loss": 0.1606,
|
| 54044 |
+
"step": 7702
|
| 54045 |
+
},
|
| 54046 |
+
{
|
| 54047 |
+
"epoch": 0.0007703,
|
| 54048 |
+
"grad_norm": 0.3563236594200134,
|
| 54049 |
+
"learning_rate": 7.702e-05,
|
| 54050 |
+
"loss": 0.156,
|
| 54051 |
+
"step": 7703
|
| 54052 |
+
},
|
| 54053 |
+
{
|
| 54054 |
+
"epoch": 0.0007704,
|
| 54055 |
+
"grad_norm": 0.35258838534355164,
|
| 54056 |
+
"learning_rate": 7.703e-05,
|
| 54057 |
+
"loss": 0.1527,
|
| 54058 |
+
"step": 7704
|
| 54059 |
+
},
|
| 54060 |
+
{
|
| 54061 |
+
"epoch": 0.0007705,
|
| 54062 |
+
"grad_norm": 0.6172411441802979,
|
| 54063 |
+
"learning_rate": 7.704000000000001e-05,
|
| 54064 |
+
"loss": 0.1848,
|
| 54065 |
+
"step": 7705
|
| 54066 |
+
},
|
| 54067 |
+
{
|
| 54068 |
+
"epoch": 0.0007706,
|
| 54069 |
+
"grad_norm": 0.7439136505126953,
|
| 54070 |
+
"learning_rate": 7.705e-05,
|
| 54071 |
+
"loss": 0.2045,
|
| 54072 |
+
"step": 7706
|
| 54073 |
+
},
|
| 54074 |
+
{
|
| 54075 |
+
"epoch": 0.0007707,
|
| 54076 |
+
"grad_norm": 0.43097370862960815,
|
| 54077 |
+
"learning_rate": 7.706e-05,
|
| 54078 |
+
"loss": 0.1796,
|
| 54079 |
+
"step": 7707
|
| 54080 |
+
},
|
| 54081 |
+
{
|
| 54082 |
+
"epoch": 0.0007708,
|
| 54083 |
+
"grad_norm": 0.41762420535087585,
|
| 54084 |
+
"learning_rate": 7.707e-05,
|
| 54085 |
+
"loss": 0.1705,
|
| 54086 |
+
"step": 7708
|
| 54087 |
+
},
|
| 54088 |
+
{
|
| 54089 |
+
"epoch": 0.0007709,
|
| 54090 |
+
"grad_norm": 0.5003089308738708,
|
| 54091 |
+
"learning_rate": 7.708e-05,
|
| 54092 |
+
"loss": 0.1791,
|
| 54093 |
+
"step": 7709
|
| 54094 |
+
},
|
| 54095 |
+
{
|
| 54096 |
+
"epoch": 0.000771,
|
| 54097 |
+
"grad_norm": 0.3558039963245392,
|
| 54098 |
+
"learning_rate": 7.709e-05,
|
| 54099 |
+
"loss": 0.1578,
|
| 54100 |
+
"step": 7710
|
| 54101 |
+
},
|
| 54102 |
+
{
|
| 54103 |
+
"epoch": 0.0007711,
|
| 54104 |
+
"grad_norm": 0.436168372631073,
|
| 54105 |
+
"learning_rate": 7.71e-05,
|
| 54106 |
+
"loss": 0.1694,
|
| 54107 |
+
"step": 7711
|
| 54108 |
+
},
|
| 54109 |
+
{
|
| 54110 |
+
"epoch": 0.0007712,
|
| 54111 |
+
"grad_norm": 0.5260476469993591,
|
| 54112 |
+
"learning_rate": 7.711000000000001e-05,
|
| 54113 |
+
"loss": 0.162,
|
| 54114 |
+
"step": 7712
|
| 54115 |
+
},
|
| 54116 |
+
{
|
| 54117 |
+
"epoch": 0.0007713,
|
| 54118 |
+
"grad_norm": 0.45862051844596863,
|
| 54119 |
+
"learning_rate": 7.712e-05,
|
| 54120 |
+
"loss": 0.1552,
|
| 54121 |
+
"step": 7713
|
| 54122 |
+
},
|
| 54123 |
+
{
|
| 54124 |
+
"epoch": 0.0007714,
|
| 54125 |
+
"grad_norm": 0.3381466567516327,
|
| 54126 |
+
"learning_rate": 7.713e-05,
|
| 54127 |
+
"loss": 0.1447,
|
| 54128 |
+
"step": 7714
|
| 54129 |
+
},
|
| 54130 |
+
{
|
| 54131 |
+
"epoch": 0.0007715,
|
| 54132 |
+
"grad_norm": 0.3526186943054199,
|
| 54133 |
+
"learning_rate": 7.714000000000001e-05,
|
| 54134 |
+
"loss": 0.145,
|
| 54135 |
+
"step": 7715
|
| 54136 |
+
},
|
| 54137 |
+
{
|
| 54138 |
+
"epoch": 0.0007716,
|
| 54139 |
+
"grad_norm": 0.34365805983543396,
|
| 54140 |
+
"learning_rate": 7.714999999999999e-05,
|
| 54141 |
+
"loss": 0.1533,
|
| 54142 |
+
"step": 7716
|
| 54143 |
+
},
|
| 54144 |
+
{
|
| 54145 |
+
"epoch": 0.0007717,
|
| 54146 |
+
"grad_norm": 0.36505240201950073,
|
| 54147 |
+
"learning_rate": 7.716e-05,
|
| 54148 |
+
"loss": 0.1576,
|
| 54149 |
+
"step": 7717
|
| 54150 |
+
},
|
| 54151 |
+
{
|
| 54152 |
+
"epoch": 0.0007718,
|
| 54153 |
+
"grad_norm": 0.5150579214096069,
|
| 54154 |
+
"learning_rate": 7.717000000000001e-05,
|
| 54155 |
+
"loss": 0.1769,
|
| 54156 |
+
"step": 7718
|
| 54157 |
+
},
|
| 54158 |
+
{
|
| 54159 |
+
"epoch": 0.0007719,
|
| 54160 |
+
"grad_norm": 0.3398381471633911,
|
| 54161 |
+
"learning_rate": 7.718e-05,
|
| 54162 |
+
"loss": 0.1509,
|
| 54163 |
+
"step": 7719
|
| 54164 |
+
},
|
| 54165 |
+
{
|
| 54166 |
+
"epoch": 0.000772,
|
| 54167 |
+
"grad_norm": 0.3642643094062805,
|
| 54168 |
+
"learning_rate": 7.719e-05,
|
| 54169 |
+
"loss": 0.1587,
|
| 54170 |
+
"step": 7720
|
| 54171 |
+
},
|
| 54172 |
+
{
|
| 54173 |
+
"epoch": 0.0007721,
|
| 54174 |
+
"grad_norm": 1.7854269742965698,
|
| 54175 |
+
"learning_rate": 7.72e-05,
|
| 54176 |
+
"loss": 0.281,
|
| 54177 |
+
"step": 7721
|
| 54178 |
+
},
|
| 54179 |
+
{
|
| 54180 |
+
"epoch": 0.0007722,
|
| 54181 |
+
"grad_norm": 0.364483505487442,
|
| 54182 |
+
"learning_rate": 7.721e-05,
|
| 54183 |
+
"loss": 0.1516,
|
| 54184 |
+
"step": 7722
|
| 54185 |
+
},
|
| 54186 |
+
{
|
| 54187 |
+
"epoch": 0.0007723,
|
| 54188 |
+
"grad_norm": 0.6805715560913086,
|
| 54189 |
+
"learning_rate": 7.722e-05,
|
| 54190 |
+
"loss": 0.1942,
|
| 54191 |
+
"step": 7723
|
| 54192 |
+
},
|
| 54193 |
+
{
|
| 54194 |
+
"epoch": 0.0007724,
|
| 54195 |
+
"grad_norm": 0.39517566561698914,
|
| 54196 |
+
"learning_rate": 7.723e-05,
|
| 54197 |
+
"loss": 0.1525,
|
| 54198 |
+
"step": 7724
|
| 54199 |
+
},
|
| 54200 |
+
{
|
| 54201 |
+
"epoch": 0.0007725,
|
| 54202 |
+
"grad_norm": 0.37582632899284363,
|
| 54203 |
+
"learning_rate": 7.724e-05,
|
| 54204 |
+
"loss": 0.1567,
|
| 54205 |
+
"step": 7725
|
| 54206 |
+
},
|
| 54207 |
+
{
|
| 54208 |
+
"epoch": 0.0007726,
|
| 54209 |
+
"grad_norm": 0.4059215188026428,
|
| 54210 |
+
"learning_rate": 7.725e-05,
|
| 54211 |
+
"loss": 0.1572,
|
| 54212 |
+
"step": 7726
|
| 54213 |
+
},
|
| 54214 |
+
{
|
| 54215 |
+
"epoch": 0.0007727,
|
| 54216 |
+
"grad_norm": 0.4789154529571533,
|
| 54217 |
+
"learning_rate": 7.726e-05,
|
| 54218 |
+
"loss": 0.1592,
|
| 54219 |
+
"step": 7727
|
| 54220 |
+
},
|
| 54221 |
+
{
|
| 54222 |
+
"epoch": 0.0007728,
|
| 54223 |
+
"grad_norm": 0.6848322749137878,
|
| 54224 |
+
"learning_rate": 7.727e-05,
|
| 54225 |
+
"loss": 0.1976,
|
| 54226 |
+
"step": 7728
|
| 54227 |
+
},
|
| 54228 |
+
{
|
| 54229 |
+
"epoch": 0.0007729,
|
| 54230 |
+
"grad_norm": 0.3184192180633545,
|
| 54231 |
+
"learning_rate": 7.728e-05,
|
| 54232 |
+
"loss": 0.1453,
|
| 54233 |
+
"step": 7729
|
| 54234 |
+
},
|
| 54235 |
+
{
|
| 54236 |
+
"epoch": 0.000773,
|
| 54237 |
+
"grad_norm": 0.35173535346984863,
|
| 54238 |
+
"learning_rate": 7.729e-05,
|
| 54239 |
+
"loss": 0.1531,
|
| 54240 |
+
"step": 7730
|
| 54241 |
+
},
|
| 54242 |
+
{
|
| 54243 |
+
"epoch": 0.0007731,
|
| 54244 |
+
"grad_norm": 0.3543420732021332,
|
| 54245 |
+
"learning_rate": 7.730000000000001e-05,
|
| 54246 |
+
"loss": 0.1516,
|
| 54247 |
+
"step": 7731
|
| 54248 |
+
},
|
| 54249 |
+
{
|
| 54250 |
+
"epoch": 0.0007732,
|
| 54251 |
+
"grad_norm": 0.3434199094772339,
|
| 54252 |
+
"learning_rate": 7.731e-05,
|
| 54253 |
+
"loss": 0.1539,
|
| 54254 |
+
"step": 7732
|
| 54255 |
+
},
|
| 54256 |
+
{
|
| 54257 |
+
"epoch": 0.0007733,
|
| 54258 |
+
"grad_norm": 0.3729701638221741,
|
| 54259 |
+
"learning_rate": 7.732e-05,
|
| 54260 |
+
"loss": 0.156,
|
| 54261 |
+
"step": 7733
|
| 54262 |
+
},
|
| 54263 |
+
{
|
| 54264 |
+
"epoch": 0.0007734,
|
| 54265 |
+
"grad_norm": 0.3544479310512543,
|
| 54266 |
+
"learning_rate": 7.733e-05,
|
| 54267 |
+
"loss": 0.1488,
|
| 54268 |
+
"step": 7734
|
| 54269 |
+
},
|
| 54270 |
+
{
|
| 54271 |
+
"epoch": 0.0007735,
|
| 54272 |
+
"grad_norm": 0.46375659108161926,
|
| 54273 |
+
"learning_rate": 7.734e-05,
|
| 54274 |
+
"loss": 0.1637,
|
| 54275 |
+
"step": 7735
|
| 54276 |
+
},
|
| 54277 |
+
{
|
| 54278 |
+
"epoch": 0.0007736,
|
| 54279 |
+
"grad_norm": 0.3291948437690735,
|
| 54280 |
+
"learning_rate": 7.735e-05,
|
| 54281 |
+
"loss": 0.1565,
|
| 54282 |
+
"step": 7736
|
| 54283 |
+
},
|
| 54284 |
+
{
|
| 54285 |
+
"epoch": 0.0007737,
|
| 54286 |
+
"grad_norm": 0.327825665473938,
|
| 54287 |
+
"learning_rate": 7.736e-05,
|
| 54288 |
+
"loss": 0.1416,
|
| 54289 |
+
"step": 7737
|
| 54290 |
+
},
|
| 54291 |
+
{
|
| 54292 |
+
"epoch": 0.0007738,
|
| 54293 |
+
"grad_norm": 0.5666698217391968,
|
| 54294 |
+
"learning_rate": 7.737e-05,
|
| 54295 |
+
"loss": 0.1598,
|
| 54296 |
+
"step": 7738
|
| 54297 |
+
},
|
| 54298 |
+
{
|
| 54299 |
+
"epoch": 0.0007739,
|
| 54300 |
+
"grad_norm": 0.38737985491752625,
|
| 54301 |
+
"learning_rate": 7.738000000000001e-05,
|
| 54302 |
+
"loss": 0.1444,
|
| 54303 |
+
"step": 7739
|
| 54304 |
+
},
|
| 54305 |
+
{
|
| 54306 |
+
"epoch": 0.000774,
|
| 54307 |
+
"grad_norm": 0.38880079984664917,
|
| 54308 |
+
"learning_rate": 7.739e-05,
|
| 54309 |
+
"loss": 0.1627,
|
| 54310 |
+
"step": 7740
|
| 54311 |
+
},
|
| 54312 |
+
{
|
| 54313 |
+
"epoch": 0.0007741,
|
| 54314 |
+
"grad_norm": 0.4217063784599304,
|
| 54315 |
+
"learning_rate": 7.74e-05,
|
| 54316 |
+
"loss": 0.1664,
|
| 54317 |
+
"step": 7741
|
| 54318 |
+
},
|
| 54319 |
+
{
|
| 54320 |
+
"epoch": 0.0007742,
|
| 54321 |
+
"grad_norm": 0.32560673356056213,
|
| 54322 |
+
"learning_rate": 7.741e-05,
|
| 54323 |
+
"loss": 0.1459,
|
| 54324 |
+
"step": 7742
|
| 54325 |
+
},
|
| 54326 |
+
{
|
| 54327 |
+
"epoch": 0.0007743,
|
| 54328 |
+
"grad_norm": 0.5402155518531799,
|
| 54329 |
+
"learning_rate": 7.742e-05,
|
| 54330 |
+
"loss": 0.1838,
|
| 54331 |
+
"step": 7743
|
| 54332 |
+
},
|
| 54333 |
+
{
|
| 54334 |
+
"epoch": 0.0007744,
|
| 54335 |
+
"grad_norm": 0.3342635929584503,
|
| 54336 |
+
"learning_rate": 7.743e-05,
|
| 54337 |
+
"loss": 0.1528,
|
| 54338 |
+
"step": 7744
|
| 54339 |
+
},
|
| 54340 |
+
{
|
| 54341 |
+
"epoch": 0.0007745,
|
| 54342 |
+
"grad_norm": 0.9251406192779541,
|
| 54343 |
+
"learning_rate": 7.744e-05,
|
| 54344 |
+
"loss": 0.2164,
|
| 54345 |
+
"step": 7745
|
| 54346 |
+
},
|
| 54347 |
+
{
|
| 54348 |
+
"epoch": 0.0007746,
|
| 54349 |
+
"grad_norm": 0.34178224205970764,
|
| 54350 |
+
"learning_rate": 7.745000000000001e-05,
|
| 54351 |
+
"loss": 0.149,
|
| 54352 |
+
"step": 7746
|
| 54353 |
+
},
|
| 54354 |
+
{
|
| 54355 |
+
"epoch": 0.0007747,
|
| 54356 |
+
"grad_norm": 0.7903544902801514,
|
| 54357 |
+
"learning_rate": 7.746e-05,
|
| 54358 |
+
"loss": 0.1927,
|
| 54359 |
+
"step": 7747
|
| 54360 |
+
},
|
| 54361 |
+
{
|
| 54362 |
+
"epoch": 0.0007748,
|
| 54363 |
+
"grad_norm": 0.428143173456192,
|
| 54364 |
+
"learning_rate": 7.747e-05,
|
| 54365 |
+
"loss": 0.1549,
|
| 54366 |
+
"step": 7748
|
| 54367 |
+
},
|
| 54368 |
+
{
|
| 54369 |
+
"epoch": 0.0007749,
|
| 54370 |
+
"grad_norm": 0.44141685962677,
|
| 54371 |
+
"learning_rate": 7.748000000000001e-05,
|
| 54372 |
+
"loss": 0.1468,
|
| 54373 |
+
"step": 7749
|
| 54374 |
+
},
|
| 54375 |
+
{
|
| 54376 |
+
"epoch": 0.000775,
|
| 54377 |
+
"grad_norm": 0.5193094611167908,
|
| 54378 |
+
"learning_rate": 7.748999999999999e-05,
|
| 54379 |
+
"loss": 0.1578,
|
| 54380 |
+
"step": 7750
|
| 54381 |
+
},
|
| 54382 |
+
{
|
| 54383 |
+
"epoch": 0.0007751,
|
| 54384 |
+
"grad_norm": 0.41440680623054504,
|
| 54385 |
+
"learning_rate": 7.75e-05,
|
| 54386 |
+
"loss": 0.1541,
|
| 54387 |
+
"step": 7751
|
| 54388 |
+
},
|
| 54389 |
+
{
|
| 54390 |
+
"epoch": 0.0007752,
|
| 54391 |
+
"grad_norm": 0.5027520060539246,
|
| 54392 |
+
"learning_rate": 7.751000000000001e-05,
|
| 54393 |
+
"loss": 0.1676,
|
| 54394 |
+
"step": 7752
|
| 54395 |
+
},
|
| 54396 |
+
{
|
| 54397 |
+
"epoch": 0.0007753,
|
| 54398 |
+
"grad_norm": 0.4081827998161316,
|
| 54399 |
+
"learning_rate": 7.752e-05,
|
| 54400 |
+
"loss": 0.1498,
|
| 54401 |
+
"step": 7753
|
| 54402 |
+
},
|
| 54403 |
+
{
|
| 54404 |
+
"epoch": 0.0007754,
|
| 54405 |
+
"grad_norm": 0.316013365983963,
|
| 54406 |
+
"learning_rate": 7.753e-05,
|
| 54407 |
+
"loss": 0.142,
|
| 54408 |
+
"step": 7754
|
| 54409 |
+
},
|
| 54410 |
+
{
|
| 54411 |
+
"epoch": 0.0007755,
|
| 54412 |
+
"grad_norm": 0.6248079538345337,
|
| 54413 |
+
"learning_rate": 7.754e-05,
|
| 54414 |
+
"loss": 0.1661,
|
| 54415 |
+
"step": 7755
|
| 54416 |
+
},
|
| 54417 |
+
{
|
| 54418 |
+
"epoch": 0.0007756,
|
| 54419 |
+
"grad_norm": 0.39839357137680054,
|
| 54420 |
+
"learning_rate": 7.755e-05,
|
| 54421 |
+
"loss": 0.1584,
|
| 54422 |
+
"step": 7756
|
| 54423 |
+
},
|
| 54424 |
+
{
|
| 54425 |
+
"epoch": 0.0007757,
|
| 54426 |
+
"grad_norm": 0.4424418807029724,
|
| 54427 |
+
"learning_rate": 7.756e-05,
|
| 54428 |
+
"loss": 0.165,
|
| 54429 |
+
"step": 7757
|
| 54430 |
+
},
|
| 54431 |
+
{
|
| 54432 |
+
"epoch": 0.0007758,
|
| 54433 |
+
"grad_norm": 0.4060595631599426,
|
| 54434 |
+
"learning_rate": 7.757e-05,
|
| 54435 |
+
"loss": 0.1548,
|
| 54436 |
+
"step": 7758
|
| 54437 |
+
},
|
| 54438 |
+
{
|
| 54439 |
+
"epoch": 0.0007759,
|
| 54440 |
+
"grad_norm": 0.4461621046066284,
|
| 54441 |
+
"learning_rate": 7.758e-05,
|
| 54442 |
+
"loss": 0.1583,
|
| 54443 |
+
"step": 7759
|
| 54444 |
+
},
|
| 54445 |
+
{
|
| 54446 |
+
"epoch": 0.000776,
|
| 54447 |
+
"grad_norm": 0.41382384300231934,
|
| 54448 |
+
"learning_rate": 7.759e-05,
|
| 54449 |
+
"loss": 0.1623,
|
| 54450 |
+
"step": 7760
|
| 54451 |
+
},
|
| 54452 |
+
{
|
| 54453 |
+
"epoch": 0.0007761,
|
| 54454 |
+
"grad_norm": 0.608735203742981,
|
| 54455 |
+
"learning_rate": 7.76e-05,
|
| 54456 |
+
"loss": 0.1565,
|
| 54457 |
+
"step": 7761
|
| 54458 |
+
},
|
| 54459 |
+
{
|
| 54460 |
+
"epoch": 0.0007762,
|
| 54461 |
+
"grad_norm": 0.4113566279411316,
|
| 54462 |
+
"learning_rate": 7.761e-05,
|
| 54463 |
+
"loss": 0.1598,
|
| 54464 |
+
"step": 7762
|
| 54465 |
+
},
|
| 54466 |
+
{
|
| 54467 |
+
"epoch": 0.0007763,
|
| 54468 |
+
"grad_norm": 0.3585749864578247,
|
| 54469 |
+
"learning_rate": 7.762e-05,
|
| 54470 |
+
"loss": 0.1584,
|
| 54471 |
+
"step": 7763
|
| 54472 |
+
},
|
| 54473 |
+
{
|
| 54474 |
+
"epoch": 0.0007764,
|
| 54475 |
+
"grad_norm": 0.4358258545398712,
|
| 54476 |
+
"learning_rate": 7.763e-05,
|
| 54477 |
+
"loss": 0.1681,
|
| 54478 |
+
"step": 7764
|
| 54479 |
+
},
|
| 54480 |
+
{
|
| 54481 |
+
"epoch": 0.0007765,
|
| 54482 |
+
"grad_norm": 0.37854641675949097,
|
| 54483 |
+
"learning_rate": 7.764000000000001e-05,
|
| 54484 |
+
"loss": 0.1599,
|
| 54485 |
+
"step": 7765
|
| 54486 |
+
},
|
| 54487 |
+
{
|
| 54488 |
+
"epoch": 0.0007766,
|
| 54489 |
+
"grad_norm": 0.37787294387817383,
|
| 54490 |
+
"learning_rate": 7.765e-05,
|
| 54491 |
+
"loss": 0.1555,
|
| 54492 |
+
"step": 7766
|
| 54493 |
+
},
|
| 54494 |
+
{
|
| 54495 |
+
"epoch": 0.0007767,
|
| 54496 |
+
"grad_norm": 0.3425155282020569,
|
| 54497 |
+
"learning_rate": 7.766e-05,
|
| 54498 |
+
"loss": 0.158,
|
| 54499 |
+
"step": 7767
|
| 54500 |
+
},
|
| 54501 |
+
{
|
| 54502 |
+
"epoch": 0.0007768,
|
| 54503 |
+
"grad_norm": 0.3806110918521881,
|
| 54504 |
+
"learning_rate": 7.767000000000001e-05,
|
| 54505 |
+
"loss": 0.1537,
|
| 54506 |
+
"step": 7768
|
| 54507 |
+
},
|
| 54508 |
+
{
|
| 54509 |
+
"epoch": 0.0007769,
|
| 54510 |
+
"grad_norm": 0.5556537508964539,
|
| 54511 |
+
"learning_rate": 7.768e-05,
|
| 54512 |
+
"loss": 0.1769,
|
| 54513 |
+
"step": 7769
|
| 54514 |
+
},
|
| 54515 |
+
{
|
| 54516 |
+
"epoch": 0.000777,
|
| 54517 |
+
"grad_norm": 0.40484434366226196,
|
| 54518 |
+
"learning_rate": 7.769e-05,
|
| 54519 |
+
"loss": 0.1725,
|
| 54520 |
+
"step": 7770
|
| 54521 |
+
},
|
| 54522 |
+
{
|
| 54523 |
+
"epoch": 0.0007771,
|
| 54524 |
+
"grad_norm": 0.3824002146720886,
|
| 54525 |
+
"learning_rate": 7.77e-05,
|
| 54526 |
+
"loss": 0.1613,
|
| 54527 |
+
"step": 7771
|
| 54528 |
+
},
|
| 54529 |
+
{
|
| 54530 |
+
"epoch": 0.0007772,
|
| 54531 |
+
"grad_norm": 0.3158770203590393,
|
| 54532 |
+
"learning_rate": 7.771e-05,
|
| 54533 |
+
"loss": 0.141,
|
| 54534 |
+
"step": 7772
|
| 54535 |
+
},
|
| 54536 |
+
{
|
| 54537 |
+
"epoch": 0.0007773,
|
| 54538 |
+
"grad_norm": 0.3717173933982849,
|
| 54539 |
+
"learning_rate": 7.772000000000001e-05,
|
| 54540 |
+
"loss": 0.1532,
|
| 54541 |
+
"step": 7773
|
| 54542 |
+
},
|
| 54543 |
+
{
|
| 54544 |
+
"epoch": 0.0007774,
|
| 54545 |
+
"grad_norm": 0.4274844229221344,
|
| 54546 |
+
"learning_rate": 7.773e-05,
|
| 54547 |
+
"loss": 0.1587,
|
| 54548 |
+
"step": 7774
|
| 54549 |
+
},
|
| 54550 |
+
{
|
| 54551 |
+
"epoch": 0.0007775,
|
| 54552 |
+
"grad_norm": 0.4046543836593628,
|
| 54553 |
+
"learning_rate": 7.774e-05,
|
| 54554 |
+
"loss": 0.1643,
|
| 54555 |
+
"step": 7775
|
| 54556 |
+
},
|
| 54557 |
+
{
|
| 54558 |
+
"epoch": 0.0007776,
|
| 54559 |
+
"grad_norm": 0.3226039409637451,
|
| 54560 |
+
"learning_rate": 7.775e-05,
|
| 54561 |
+
"loss": 0.1456,
|
| 54562 |
+
"step": 7776
|
| 54563 |
+
},
|
| 54564 |
+
{
|
| 54565 |
+
"epoch": 0.0007777,
|
| 54566 |
+
"grad_norm": 0.5570123195648193,
|
| 54567 |
+
"learning_rate": 7.776e-05,
|
| 54568 |
+
"loss": 0.1631,
|
| 54569 |
+
"step": 7777
|
| 54570 |
+
},
|
| 54571 |
+
{
|
| 54572 |
+
"epoch": 0.0007778,
|
| 54573 |
+
"grad_norm": 0.3571377098560333,
|
| 54574 |
+
"learning_rate": 7.777e-05,
|
| 54575 |
+
"loss": 0.155,
|
| 54576 |
+
"step": 7778
|
| 54577 |
+
},
|
| 54578 |
+
{
|
| 54579 |
+
"epoch": 0.0007779,
|
| 54580 |
+
"grad_norm": 1.138600468635559,
|
| 54581 |
+
"learning_rate": 7.778e-05,
|
| 54582 |
+
"loss": 0.2061,
|
| 54583 |
+
"step": 7779
|
| 54584 |
+
},
|
| 54585 |
+
{
|
| 54586 |
+
"epoch": 0.000778,
|
| 54587 |
+
"grad_norm": 0.3432740867137909,
|
| 54588 |
+
"learning_rate": 7.779000000000001e-05,
|
| 54589 |
+
"loss": 0.1429,
|
| 54590 |
+
"step": 7780
|
| 54591 |
+
},
|
| 54592 |
+
{
|
| 54593 |
+
"epoch": 0.0007781,
|
| 54594 |
+
"grad_norm": 0.4284251928329468,
|
| 54595 |
+
"learning_rate": 7.78e-05,
|
| 54596 |
+
"loss": 0.1482,
|
| 54597 |
+
"step": 7781
|
| 54598 |
+
},
|
| 54599 |
+
{
|
| 54600 |
+
"epoch": 0.0007782,
|
| 54601 |
+
"grad_norm": 0.4238644540309906,
|
| 54602 |
+
"learning_rate": 7.781e-05,
|
| 54603 |
+
"loss": 0.144,
|
| 54604 |
+
"step": 7782
|
| 54605 |
+
},
|
| 54606 |
+
{
|
| 54607 |
+
"epoch": 0.0007783,
|
| 54608 |
+
"grad_norm": 0.44244638085365295,
|
| 54609 |
+
"learning_rate": 7.782000000000001e-05,
|
| 54610 |
+
"loss": 0.1576,
|
| 54611 |
+
"step": 7783
|
| 54612 |
+
},
|
| 54613 |
+
{
|
| 54614 |
+
"epoch": 0.0007784,
|
| 54615 |
+
"grad_norm": 0.37761566042900085,
|
| 54616 |
+
"learning_rate": 7.782999999999999e-05,
|
| 54617 |
+
"loss": 0.1478,
|
| 54618 |
+
"step": 7784
|
| 54619 |
+
},
|
| 54620 |
+
{
|
| 54621 |
+
"epoch": 0.0007785,
|
| 54622 |
+
"grad_norm": 0.4310877025127411,
|
| 54623 |
+
"learning_rate": 7.784e-05,
|
| 54624 |
+
"loss": 0.1544,
|
| 54625 |
+
"step": 7785
|
| 54626 |
+
},
|
| 54627 |
+
{
|
| 54628 |
+
"epoch": 0.0007786,
|
| 54629 |
+
"grad_norm": 0.3911275863647461,
|
| 54630 |
+
"learning_rate": 7.785000000000001e-05,
|
| 54631 |
+
"loss": 0.1542,
|
| 54632 |
+
"step": 7786
|
| 54633 |
+
},
|
| 54634 |
+
{
|
| 54635 |
+
"epoch": 0.0007787,
|
| 54636 |
+
"grad_norm": 0.3833433985710144,
|
| 54637 |
+
"learning_rate": 7.786e-05,
|
| 54638 |
+
"loss": 0.1534,
|
| 54639 |
+
"step": 7787
|
| 54640 |
+
},
|
| 54641 |
+
{
|
| 54642 |
+
"epoch": 0.0007788,
|
| 54643 |
+
"grad_norm": 0.3688901364803314,
|
| 54644 |
+
"learning_rate": 7.787e-05,
|
| 54645 |
+
"loss": 0.1469,
|
| 54646 |
+
"step": 7788
|
| 54647 |
+
},
|
| 54648 |
+
{
|
| 54649 |
+
"epoch": 0.0007789,
|
| 54650 |
+
"grad_norm": 0.39441004395484924,
|
| 54651 |
+
"learning_rate": 7.788e-05,
|
| 54652 |
+
"loss": 0.1512,
|
| 54653 |
+
"step": 7789
|
| 54654 |
+
},
|
| 54655 |
+
{
|
| 54656 |
+
"epoch": 0.000779,
|
| 54657 |
+
"grad_norm": 0.37613943219184875,
|
| 54658 |
+
"learning_rate": 7.789e-05,
|
| 54659 |
+
"loss": 0.1494,
|
| 54660 |
+
"step": 7790
|
| 54661 |
+
},
|
| 54662 |
+
{
|
| 54663 |
+
"epoch": 0.0007791,
|
| 54664 |
+
"grad_norm": 0.41287046670913696,
|
| 54665 |
+
"learning_rate": 7.79e-05,
|
| 54666 |
+
"loss": 0.1469,
|
| 54667 |
+
"step": 7791
|
| 54668 |
+
},
|
| 54669 |
+
{
|
| 54670 |
+
"epoch": 0.0007792,
|
| 54671 |
+
"grad_norm": 0.35042914748191833,
|
| 54672 |
+
"learning_rate": 7.791e-05,
|
| 54673 |
+
"loss": 0.1486,
|
| 54674 |
+
"step": 7792
|
| 54675 |
+
},
|
| 54676 |
+
{
|
| 54677 |
+
"epoch": 0.0007793,
|
| 54678 |
+
"grad_norm": 0.3768109977245331,
|
| 54679 |
+
"learning_rate": 7.792e-05,
|
| 54680 |
+
"loss": 0.152,
|
| 54681 |
+
"step": 7793
|
| 54682 |
+
},
|
| 54683 |
+
{
|
| 54684 |
+
"epoch": 0.0007794,
|
| 54685 |
+
"grad_norm": 0.33065614104270935,
|
| 54686 |
+
"learning_rate": 7.793e-05,
|
| 54687 |
+
"loss": 0.1479,
|
| 54688 |
+
"step": 7794
|
| 54689 |
+
},
|
| 54690 |
+
{
|
| 54691 |
+
"epoch": 0.0007795,
|
| 54692 |
+
"grad_norm": 0.3601537048816681,
|
| 54693 |
+
"learning_rate": 7.794e-05,
|
| 54694 |
+
"loss": 0.1597,
|
| 54695 |
+
"step": 7795
|
| 54696 |
+
},
|
| 54697 |
+
{
|
| 54698 |
+
"epoch": 0.0007796,
|
| 54699 |
+
"grad_norm": 0.34998899698257446,
|
| 54700 |
+
"learning_rate": 7.795e-05,
|
| 54701 |
+
"loss": 0.1503,
|
| 54702 |
+
"step": 7796
|
| 54703 |
+
},
|
| 54704 |
+
{
|
| 54705 |
+
"epoch": 0.0007797,
|
| 54706 |
+
"grad_norm": 0.3146880865097046,
|
| 54707 |
+
"learning_rate": 7.796e-05,
|
| 54708 |
+
"loss": 0.1412,
|
| 54709 |
+
"step": 7797
|
| 54710 |
+
},
|
| 54711 |
+
{
|
| 54712 |
+
"epoch": 0.0007798,
|
| 54713 |
+
"grad_norm": 0.3786034882068634,
|
| 54714 |
+
"learning_rate": 7.797e-05,
|
| 54715 |
+
"loss": 0.1472,
|
| 54716 |
+
"step": 7798
|
| 54717 |
+
},
|
| 54718 |
+
{
|
| 54719 |
+
"epoch": 0.0007799,
|
| 54720 |
+
"grad_norm": 0.3339554965496063,
|
| 54721 |
+
"learning_rate": 7.798000000000001e-05,
|
| 54722 |
+
"loss": 0.1521,
|
| 54723 |
+
"step": 7799
|
| 54724 |
+
},
|
| 54725 |
+
{
|
| 54726 |
+
"epoch": 0.00078,
|
| 54727 |
+
"grad_norm": 0.3790935277938843,
|
| 54728 |
+
"learning_rate": 7.799e-05,
|
| 54729 |
+
"loss": 0.1631,
|
| 54730 |
+
"step": 7800
|
| 54731 |
+
},
|
| 54732 |
+
{
|
| 54733 |
+
"epoch": 0.0007801,
|
| 54734 |
+
"grad_norm": 0.39043620228767395,
|
| 54735 |
+
"learning_rate": 7.8e-05,
|
| 54736 |
+
"loss": 0.1442,
|
| 54737 |
+
"step": 7801
|
| 54738 |
+
},
|
| 54739 |
+
{
|
| 54740 |
+
"epoch": 0.0007802,
|
| 54741 |
+
"grad_norm": 0.3289644420146942,
|
| 54742 |
+
"learning_rate": 7.801000000000001e-05,
|
| 54743 |
+
"loss": 0.1454,
|
| 54744 |
+
"step": 7802
|
| 54745 |
+
},
|
| 54746 |
+
{
|
| 54747 |
+
"epoch": 0.0007803,
|
| 54748 |
+
"grad_norm": 0.33908987045288086,
|
| 54749 |
+
"learning_rate": 7.802e-05,
|
| 54750 |
+
"loss": 0.1498,
|
| 54751 |
+
"step": 7803
|
| 54752 |
+
},
|
| 54753 |
+
{
|
| 54754 |
+
"epoch": 0.0007804,
|
| 54755 |
+
"grad_norm": 0.5826635360717773,
|
| 54756 |
+
"learning_rate": 7.803e-05,
|
| 54757 |
+
"loss": 0.1678,
|
| 54758 |
+
"step": 7804
|
| 54759 |
+
},
|
| 54760 |
+
{
|
| 54761 |
+
"epoch": 0.0007805,
|
| 54762 |
+
"grad_norm": 0.32743117213249207,
|
| 54763 |
+
"learning_rate": 7.804e-05,
|
| 54764 |
+
"loss": 0.15,
|
| 54765 |
+
"step": 7805
|
| 54766 |
+
},
|
| 54767 |
+
{
|
| 54768 |
+
"epoch": 0.0007806,
|
| 54769 |
+
"grad_norm": 0.35113999247550964,
|
| 54770 |
+
"learning_rate": 7.805e-05,
|
| 54771 |
+
"loss": 0.1476,
|
| 54772 |
+
"step": 7806
|
| 54773 |
+
},
|
| 54774 |
+
{
|
| 54775 |
+
"epoch": 0.0007807,
|
| 54776 |
+
"grad_norm": 0.37699851393699646,
|
| 54777 |
+
"learning_rate": 7.806000000000001e-05,
|
| 54778 |
+
"loss": 0.1589,
|
| 54779 |
+
"step": 7807
|
| 54780 |
+
},
|
| 54781 |
+
{
|
| 54782 |
+
"epoch": 0.0007808,
|
| 54783 |
+
"grad_norm": 0.3135019838809967,
|
| 54784 |
+
"learning_rate": 7.807e-05,
|
| 54785 |
+
"loss": 0.147,
|
| 54786 |
+
"step": 7808
|
| 54787 |
+
},
|
| 54788 |
+
{
|
| 54789 |
+
"epoch": 0.0007809,
|
| 54790 |
+
"grad_norm": 0.5410703420639038,
|
| 54791 |
+
"learning_rate": 7.808e-05,
|
| 54792 |
+
"loss": 0.1752,
|
| 54793 |
+
"step": 7809
|
| 54794 |
+
},
|
| 54795 |
+
{
|
| 54796 |
+
"epoch": 0.000781,
|
| 54797 |
+
"grad_norm": 0.3744533360004425,
|
| 54798 |
+
"learning_rate": 7.809e-05,
|
| 54799 |
+
"loss": 0.1543,
|
| 54800 |
+
"step": 7810
|
| 54801 |
+
},
|
| 54802 |
+
{
|
| 54803 |
+
"epoch": 0.0007811,
|
| 54804 |
+
"grad_norm": 0.3290867507457733,
|
| 54805 |
+
"learning_rate": 7.81e-05,
|
| 54806 |
+
"loss": 0.1504,
|
| 54807 |
+
"step": 7811
|
| 54808 |
+
},
|
| 54809 |
+
{
|
| 54810 |
+
"epoch": 0.0007812,
|
| 54811 |
+
"grad_norm": 0.34372928738594055,
|
| 54812 |
+
"learning_rate": 7.811e-05,
|
| 54813 |
+
"loss": 0.1526,
|
| 54814 |
+
"step": 7812
|
| 54815 |
+
},
|
| 54816 |
+
{
|
| 54817 |
+
"epoch": 0.0007813,
|
| 54818 |
+
"grad_norm": 0.31069982051849365,
|
| 54819 |
+
"learning_rate": 7.812e-05,
|
| 54820 |
+
"loss": 0.1418,
|
| 54821 |
+
"step": 7813
|
| 54822 |
+
},
|
| 54823 |
+
{
|
| 54824 |
+
"epoch": 0.0007814,
|
| 54825 |
+
"grad_norm": 0.40363672375679016,
|
| 54826 |
+
"learning_rate": 7.813000000000001e-05,
|
| 54827 |
+
"loss": 0.1586,
|
| 54828 |
+
"step": 7814
|
| 54829 |
+
},
|
| 54830 |
+
{
|
| 54831 |
+
"epoch": 0.0007815,
|
| 54832 |
+
"grad_norm": 0.40617135167121887,
|
| 54833 |
+
"learning_rate": 7.814e-05,
|
| 54834 |
+
"loss": 0.1495,
|
| 54835 |
+
"step": 7815
|
| 54836 |
+
},
|
| 54837 |
+
{
|
| 54838 |
+
"epoch": 0.0007816,
|
| 54839 |
+
"grad_norm": 0.40958961844444275,
|
| 54840 |
+
"learning_rate": 7.815e-05,
|
| 54841 |
+
"loss": 0.147,
|
| 54842 |
+
"step": 7816
|
| 54843 |
+
},
|
| 54844 |
+
{
|
| 54845 |
+
"epoch": 0.0007817,
|
| 54846 |
+
"grad_norm": 0.4198823869228363,
|
| 54847 |
+
"learning_rate": 7.816000000000001e-05,
|
| 54848 |
+
"loss": 0.1656,
|
| 54849 |
+
"step": 7817
|
| 54850 |
+
},
|
| 54851 |
+
{
|
| 54852 |
+
"epoch": 0.0007818,
|
| 54853 |
+
"grad_norm": 0.3430757522583008,
|
| 54854 |
+
"learning_rate": 7.816999999999999e-05,
|
| 54855 |
+
"loss": 0.1426,
|
| 54856 |
+
"step": 7818
|
| 54857 |
+
},
|
| 54858 |
+
{
|
| 54859 |
+
"epoch": 0.0007819,
|
| 54860 |
+
"grad_norm": 0.4560718834400177,
|
| 54861 |
+
"learning_rate": 7.818e-05,
|
| 54862 |
+
"loss": 0.1598,
|
| 54863 |
+
"step": 7819
|
| 54864 |
+
},
|
| 54865 |
+
{
|
| 54866 |
+
"epoch": 0.000782,
|
| 54867 |
+
"grad_norm": 0.3987734615802765,
|
| 54868 |
+
"learning_rate": 7.819000000000001e-05,
|
| 54869 |
+
"loss": 0.1591,
|
| 54870 |
+
"step": 7820
|
| 54871 |
+
},
|
| 54872 |
+
{
|
| 54873 |
+
"epoch": 0.0007821,
|
| 54874 |
+
"grad_norm": 0.33172231912612915,
|
| 54875 |
+
"learning_rate": 7.82e-05,
|
| 54876 |
+
"loss": 0.1405,
|
| 54877 |
+
"step": 7821
|
| 54878 |
+
},
|
| 54879 |
+
{
|
| 54880 |
+
"epoch": 0.0007822,
|
| 54881 |
+
"grad_norm": 0.33455225825309753,
|
| 54882 |
+
"learning_rate": 7.821e-05,
|
| 54883 |
+
"loss": 0.1505,
|
| 54884 |
+
"step": 7822
|
| 54885 |
+
},
|
| 54886 |
+
{
|
| 54887 |
+
"epoch": 0.0007823,
|
| 54888 |
+
"grad_norm": 0.33328595757484436,
|
| 54889 |
+
"learning_rate": 7.822e-05,
|
| 54890 |
+
"loss": 0.1454,
|
| 54891 |
+
"step": 7823
|
| 54892 |
+
},
|
| 54893 |
+
{
|
| 54894 |
+
"epoch": 0.0007824,
|
| 54895 |
+
"grad_norm": 0.5431308746337891,
|
| 54896 |
+
"learning_rate": 7.823e-05,
|
| 54897 |
+
"loss": 0.1887,
|
| 54898 |
+
"step": 7824
|
| 54899 |
+
},
|
| 54900 |
+
{
|
| 54901 |
+
"epoch": 0.0007825,
|
| 54902 |
+
"grad_norm": 0.3220846951007843,
|
| 54903 |
+
"learning_rate": 7.824e-05,
|
| 54904 |
+
"loss": 0.1449,
|
| 54905 |
+
"step": 7825
|
| 54906 |
+
},
|
| 54907 |
+
{
|
| 54908 |
+
"epoch": 0.0007826,
|
| 54909 |
+
"grad_norm": 0.37850919365882874,
|
| 54910 |
+
"learning_rate": 7.825e-05,
|
| 54911 |
+
"loss": 0.1575,
|
| 54912 |
+
"step": 7826
|
| 54913 |
+
},
|
| 54914 |
+
{
|
| 54915 |
+
"epoch": 0.0007827,
|
| 54916 |
+
"grad_norm": 0.3572224974632263,
|
| 54917 |
+
"learning_rate": 7.826e-05,
|
| 54918 |
+
"loss": 0.1514,
|
| 54919 |
+
"step": 7827
|
| 54920 |
+
},
|
| 54921 |
+
{
|
| 54922 |
+
"epoch": 0.0007828,
|
| 54923 |
+
"grad_norm": 0.3942662179470062,
|
| 54924 |
+
"learning_rate": 7.827e-05,
|
| 54925 |
+
"loss": 0.1654,
|
| 54926 |
+
"step": 7828
|
| 54927 |
+
},
|
| 54928 |
+
{
|
| 54929 |
+
"epoch": 0.0007829,
|
| 54930 |
+
"grad_norm": 0.3946959972381592,
|
| 54931 |
+
"learning_rate": 7.828e-05,
|
| 54932 |
+
"loss": 0.1572,
|
| 54933 |
+
"step": 7829
|
| 54934 |
+
},
|
| 54935 |
+
{
|
| 54936 |
+
"epoch": 0.000783,
|
| 54937 |
+
"grad_norm": 0.3043114244937897,
|
| 54938 |
+
"learning_rate": 7.829e-05,
|
| 54939 |
+
"loss": 0.1438,
|
| 54940 |
+
"step": 7830
|
| 54941 |
+
},
|
| 54942 |
+
{
|
| 54943 |
+
"epoch": 0.0007831,
|
| 54944 |
+
"grad_norm": 0.41395580768585205,
|
| 54945 |
+
"learning_rate": 7.83e-05,
|
| 54946 |
+
"loss": 0.1577,
|
| 54947 |
+
"step": 7831
|
| 54948 |
+
},
|
| 54949 |
+
{
|
| 54950 |
+
"epoch": 0.0007832,
|
| 54951 |
+
"grad_norm": 0.3440980613231659,
|
| 54952 |
+
"learning_rate": 7.831e-05,
|
| 54953 |
+
"loss": 0.156,
|
| 54954 |
+
"step": 7832
|
| 54955 |
+
},
|
| 54956 |
+
{
|
| 54957 |
+
"epoch": 0.0007833,
|
| 54958 |
+
"grad_norm": 0.406460165977478,
|
| 54959 |
+
"learning_rate": 7.832000000000001e-05,
|
| 54960 |
+
"loss": 0.1556,
|
| 54961 |
+
"step": 7833
|
| 54962 |
+
},
|
| 54963 |
+
{
|
| 54964 |
+
"epoch": 0.0007834,
|
| 54965 |
+
"grad_norm": 0.5229368209838867,
|
| 54966 |
+
"learning_rate": 7.833e-05,
|
| 54967 |
+
"loss": 0.1759,
|
| 54968 |
+
"step": 7834
|
| 54969 |
+
},
|
| 54970 |
+
{
|
| 54971 |
+
"epoch": 0.0007835,
|
| 54972 |
+
"grad_norm": 0.42203351855278015,
|
| 54973 |
+
"learning_rate": 7.834e-05,
|
| 54974 |
+
"loss": 0.1543,
|
| 54975 |
+
"step": 7835
|
| 54976 |
+
},
|
| 54977 |
+
{
|
| 54978 |
+
"epoch": 0.0007836,
|
| 54979 |
+
"grad_norm": 0.3226388394832611,
|
| 54980 |
+
"learning_rate": 7.835000000000001e-05,
|
| 54981 |
+
"loss": 0.1447,
|
| 54982 |
+
"step": 7836
|
| 54983 |
+
},
|
| 54984 |
+
{
|
| 54985 |
+
"epoch": 0.0007837,
|
| 54986 |
+
"grad_norm": 0.3410513997077942,
|
| 54987 |
+
"learning_rate": 7.836e-05,
|
| 54988 |
+
"loss": 0.1506,
|
| 54989 |
+
"step": 7837
|
| 54990 |
+
},
|
| 54991 |
+
{
|
| 54992 |
+
"epoch": 0.0007838,
|
| 54993 |
+
"grad_norm": 0.34009653329849243,
|
| 54994 |
+
"learning_rate": 7.837e-05,
|
| 54995 |
+
"loss": 0.1454,
|
| 54996 |
+
"step": 7838
|
| 54997 |
+
},
|
| 54998 |
+
{
|
| 54999 |
+
"epoch": 0.0007839,
|
| 55000 |
+
"grad_norm": 0.47929927706718445,
|
| 55001 |
+
"learning_rate": 7.838e-05,
|
| 55002 |
+
"loss": 0.1639,
|
| 55003 |
+
"step": 7839
|
| 55004 |
+
},
|
| 55005 |
+
{
|
| 55006 |
+
"epoch": 0.000784,
|
| 55007 |
+
"grad_norm": 0.5508768558502197,
|
| 55008 |
+
"learning_rate": 7.839e-05,
|
| 55009 |
+
"loss": 0.1681,
|
| 55010 |
+
"step": 7840
|
| 55011 |
+
},
|
| 55012 |
+
{
|
| 55013 |
+
"epoch": 0.0007841,
|
| 55014 |
+
"grad_norm": 0.8741626143455505,
|
| 55015 |
+
"learning_rate": 7.840000000000001e-05,
|
| 55016 |
+
"loss": 0.1545,
|
| 55017 |
+
"step": 7841
|
| 55018 |
+
},
|
| 55019 |
+
{
|
| 55020 |
+
"epoch": 0.0007842,
|
| 55021 |
+
"grad_norm": 0.7747453451156616,
|
| 55022 |
+
"learning_rate": 7.841e-05,
|
| 55023 |
+
"loss": 0.1639,
|
| 55024 |
+
"step": 7842
|
| 55025 |
+
},
|
| 55026 |
+
{
|
| 55027 |
+
"epoch": 0.0007843,
|
| 55028 |
+
"grad_norm": 0.3523169755935669,
|
| 55029 |
+
"learning_rate": 7.842e-05,
|
| 55030 |
+
"loss": 0.1378,
|
| 55031 |
+
"step": 7843
|
| 55032 |
+
},
|
| 55033 |
+
{
|
| 55034 |
+
"epoch": 0.0007844,
|
| 55035 |
+
"grad_norm": 0.36965256929397583,
|
| 55036 |
+
"learning_rate": 7.843e-05,
|
| 55037 |
+
"loss": 0.1441,
|
| 55038 |
+
"step": 7844
|
| 55039 |
+
},
|
| 55040 |
+
{
|
| 55041 |
+
"epoch": 0.0007845,
|
| 55042 |
+
"grad_norm": 0.49674296379089355,
|
| 55043 |
+
"learning_rate": 7.844e-05,
|
| 55044 |
+
"loss": 0.1678,
|
| 55045 |
+
"step": 7845
|
| 55046 |
+
},
|
| 55047 |
+
{
|
| 55048 |
+
"epoch": 0.0007846,
|
| 55049 |
+
"grad_norm": 0.40077468752861023,
|
| 55050 |
+
"learning_rate": 7.845e-05,
|
| 55051 |
+
"loss": 0.1493,
|
| 55052 |
+
"step": 7846
|
| 55053 |
+
},
|
| 55054 |
+
{
|
| 55055 |
+
"epoch": 0.0007847,
|
| 55056 |
+
"grad_norm": 0.5569064617156982,
|
| 55057 |
+
"learning_rate": 7.846e-05,
|
| 55058 |
+
"loss": 0.1643,
|
| 55059 |
+
"step": 7847
|
| 55060 |
+
},
|
| 55061 |
+
{
|
| 55062 |
+
"epoch": 0.0007848,
|
| 55063 |
+
"grad_norm": 0.7520188093185425,
|
| 55064 |
+
"learning_rate": 7.847000000000001e-05,
|
| 55065 |
+
"loss": 0.1694,
|
| 55066 |
+
"step": 7848
|
| 55067 |
+
},
|
| 55068 |
+
{
|
| 55069 |
+
"epoch": 0.0007849,
|
| 55070 |
+
"grad_norm": 0.3864554762840271,
|
| 55071 |
+
"learning_rate": 7.848e-05,
|
| 55072 |
+
"loss": 0.1467,
|
| 55073 |
+
"step": 7849
|
| 55074 |
+
},
|
| 55075 |
+
{
|
| 55076 |
+
"epoch": 0.000785,
|
| 55077 |
+
"grad_norm": 0.4093283414840698,
|
| 55078 |
+
"learning_rate": 7.849e-05,
|
| 55079 |
+
"loss": 0.1493,
|
| 55080 |
+
"step": 7850
|
| 55081 |
+
},
|
| 55082 |
+
{
|
| 55083 |
+
"epoch": 0.0007851,
|
| 55084 |
+
"grad_norm": 0.36037686467170715,
|
| 55085 |
+
"learning_rate": 7.850000000000001e-05,
|
| 55086 |
+
"loss": 0.1345,
|
| 55087 |
+
"step": 7851
|
| 55088 |
+
},
|
| 55089 |
+
{
|
| 55090 |
+
"epoch": 0.0007852,
|
| 55091 |
+
"grad_norm": 0.4377402365207672,
|
| 55092 |
+
"learning_rate": 7.850999999999999e-05,
|
| 55093 |
+
"loss": 0.147,
|
| 55094 |
+
"step": 7852
|
| 55095 |
+
},
|
| 55096 |
+
{
|
| 55097 |
+
"epoch": 0.0007853,
|
| 55098 |
+
"grad_norm": 0.5319004058837891,
|
| 55099 |
+
"learning_rate": 7.852e-05,
|
| 55100 |
+
"loss": 0.1862,
|
| 55101 |
+
"step": 7853
|
| 55102 |
+
},
|
| 55103 |
+
{
|
| 55104 |
+
"epoch": 0.0007854,
|
| 55105 |
+
"grad_norm": 0.3483973741531372,
|
| 55106 |
+
"learning_rate": 7.853000000000001e-05,
|
| 55107 |
+
"loss": 0.1357,
|
| 55108 |
+
"step": 7854
|
| 55109 |
+
},
|
| 55110 |
+
{
|
| 55111 |
+
"epoch": 0.0007855,
|
| 55112 |
+
"grad_norm": 0.7695083022117615,
|
| 55113 |
+
"learning_rate": 7.854e-05,
|
| 55114 |
+
"loss": 0.2,
|
| 55115 |
+
"step": 7855
|
| 55116 |
+
},
|
| 55117 |
+
{
|
| 55118 |
+
"epoch": 0.0007856,
|
| 55119 |
+
"grad_norm": 0.37926194071769714,
|
| 55120 |
+
"learning_rate": 7.855e-05,
|
| 55121 |
+
"loss": 0.1421,
|
| 55122 |
+
"step": 7856
|
| 55123 |
+
},
|
| 55124 |
+
{
|
| 55125 |
+
"epoch": 0.0007857,
|
| 55126 |
+
"grad_norm": 0.4063139259815216,
|
| 55127 |
+
"learning_rate": 7.856000000000001e-05,
|
| 55128 |
+
"loss": 0.1527,
|
| 55129 |
+
"step": 7857
|
| 55130 |
+
},
|
| 55131 |
+
{
|
| 55132 |
+
"epoch": 0.0007858,
|
| 55133 |
+
"grad_norm": 0.5253282785415649,
|
| 55134 |
+
"learning_rate": 7.857e-05,
|
| 55135 |
+
"loss": 0.1573,
|
| 55136 |
+
"step": 7858
|
| 55137 |
+
},
|
| 55138 |
+
{
|
| 55139 |
+
"epoch": 0.0007859,
|
| 55140 |
+
"grad_norm": 0.3392678499221802,
|
| 55141 |
+
"learning_rate": 7.858e-05,
|
| 55142 |
+
"loss": 0.1376,
|
| 55143 |
+
"step": 7859
|
| 55144 |
+
},
|
| 55145 |
+
{
|
| 55146 |
+
"epoch": 0.000786,
|
| 55147 |
+
"grad_norm": 0.41936904191970825,
|
| 55148 |
+
"learning_rate": 7.859e-05,
|
| 55149 |
+
"loss": 0.1471,
|
| 55150 |
+
"step": 7860
|
| 55151 |
+
},
|
| 55152 |
+
{
|
| 55153 |
+
"epoch": 0.0007861,
|
| 55154 |
+
"grad_norm": 0.6929224729537964,
|
| 55155 |
+
"learning_rate": 7.86e-05,
|
| 55156 |
+
"loss": 0.1624,
|
| 55157 |
+
"step": 7861
|
| 55158 |
+
},
|
| 55159 |
+
{
|
| 55160 |
+
"epoch": 0.0007862,
|
| 55161 |
+
"grad_norm": 0.35438454151153564,
|
| 55162 |
+
"learning_rate": 7.861e-05,
|
| 55163 |
+
"loss": 0.1478,
|
| 55164 |
+
"step": 7862
|
| 55165 |
+
},
|
| 55166 |
+
{
|
| 55167 |
+
"epoch": 0.0007863,
|
| 55168 |
+
"grad_norm": 0.3281148374080658,
|
| 55169 |
+
"learning_rate": 7.862e-05,
|
| 55170 |
+
"loss": 0.1339,
|
| 55171 |
+
"step": 7863
|
| 55172 |
+
},
|
| 55173 |
+
{
|
| 55174 |
+
"epoch": 0.0007864,
|
| 55175 |
+
"grad_norm": 0.3948661983013153,
|
| 55176 |
+
"learning_rate": 7.863e-05,
|
| 55177 |
+
"loss": 0.1462,
|
| 55178 |
+
"step": 7864
|
| 55179 |
+
},
|
| 55180 |
+
{
|
| 55181 |
+
"epoch": 0.0007865,
|
| 55182 |
+
"grad_norm": 0.5109783411026001,
|
| 55183 |
+
"learning_rate": 7.864e-05,
|
| 55184 |
+
"loss": 0.1672,
|
| 55185 |
+
"step": 7865
|
| 55186 |
+
},
|
| 55187 |
+
{
|
| 55188 |
+
"epoch": 0.0007866,
|
| 55189 |
+
"grad_norm": 0.48566001653671265,
|
| 55190 |
+
"learning_rate": 7.865e-05,
|
| 55191 |
+
"loss": 0.1656,
|
| 55192 |
+
"step": 7866
|
| 55193 |
+
},
|
| 55194 |
+
{
|
| 55195 |
+
"epoch": 0.0007867,
|
| 55196 |
+
"grad_norm": 0.3471790552139282,
|
| 55197 |
+
"learning_rate": 7.866e-05,
|
| 55198 |
+
"loss": 0.1484,
|
| 55199 |
+
"step": 7867
|
| 55200 |
+
},
|
| 55201 |
+
{
|
| 55202 |
+
"epoch": 0.0007868,
|
| 55203 |
+
"grad_norm": 0.322068989276886,
|
| 55204 |
+
"learning_rate": 7.867e-05,
|
| 55205 |
+
"loss": 0.1382,
|
| 55206 |
+
"step": 7868
|
| 55207 |
+
},
|
| 55208 |
+
{
|
| 55209 |
+
"epoch": 0.0007869,
|
| 55210 |
+
"grad_norm": 0.2925344407558441,
|
| 55211 |
+
"learning_rate": 7.868e-05,
|
| 55212 |
+
"loss": 0.1327,
|
| 55213 |
+
"step": 7869
|
| 55214 |
+
},
|
| 55215 |
+
{
|
| 55216 |
+
"epoch": 0.000787,
|
| 55217 |
+
"grad_norm": 0.43323126435279846,
|
| 55218 |
+
"learning_rate": 7.869000000000001e-05,
|
| 55219 |
+
"loss": 0.151,
|
| 55220 |
+
"step": 7870
|
| 55221 |
+
},
|
| 55222 |
+
{
|
| 55223 |
+
"epoch": 0.0007871,
|
| 55224 |
+
"grad_norm": 0.3427598476409912,
|
| 55225 |
+
"learning_rate": 7.87e-05,
|
| 55226 |
+
"loss": 0.1414,
|
| 55227 |
+
"step": 7871
|
| 55228 |
+
},
|
| 55229 |
+
{
|
| 55230 |
+
"epoch": 0.0007872,
|
| 55231 |
+
"grad_norm": 0.36143046617507935,
|
| 55232 |
+
"learning_rate": 7.871e-05,
|
| 55233 |
+
"loss": 0.1459,
|
| 55234 |
+
"step": 7872
|
| 55235 |
+
},
|
| 55236 |
+
{
|
| 55237 |
+
"epoch": 0.0007873,
|
| 55238 |
+
"grad_norm": 0.4235434830188751,
|
| 55239 |
+
"learning_rate": 7.872e-05,
|
| 55240 |
+
"loss": 0.1543,
|
| 55241 |
+
"step": 7873
|
| 55242 |
+
},
|
| 55243 |
+
{
|
| 55244 |
+
"epoch": 0.0007874,
|
| 55245 |
+
"grad_norm": 0.32518136501312256,
|
| 55246 |
+
"learning_rate": 7.873e-05,
|
| 55247 |
+
"loss": 0.1453,
|
| 55248 |
+
"step": 7874
|
| 55249 |
+
},
|
| 55250 |
+
{
|
| 55251 |
+
"epoch": 0.0007875,
|
| 55252 |
+
"grad_norm": 0.30913761258125305,
|
| 55253 |
+
"learning_rate": 7.874000000000001e-05,
|
| 55254 |
+
"loss": 0.1451,
|
| 55255 |
+
"step": 7875
|
| 55256 |
+
},
|
| 55257 |
+
{
|
| 55258 |
+
"epoch": 0.0007876,
|
| 55259 |
+
"grad_norm": 0.3425231873989105,
|
| 55260 |
+
"learning_rate": 7.875e-05,
|
| 55261 |
+
"loss": 0.1501,
|
| 55262 |
+
"step": 7876
|
| 55263 |
+
},
|
| 55264 |
+
{
|
| 55265 |
+
"epoch": 0.0007877,
|
| 55266 |
+
"grad_norm": 0.4112080931663513,
|
| 55267 |
+
"learning_rate": 7.876e-05,
|
| 55268 |
+
"loss": 0.1632,
|
| 55269 |
+
"step": 7877
|
| 55270 |
+
},
|
| 55271 |
+
{
|
| 55272 |
+
"epoch": 0.0007878,
|
| 55273 |
+
"grad_norm": 1.0945180654525757,
|
| 55274 |
+
"learning_rate": 7.877e-05,
|
| 55275 |
+
"loss": 0.1748,
|
| 55276 |
+
"step": 7878
|
| 55277 |
+
},
|
| 55278 |
+
{
|
| 55279 |
+
"epoch": 0.0007879,
|
| 55280 |
+
"grad_norm": 0.31509843468666077,
|
| 55281 |
+
"learning_rate": 7.878e-05,
|
| 55282 |
+
"loss": 0.1437,
|
| 55283 |
+
"step": 7879
|
| 55284 |
+
},
|
| 55285 |
+
{
|
| 55286 |
+
"epoch": 0.000788,
|
| 55287 |
+
"grad_norm": 0.3834889233112335,
|
| 55288 |
+
"learning_rate": 7.879e-05,
|
| 55289 |
+
"loss": 0.1498,
|
| 55290 |
+
"step": 7880
|
| 55291 |
+
},
|
| 55292 |
+
{
|
| 55293 |
+
"epoch": 0.0007881,
|
| 55294 |
+
"grad_norm": 0.40916872024536133,
|
| 55295 |
+
"learning_rate": 7.88e-05,
|
| 55296 |
+
"loss": 0.1515,
|
| 55297 |
+
"step": 7881
|
| 55298 |
+
},
|
| 55299 |
+
{
|
| 55300 |
+
"epoch": 0.0007882,
|
| 55301 |
+
"grad_norm": 0.34113648533821106,
|
| 55302 |
+
"learning_rate": 7.881000000000001e-05,
|
| 55303 |
+
"loss": 0.1396,
|
| 55304 |
+
"step": 7882
|
| 55305 |
+
},
|
| 55306 |
+
{
|
| 55307 |
+
"epoch": 0.0007883,
|
| 55308 |
+
"grad_norm": 0.41003337502479553,
|
| 55309 |
+
"learning_rate": 7.882e-05,
|
| 55310 |
+
"loss": 0.1453,
|
| 55311 |
+
"step": 7883
|
| 55312 |
+
},
|
| 55313 |
+
{
|
| 55314 |
+
"epoch": 0.0007884,
|
| 55315 |
+
"grad_norm": 0.38141313195228577,
|
| 55316 |
+
"learning_rate": 7.883e-05,
|
| 55317 |
+
"loss": 0.1506,
|
| 55318 |
+
"step": 7884
|
| 55319 |
+
},
|
| 55320 |
+
{
|
| 55321 |
+
"epoch": 0.0007885,
|
| 55322 |
+
"grad_norm": 0.6405525803565979,
|
| 55323 |
+
"learning_rate": 7.884000000000001e-05,
|
| 55324 |
+
"loss": 0.1666,
|
| 55325 |
+
"step": 7885
|
| 55326 |
+
},
|
| 55327 |
+
{
|
| 55328 |
+
"epoch": 0.0007886,
|
| 55329 |
+
"grad_norm": 0.5458857417106628,
|
| 55330 |
+
"learning_rate": 7.884999999999999e-05,
|
| 55331 |
+
"loss": 0.1517,
|
| 55332 |
+
"step": 7886
|
| 55333 |
+
},
|
| 55334 |
+
{
|
| 55335 |
+
"epoch": 0.0007887,
|
| 55336 |
+
"grad_norm": 0.4375963509082794,
|
| 55337 |
+
"learning_rate": 7.886e-05,
|
| 55338 |
+
"loss": 0.1586,
|
| 55339 |
+
"step": 7887
|
| 55340 |
+
},
|
| 55341 |
+
{
|
| 55342 |
+
"epoch": 0.0007888,
|
| 55343 |
+
"grad_norm": 0.3580589294433594,
|
| 55344 |
+
"learning_rate": 7.887000000000001e-05,
|
| 55345 |
+
"loss": 0.1418,
|
| 55346 |
+
"step": 7888
|
| 55347 |
+
},
|
| 55348 |
+
{
|
| 55349 |
+
"epoch": 0.0007889,
|
| 55350 |
+
"grad_norm": 0.4851737916469574,
|
| 55351 |
+
"learning_rate": 7.887999999999999e-05,
|
| 55352 |
+
"loss": 0.1617,
|
| 55353 |
+
"step": 7889
|
| 55354 |
+
},
|
| 55355 |
+
{
|
| 55356 |
+
"epoch": 0.000789,
|
| 55357 |
+
"grad_norm": 0.3856046497821808,
|
| 55358 |
+
"learning_rate": 7.889e-05,
|
| 55359 |
+
"loss": 0.1459,
|
| 55360 |
+
"step": 7890
|
| 55361 |
+
},
|
| 55362 |
+
{
|
| 55363 |
+
"epoch": 0.0007891,
|
| 55364 |
+
"grad_norm": 0.3718718886375427,
|
| 55365 |
+
"learning_rate": 7.890000000000001e-05,
|
| 55366 |
+
"loss": 0.1462,
|
| 55367 |
+
"step": 7891
|
| 55368 |
+
},
|
| 55369 |
+
{
|
| 55370 |
+
"epoch": 0.0007892,
|
| 55371 |
+
"grad_norm": 0.37324395775794983,
|
| 55372 |
+
"learning_rate": 7.891e-05,
|
| 55373 |
+
"loss": 0.1523,
|
| 55374 |
+
"step": 7892
|
| 55375 |
+
},
|
| 55376 |
+
{
|
| 55377 |
+
"epoch": 0.0007893,
|
| 55378 |
+
"grad_norm": 0.5335335731506348,
|
| 55379 |
+
"learning_rate": 7.892e-05,
|
| 55380 |
+
"loss": 0.1814,
|
| 55381 |
+
"step": 7893
|
| 55382 |
+
},
|
| 55383 |
+
{
|
| 55384 |
+
"epoch": 0.0007894,
|
| 55385 |
+
"grad_norm": 0.3738335371017456,
|
| 55386 |
+
"learning_rate": 7.893e-05,
|
| 55387 |
+
"loss": 0.1495,
|
| 55388 |
+
"step": 7894
|
| 55389 |
+
},
|
| 55390 |
+
{
|
| 55391 |
+
"epoch": 0.0007895,
|
| 55392 |
+
"grad_norm": 0.30939367413520813,
|
| 55393 |
+
"learning_rate": 7.894e-05,
|
| 55394 |
+
"loss": 0.1425,
|
| 55395 |
+
"step": 7895
|
| 55396 |
+
},
|
| 55397 |
+
{
|
| 55398 |
+
"epoch": 0.0007896,
|
| 55399 |
+
"grad_norm": 0.4040488302707672,
|
| 55400 |
+
"learning_rate": 7.895e-05,
|
| 55401 |
+
"loss": 0.1473,
|
| 55402 |
+
"step": 7896
|
| 55403 |
+
},
|
| 55404 |
+
{
|
| 55405 |
+
"epoch": 0.0007897,
|
| 55406 |
+
"grad_norm": 0.36211928725242615,
|
| 55407 |
+
"learning_rate": 7.896e-05,
|
| 55408 |
+
"loss": 0.1423,
|
| 55409 |
+
"step": 7897
|
| 55410 |
+
},
|
| 55411 |
+
{
|
| 55412 |
+
"epoch": 0.0007898,
|
| 55413 |
+
"grad_norm": 0.32226863503456116,
|
| 55414 |
+
"learning_rate": 7.897e-05,
|
| 55415 |
+
"loss": 0.1384,
|
| 55416 |
+
"step": 7898
|
| 55417 |
+
},
|
| 55418 |
+
{
|
| 55419 |
+
"epoch": 0.0007899,
|
| 55420 |
+
"grad_norm": 0.473587304353714,
|
| 55421 |
+
"learning_rate": 7.898e-05,
|
| 55422 |
+
"loss": 0.1644,
|
| 55423 |
+
"step": 7899
|
| 55424 |
+
},
|
| 55425 |
+
{
|
| 55426 |
+
"epoch": 0.00079,
|
| 55427 |
+
"grad_norm": 4.129487037658691,
|
| 55428 |
+
"learning_rate": 7.899e-05,
|
| 55429 |
+
"loss": 0.2184,
|
| 55430 |
+
"step": 7900
|
| 55431 |
+
},
|
| 55432 |
+
{
|
| 55433 |
+
"epoch": 0.0007901,
|
| 55434 |
+
"grad_norm": 0.33910876512527466,
|
| 55435 |
+
"learning_rate": 7.9e-05,
|
| 55436 |
+
"loss": 0.1405,
|
| 55437 |
+
"step": 7901
|
| 55438 |
+
},
|
| 55439 |
+
{
|
| 55440 |
+
"epoch": 0.0007902,
|
| 55441 |
+
"grad_norm": 0.6521164774894714,
|
| 55442 |
+
"learning_rate": 7.901e-05,
|
| 55443 |
+
"loss": 0.1721,
|
| 55444 |
+
"step": 7902
|
| 55445 |
+
},
|
| 55446 |
+
{
|
| 55447 |
+
"epoch": 0.0007903,
|
| 55448 |
+
"grad_norm": 0.38239943981170654,
|
| 55449 |
+
"learning_rate": 7.902e-05,
|
| 55450 |
+
"loss": 0.1503,
|
| 55451 |
+
"step": 7903
|
| 55452 |
+
},
|
| 55453 |
+
{
|
| 55454 |
+
"epoch": 0.0007904,
|
| 55455 |
+
"grad_norm": 1.1751346588134766,
|
| 55456 |
+
"learning_rate": 7.903000000000001e-05,
|
| 55457 |
+
"loss": 0.2278,
|
| 55458 |
+
"step": 7904
|
| 55459 |
+
},
|
| 55460 |
+
{
|
| 55461 |
+
"epoch": 0.0007905,
|
| 55462 |
+
"grad_norm": 0.48078861832618713,
|
| 55463 |
+
"learning_rate": 7.904e-05,
|
| 55464 |
+
"loss": 0.1519,
|
| 55465 |
+
"step": 7905
|
| 55466 |
+
},
|
| 55467 |
+
{
|
| 55468 |
+
"epoch": 0.0007906,
|
| 55469 |
+
"grad_norm": 0.4788834750652313,
|
| 55470 |
+
"learning_rate": 7.905e-05,
|
| 55471 |
+
"loss": 0.1583,
|
| 55472 |
+
"step": 7906
|
| 55473 |
+
},
|
| 55474 |
+
{
|
| 55475 |
+
"epoch": 0.0007907,
|
| 55476 |
+
"grad_norm": 0.9449865818023682,
|
| 55477 |
+
"learning_rate": 7.906e-05,
|
| 55478 |
+
"loss": 0.1902,
|
| 55479 |
+
"step": 7907
|
| 55480 |
+
},
|
| 55481 |
+
{
|
| 55482 |
+
"epoch": 0.0007908,
|
| 55483 |
+
"grad_norm": 0.4132283926010132,
|
| 55484 |
+
"learning_rate": 7.907e-05,
|
| 55485 |
+
"loss": 0.1516,
|
| 55486 |
+
"step": 7908
|
| 55487 |
+
},
|
| 55488 |
+
{
|
| 55489 |
+
"epoch": 0.0007909,
|
| 55490 |
+
"grad_norm": 0.43124404549598694,
|
| 55491 |
+
"learning_rate": 7.908000000000001e-05,
|
| 55492 |
+
"loss": 0.1493,
|
| 55493 |
+
"step": 7909
|
| 55494 |
+
},
|
| 55495 |
+
{
|
| 55496 |
+
"epoch": 0.000791,
|
| 55497 |
+
"grad_norm": 0.4902573227882385,
|
| 55498 |
+
"learning_rate": 7.909e-05,
|
| 55499 |
+
"loss": 0.1521,
|
| 55500 |
+
"step": 7910
|
| 55501 |
+
},
|
| 55502 |
+
{
|
| 55503 |
+
"epoch": 0.0007911,
|
| 55504 |
+
"grad_norm": 0.4065714478492737,
|
| 55505 |
+
"learning_rate": 7.91e-05,
|
| 55506 |
+
"loss": 0.1421,
|
| 55507 |
+
"step": 7911
|
| 55508 |
+
},
|
| 55509 |
+
{
|
| 55510 |
+
"epoch": 0.0007912,
|
| 55511 |
+
"grad_norm": 0.5426554679870605,
|
| 55512 |
+
"learning_rate": 7.911e-05,
|
| 55513 |
+
"loss": 0.1595,
|
| 55514 |
+
"step": 7912
|
| 55515 |
+
},
|
| 55516 |
+
{
|
| 55517 |
+
"epoch": 0.0007913,
|
| 55518 |
+
"grad_norm": 0.43201830983161926,
|
| 55519 |
+
"learning_rate": 7.912e-05,
|
| 55520 |
+
"loss": 0.1473,
|
| 55521 |
+
"step": 7913
|
| 55522 |
+
},
|
| 55523 |
+
{
|
| 55524 |
+
"epoch": 0.0007914,
|
| 55525 |
+
"grad_norm": 0.5732458233833313,
|
| 55526 |
+
"learning_rate": 7.913e-05,
|
| 55527 |
+
"loss": 0.181,
|
| 55528 |
+
"step": 7914
|
| 55529 |
+
},
|
| 55530 |
+
{
|
| 55531 |
+
"epoch": 0.0007915,
|
| 55532 |
+
"grad_norm": 0.41392019391059875,
|
| 55533 |
+
"learning_rate": 7.914e-05,
|
| 55534 |
+
"loss": 0.155,
|
| 55535 |
+
"step": 7915
|
| 55536 |
+
},
|
| 55537 |
+
{
|
| 55538 |
+
"epoch": 0.0007916,
|
| 55539 |
+
"grad_norm": 0.42396053671836853,
|
| 55540 |
+
"learning_rate": 7.915000000000001e-05,
|
| 55541 |
+
"loss": 0.1511,
|
| 55542 |
+
"step": 7916
|
| 55543 |
+
},
|
| 55544 |
+
{
|
| 55545 |
+
"epoch": 0.0007917,
|
| 55546 |
+
"grad_norm": 0.3622252345085144,
|
| 55547 |
+
"learning_rate": 7.916e-05,
|
| 55548 |
+
"loss": 0.1421,
|
| 55549 |
+
"step": 7917
|
| 55550 |
+
},
|
| 55551 |
+
{
|
| 55552 |
+
"epoch": 0.0007918,
|
| 55553 |
+
"grad_norm": 0.4684685468673706,
|
| 55554 |
+
"learning_rate": 7.917e-05,
|
| 55555 |
+
"loss": 0.1606,
|
| 55556 |
+
"step": 7918
|
| 55557 |
+
},
|
| 55558 |
+
{
|
| 55559 |
+
"epoch": 0.0007919,
|
| 55560 |
+
"grad_norm": 0.38661402463912964,
|
| 55561 |
+
"learning_rate": 7.918000000000001e-05,
|
| 55562 |
+
"loss": 0.1548,
|
| 55563 |
+
"step": 7919
|
| 55564 |
+
},
|
| 55565 |
+
{
|
| 55566 |
+
"epoch": 0.000792,
|
| 55567 |
+
"grad_norm": 0.3429745137691498,
|
| 55568 |
+
"learning_rate": 7.918999999999999e-05,
|
| 55569 |
+
"loss": 0.1407,
|
| 55570 |
+
"step": 7920
|
| 55571 |
+
},
|
| 55572 |
+
{
|
| 55573 |
+
"epoch": 0.0007921,
|
| 55574 |
+
"grad_norm": 0.30344387888908386,
|
| 55575 |
+
"learning_rate": 7.92e-05,
|
| 55576 |
+
"loss": 0.1328,
|
| 55577 |
+
"step": 7921
|
| 55578 |
+
},
|
| 55579 |
+
{
|
| 55580 |
+
"epoch": 0.0007922,
|
| 55581 |
+
"grad_norm": 0.3710497319698334,
|
| 55582 |
+
"learning_rate": 7.921000000000001e-05,
|
| 55583 |
+
"loss": 0.1435,
|
| 55584 |
+
"step": 7922
|
| 55585 |
+
},
|
| 55586 |
+
{
|
| 55587 |
+
"epoch": 0.0007923,
|
| 55588 |
+
"grad_norm": 0.37194472551345825,
|
| 55589 |
+
"learning_rate": 7.921999999999999e-05,
|
| 55590 |
+
"loss": 0.144,
|
| 55591 |
+
"step": 7923
|
| 55592 |
+
},
|
| 55593 |
+
{
|
| 55594 |
+
"epoch": 0.0007924,
|
| 55595 |
+
"grad_norm": 0.42788729071617126,
|
| 55596 |
+
"learning_rate": 7.923e-05,
|
| 55597 |
+
"loss": 0.1478,
|
| 55598 |
+
"step": 7924
|
| 55599 |
+
},
|
| 55600 |
+
{
|
| 55601 |
+
"epoch": 0.0007925,
|
| 55602 |
+
"grad_norm": 0.38140884041786194,
|
| 55603 |
+
"learning_rate": 7.924000000000001e-05,
|
| 55604 |
+
"loss": 0.1467,
|
| 55605 |
+
"step": 7925
|
| 55606 |
+
},
|
| 55607 |
+
{
|
| 55608 |
+
"epoch": 0.0007926,
|
| 55609 |
+
"grad_norm": 0.32990655303001404,
|
| 55610 |
+
"learning_rate": 7.925e-05,
|
| 55611 |
+
"loss": 0.1461,
|
| 55612 |
+
"step": 7926
|
| 55613 |
+
},
|
| 55614 |
+
{
|
| 55615 |
+
"epoch": 0.0007927,
|
| 55616 |
+
"grad_norm": 0.2901013195514679,
|
| 55617 |
+
"learning_rate": 7.926e-05,
|
| 55618 |
+
"loss": 0.1317,
|
| 55619 |
+
"step": 7927
|
| 55620 |
+
},
|
| 55621 |
+
{
|
| 55622 |
+
"epoch": 0.0007928,
|
| 55623 |
+
"grad_norm": 0.328251451253891,
|
| 55624 |
+
"learning_rate": 7.927e-05,
|
| 55625 |
+
"loss": 0.1436,
|
| 55626 |
+
"step": 7928
|
| 55627 |
+
},
|
| 55628 |
+
{
|
| 55629 |
+
"epoch": 0.0007929,
|
| 55630 |
+
"grad_norm": 0.6993154287338257,
|
| 55631 |
+
"learning_rate": 7.928e-05,
|
| 55632 |
+
"loss": 0.1659,
|
| 55633 |
+
"step": 7929
|
| 55634 |
+
},
|
| 55635 |
+
{
|
| 55636 |
+
"epoch": 0.000793,
|
| 55637 |
+
"grad_norm": 0.4166194498538971,
|
| 55638 |
+
"learning_rate": 7.929e-05,
|
| 55639 |
+
"loss": 0.162,
|
| 55640 |
+
"step": 7930
|
| 55641 |
+
},
|
| 55642 |
+
{
|
| 55643 |
+
"epoch": 0.0007931,
|
| 55644 |
+
"grad_norm": 0.3751193583011627,
|
| 55645 |
+
"learning_rate": 7.93e-05,
|
| 55646 |
+
"loss": 0.1506,
|
| 55647 |
+
"step": 7931
|
| 55648 |
+
},
|
| 55649 |
+
{
|
| 55650 |
+
"epoch": 0.0007932,
|
| 55651 |
+
"grad_norm": 0.3409431278705597,
|
| 55652 |
+
"learning_rate": 7.931e-05,
|
| 55653 |
+
"loss": 0.1422,
|
| 55654 |
+
"step": 7932
|
| 55655 |
+
},
|
| 55656 |
+
{
|
| 55657 |
+
"epoch": 0.0007933,
|
| 55658 |
+
"grad_norm": 0.4018118977546692,
|
| 55659 |
+
"learning_rate": 7.932e-05,
|
| 55660 |
+
"loss": 0.1655,
|
| 55661 |
+
"step": 7933
|
| 55662 |
+
},
|
| 55663 |
+
{
|
| 55664 |
+
"epoch": 0.0007934,
|
| 55665 |
+
"grad_norm": 0.5279244780540466,
|
| 55666 |
+
"learning_rate": 7.933e-05,
|
| 55667 |
+
"loss": 0.1791,
|
| 55668 |
+
"step": 7934
|
| 55669 |
+
},
|
| 55670 |
+
{
|
| 55671 |
+
"epoch": 0.0007935,
|
| 55672 |
+
"grad_norm": 0.31002724170684814,
|
| 55673 |
+
"learning_rate": 7.934e-05,
|
| 55674 |
+
"loss": 0.1406,
|
| 55675 |
+
"step": 7935
|
| 55676 |
+
},
|
| 55677 |
+
{
|
| 55678 |
+
"epoch": 0.0007936,
|
| 55679 |
+
"grad_norm": 0.30614563822746277,
|
| 55680 |
+
"learning_rate": 7.935e-05,
|
| 55681 |
+
"loss": 0.134,
|
| 55682 |
+
"step": 7936
|
| 55683 |
+
},
|
| 55684 |
+
{
|
| 55685 |
+
"epoch": 0.0007937,
|
| 55686 |
+
"grad_norm": 0.4066023528575897,
|
| 55687 |
+
"learning_rate": 7.936e-05,
|
| 55688 |
+
"loss": 0.1545,
|
| 55689 |
+
"step": 7937
|
| 55690 |
+
},
|
| 55691 |
+
{
|
| 55692 |
+
"epoch": 0.0007938,
|
| 55693 |
+
"grad_norm": 0.315294086933136,
|
| 55694 |
+
"learning_rate": 7.937000000000001e-05,
|
| 55695 |
+
"loss": 0.1436,
|
| 55696 |
+
"step": 7938
|
| 55697 |
+
},
|
| 55698 |
+
{
|
| 55699 |
+
"epoch": 0.0007939,
|
| 55700 |
+
"grad_norm": 0.3314513564109802,
|
| 55701 |
+
"learning_rate": 7.938e-05,
|
| 55702 |
+
"loss": 0.1362,
|
| 55703 |
+
"step": 7939
|
| 55704 |
+
},
|
| 55705 |
+
{
|
| 55706 |
+
"epoch": 0.000794,
|
| 55707 |
+
"grad_norm": 0.3202115297317505,
|
| 55708 |
+
"learning_rate": 7.939e-05,
|
| 55709 |
+
"loss": 0.1483,
|
| 55710 |
+
"step": 7940
|
| 55711 |
+
},
|
| 55712 |
+
{
|
| 55713 |
+
"epoch": 0.0007941,
|
| 55714 |
+
"grad_norm": 0.35739099979400635,
|
| 55715 |
+
"learning_rate": 7.94e-05,
|
| 55716 |
+
"loss": 0.1511,
|
| 55717 |
+
"step": 7941
|
| 55718 |
+
},
|
| 55719 |
+
{
|
| 55720 |
+
"epoch": 0.0007942,
|
| 55721 |
+
"grad_norm": 0.5248399972915649,
|
| 55722 |
+
"learning_rate": 7.941e-05,
|
| 55723 |
+
"loss": 0.1779,
|
| 55724 |
+
"step": 7942
|
| 55725 |
+
},
|
| 55726 |
+
{
|
| 55727 |
+
"epoch": 0.0007943,
|
| 55728 |
+
"grad_norm": 0.3720186948776245,
|
| 55729 |
+
"learning_rate": 7.942000000000001e-05,
|
| 55730 |
+
"loss": 0.1468,
|
| 55731 |
+
"step": 7943
|
| 55732 |
+
},
|
| 55733 |
+
{
|
| 55734 |
+
"epoch": 0.0007944,
|
| 55735 |
+
"grad_norm": 0.34426867961883545,
|
| 55736 |
+
"learning_rate": 7.943e-05,
|
| 55737 |
+
"loss": 0.1442,
|
| 55738 |
+
"step": 7944
|
| 55739 |
+
},
|
| 55740 |
+
{
|
| 55741 |
+
"epoch": 0.0007945,
|
| 55742 |
+
"grad_norm": 0.3085344135761261,
|
| 55743 |
+
"learning_rate": 7.944e-05,
|
| 55744 |
+
"loss": 0.14,
|
| 55745 |
+
"step": 7945
|
| 55746 |
+
},
|
| 55747 |
+
{
|
| 55748 |
+
"epoch": 0.0007946,
|
| 55749 |
+
"grad_norm": 0.30736759305000305,
|
| 55750 |
+
"learning_rate": 7.945e-05,
|
| 55751 |
+
"loss": 0.1423,
|
| 55752 |
+
"step": 7946
|
| 55753 |
+
},
|
| 55754 |
+
{
|
| 55755 |
+
"epoch": 0.0007947,
|
| 55756 |
+
"grad_norm": 0.4966878294944763,
|
| 55757 |
+
"learning_rate": 7.946e-05,
|
| 55758 |
+
"loss": 0.1608,
|
| 55759 |
+
"step": 7947
|
| 55760 |
+
},
|
| 55761 |
+
{
|
| 55762 |
+
"epoch": 0.0007948,
|
| 55763 |
+
"grad_norm": 0.320726215839386,
|
| 55764 |
+
"learning_rate": 7.947e-05,
|
| 55765 |
+
"loss": 0.1387,
|
| 55766 |
+
"step": 7948
|
| 55767 |
+
},
|
| 55768 |
+
{
|
| 55769 |
+
"epoch": 0.0007949,
|
| 55770 |
+
"grad_norm": 0.32221245765686035,
|
| 55771 |
+
"learning_rate": 7.948e-05,
|
| 55772 |
+
"loss": 0.1467,
|
| 55773 |
+
"step": 7949
|
| 55774 |
+
},
|
| 55775 |
+
{
|
| 55776 |
+
"epoch": 0.000795,
|
| 55777 |
+
"grad_norm": 0.3078095614910126,
|
| 55778 |
+
"learning_rate": 7.949000000000001e-05,
|
| 55779 |
+
"loss": 0.1463,
|
| 55780 |
+
"step": 7950
|
| 55781 |
+
},
|
| 55782 |
+
{
|
| 55783 |
+
"epoch": 0.0007951,
|
| 55784 |
+
"grad_norm": 0.3305199444293976,
|
| 55785 |
+
"learning_rate": 7.95e-05,
|
| 55786 |
+
"loss": 0.1338,
|
| 55787 |
+
"step": 7951
|
| 55788 |
+
},
|
| 55789 |
+
{
|
| 55790 |
+
"epoch": 0.0007952,
|
| 55791 |
+
"grad_norm": 0.31949639320373535,
|
| 55792 |
+
"learning_rate": 7.951e-05,
|
| 55793 |
+
"loss": 0.1353,
|
| 55794 |
+
"step": 7952
|
| 55795 |
+
},
|
| 55796 |
+
{
|
| 55797 |
+
"epoch": 0.0007953,
|
| 55798 |
+
"grad_norm": 0.2862573266029358,
|
| 55799 |
+
"learning_rate": 7.952000000000001e-05,
|
| 55800 |
+
"loss": 0.1323,
|
| 55801 |
+
"step": 7953
|
| 55802 |
+
},
|
| 55803 |
+
{
|
| 55804 |
+
"epoch": 0.0007954,
|
| 55805 |
+
"grad_norm": 0.281089723110199,
|
| 55806 |
+
"learning_rate": 7.952999999999999e-05,
|
| 55807 |
+
"loss": 0.1327,
|
| 55808 |
+
"step": 7954
|
| 55809 |
+
},
|
| 55810 |
+
{
|
| 55811 |
+
"epoch": 0.0007955,
|
| 55812 |
+
"grad_norm": 0.2934763729572296,
|
| 55813 |
+
"learning_rate": 7.954e-05,
|
| 55814 |
+
"loss": 0.1378,
|
| 55815 |
+
"step": 7955
|
| 55816 |
+
},
|
| 55817 |
+
{
|
| 55818 |
+
"epoch": 0.0007956,
|
| 55819 |
+
"grad_norm": 0.6798914670944214,
|
| 55820 |
+
"learning_rate": 7.955000000000001e-05,
|
| 55821 |
+
"loss": 0.1608,
|
| 55822 |
+
"step": 7956
|
| 55823 |
+
},
|
| 55824 |
+
{
|
| 55825 |
+
"epoch": 0.0007957,
|
| 55826 |
+
"grad_norm": 0.28892913460731506,
|
| 55827 |
+
"learning_rate": 7.955999999999999e-05,
|
| 55828 |
+
"loss": 0.137,
|
| 55829 |
+
"step": 7957
|
| 55830 |
+
},
|
| 55831 |
+
{
|
| 55832 |
+
"epoch": 0.0007958,
|
| 55833 |
+
"grad_norm": 0.3551887273788452,
|
| 55834 |
+
"learning_rate": 7.957e-05,
|
| 55835 |
+
"loss": 0.1418,
|
| 55836 |
+
"step": 7958
|
| 55837 |
+
},
|
| 55838 |
+
{
|
| 55839 |
+
"epoch": 0.0007959,
|
| 55840 |
+
"grad_norm": 0.3428361117839813,
|
| 55841 |
+
"learning_rate": 7.958000000000001e-05,
|
| 55842 |
+
"loss": 0.1379,
|
| 55843 |
+
"step": 7959
|
| 55844 |
+
},
|
| 55845 |
+
{
|
| 55846 |
+
"epoch": 0.000796,
|
| 55847 |
+
"grad_norm": 0.320889413356781,
|
| 55848 |
+
"learning_rate": 7.959e-05,
|
| 55849 |
+
"loss": 0.1445,
|
| 55850 |
+
"step": 7960
|
| 55851 |
+
},
|
| 55852 |
+
{
|
| 55853 |
+
"epoch": 0.0007961,
|
| 55854 |
+
"grad_norm": 0.30409809947013855,
|
| 55855 |
+
"learning_rate": 7.96e-05,
|
| 55856 |
+
"loss": 0.1387,
|
| 55857 |
+
"step": 7961
|
| 55858 |
+
},
|
| 55859 |
+
{
|
| 55860 |
+
"epoch": 0.0007962,
|
| 55861 |
+
"grad_norm": 0.3267022669315338,
|
| 55862 |
+
"learning_rate": 7.961e-05,
|
| 55863 |
+
"loss": 0.1296,
|
| 55864 |
+
"step": 7962
|
| 55865 |
+
},
|
| 55866 |
+
{
|
| 55867 |
+
"epoch": 0.0007963,
|
| 55868 |
+
"grad_norm": 0.32833611965179443,
|
| 55869 |
+
"learning_rate": 7.962e-05,
|
| 55870 |
+
"loss": 0.1385,
|
| 55871 |
+
"step": 7963
|
| 55872 |
+
},
|
| 55873 |
+
{
|
| 55874 |
+
"epoch": 0.0007964,
|
| 55875 |
+
"grad_norm": 0.37005800008773804,
|
| 55876 |
+
"learning_rate": 7.963e-05,
|
| 55877 |
+
"loss": 0.1472,
|
| 55878 |
+
"step": 7964
|
| 55879 |
+
},
|
| 55880 |
+
{
|
| 55881 |
+
"epoch": 0.0007965,
|
| 55882 |
+
"grad_norm": 0.3744034171104431,
|
| 55883 |
+
"learning_rate": 7.964e-05,
|
| 55884 |
+
"loss": 0.1439,
|
| 55885 |
+
"step": 7965
|
| 55886 |
+
},
|
| 55887 |
+
{
|
| 55888 |
+
"epoch": 0.0007966,
|
| 55889 |
+
"grad_norm": 0.3019333779811859,
|
| 55890 |
+
"learning_rate": 7.965e-05,
|
| 55891 |
+
"loss": 0.1337,
|
| 55892 |
+
"step": 7966
|
| 55893 |
+
},
|
| 55894 |
+
{
|
| 55895 |
+
"epoch": 0.0007967,
|
| 55896 |
+
"grad_norm": 0.3061756491661072,
|
| 55897 |
+
"learning_rate": 7.966e-05,
|
| 55898 |
+
"loss": 0.1347,
|
| 55899 |
+
"step": 7967
|
| 55900 |
+
},
|
| 55901 |
+
{
|
| 55902 |
+
"epoch": 0.0007968,
|
| 55903 |
+
"grad_norm": 0.32018759846687317,
|
| 55904 |
+
"learning_rate": 7.967e-05,
|
| 55905 |
+
"loss": 0.1421,
|
| 55906 |
+
"step": 7968
|
| 55907 |
+
},
|
| 55908 |
+
{
|
| 55909 |
+
"epoch": 0.0007969,
|
| 55910 |
+
"grad_norm": 0.28421124815940857,
|
| 55911 |
+
"learning_rate": 7.968e-05,
|
| 55912 |
+
"loss": 0.1273,
|
| 55913 |
+
"step": 7969
|
| 55914 |
+
},
|
| 55915 |
+
{
|
| 55916 |
+
"epoch": 0.000797,
|
| 55917 |
+
"grad_norm": 0.32791009545326233,
|
| 55918 |
+
"learning_rate": 7.969e-05,
|
| 55919 |
+
"loss": 0.1407,
|
| 55920 |
+
"step": 7970
|
| 55921 |
+
},
|
| 55922 |
+
{
|
| 55923 |
+
"epoch": 0.0007971,
|
| 55924 |
+
"grad_norm": 0.30136266350746155,
|
| 55925 |
+
"learning_rate": 7.97e-05,
|
| 55926 |
+
"loss": 0.1366,
|
| 55927 |
+
"step": 7971
|
| 55928 |
+
},
|
| 55929 |
+
{
|
| 55930 |
+
"epoch": 0.0007972,
|
| 55931 |
+
"grad_norm": 0.3345014750957489,
|
| 55932 |
+
"learning_rate": 7.971000000000001e-05,
|
| 55933 |
+
"loss": 0.1399,
|
| 55934 |
+
"step": 7972
|
| 55935 |
+
},
|
| 55936 |
+
{
|
| 55937 |
+
"epoch": 0.0007973,
|
| 55938 |
+
"grad_norm": 0.5534088015556335,
|
| 55939 |
+
"learning_rate": 7.972e-05,
|
| 55940 |
+
"loss": 0.1508,
|
| 55941 |
+
"step": 7973
|
| 55942 |
+
},
|
| 55943 |
+
{
|
| 55944 |
+
"epoch": 0.0007974,
|
| 55945 |
+
"grad_norm": 0.3192349076271057,
|
| 55946 |
+
"learning_rate": 7.973e-05,
|
| 55947 |
+
"loss": 0.1401,
|
| 55948 |
+
"step": 7974
|
| 55949 |
+
},
|
| 55950 |
+
{
|
| 55951 |
+
"epoch": 0.0007975,
|
| 55952 |
+
"grad_norm": 0.3512609004974365,
|
| 55953 |
+
"learning_rate": 7.974e-05,
|
| 55954 |
+
"loss": 0.1338,
|
| 55955 |
+
"step": 7975
|
| 55956 |
+
},
|
| 55957 |
+
{
|
| 55958 |
+
"epoch": 0.0007976,
|
| 55959 |
+
"grad_norm": 0.3142569363117218,
|
| 55960 |
+
"learning_rate": 7.975e-05,
|
| 55961 |
+
"loss": 0.1385,
|
| 55962 |
+
"step": 7976
|
| 55963 |
+
},
|
| 55964 |
+
{
|
| 55965 |
+
"epoch": 0.0007977,
|
| 55966 |
+
"grad_norm": 0.3346051275730133,
|
| 55967 |
+
"learning_rate": 7.976000000000001e-05,
|
| 55968 |
+
"loss": 0.1378,
|
| 55969 |
+
"step": 7977
|
| 55970 |
+
},
|
| 55971 |
+
{
|
| 55972 |
+
"epoch": 0.0007978,
|
| 55973 |
+
"grad_norm": 0.5163511037826538,
|
| 55974 |
+
"learning_rate": 7.977e-05,
|
| 55975 |
+
"loss": 0.148,
|
| 55976 |
+
"step": 7978
|
| 55977 |
+
},
|
| 55978 |
+
{
|
| 55979 |
+
"epoch": 0.0007979,
|
| 55980 |
+
"grad_norm": 0.322229266166687,
|
| 55981 |
+
"learning_rate": 7.978e-05,
|
| 55982 |
+
"loss": 0.1316,
|
| 55983 |
+
"step": 7979
|
| 55984 |
+
},
|
| 55985 |
+
{
|
| 55986 |
+
"epoch": 0.000798,
|
| 55987 |
+
"grad_norm": 0.2973017692565918,
|
| 55988 |
+
"learning_rate": 7.979000000000001e-05,
|
| 55989 |
+
"loss": 0.134,
|
| 55990 |
+
"step": 7980
|
| 55991 |
+
},
|
| 55992 |
+
{
|
| 55993 |
+
"epoch": 0.0007981,
|
| 55994 |
+
"grad_norm": 0.3267301023006439,
|
| 55995 |
+
"learning_rate": 7.98e-05,
|
| 55996 |
+
"loss": 0.1378,
|
| 55997 |
+
"step": 7981
|
| 55998 |
+
},
|
| 55999 |
+
{
|
| 56000 |
+
"epoch": 0.0007982,
|
| 56001 |
+
"grad_norm": 0.656015932559967,
|
| 56002 |
+
"learning_rate": 7.981e-05,
|
| 56003 |
+
"loss": 0.1586,
|
| 56004 |
+
"step": 7982
|
| 56005 |
+
},
|
| 56006 |
+
{
|
| 56007 |
+
"epoch": 0.0007983,
|
| 56008 |
+
"grad_norm": 0.2986345589160919,
|
| 56009 |
+
"learning_rate": 7.982e-05,
|
| 56010 |
+
"loss": 0.1298,
|
| 56011 |
+
"step": 7983
|
| 56012 |
+
},
|
| 56013 |
+
{
|
| 56014 |
+
"epoch": 0.0007984,
|
| 56015 |
+
"grad_norm": 0.33788925409317017,
|
| 56016 |
+
"learning_rate": 7.983000000000001e-05,
|
| 56017 |
+
"loss": 0.1356,
|
| 56018 |
+
"step": 7984
|
| 56019 |
+
},
|
| 56020 |
+
{
|
| 56021 |
+
"epoch": 0.0007985,
|
| 56022 |
+
"grad_norm": 0.28335437178611755,
|
| 56023 |
+
"learning_rate": 7.984e-05,
|
| 56024 |
+
"loss": 0.1284,
|
| 56025 |
+
"step": 7985
|
| 56026 |
+
},
|
| 56027 |
+
{
|
| 56028 |
+
"epoch": 0.0007986,
|
| 56029 |
+
"grad_norm": 0.2979366183280945,
|
| 56030 |
+
"learning_rate": 7.985e-05,
|
| 56031 |
+
"loss": 0.1285,
|
| 56032 |
+
"step": 7986
|
| 56033 |
+
},
|
| 56034 |
+
{
|
| 56035 |
+
"epoch": 0.0007987,
|
| 56036 |
+
"grad_norm": 0.28379109501838684,
|
| 56037 |
+
"learning_rate": 7.986000000000001e-05,
|
| 56038 |
+
"loss": 0.1285,
|
| 56039 |
+
"step": 7987
|
| 56040 |
+
},
|
| 56041 |
+
{
|
| 56042 |
+
"epoch": 0.0007988,
|
| 56043 |
+
"grad_norm": 0.4956672489643097,
|
| 56044 |
+
"learning_rate": 7.986999999999999e-05,
|
| 56045 |
+
"loss": 0.1365,
|
| 56046 |
+
"step": 7988
|
| 56047 |
+
},
|
| 56048 |
+
{
|
| 56049 |
+
"epoch": 0.0007989,
|
| 56050 |
+
"grad_norm": 0.29996466636657715,
|
| 56051 |
+
"learning_rate": 7.988e-05,
|
| 56052 |
+
"loss": 0.1342,
|
| 56053 |
+
"step": 7989
|
| 56054 |
+
},
|
| 56055 |
+
{
|
| 56056 |
+
"epoch": 0.000799,
|
| 56057 |
+
"grad_norm": 0.283013254404068,
|
| 56058 |
+
"learning_rate": 7.989000000000001e-05,
|
| 56059 |
+
"loss": 0.1217,
|
| 56060 |
+
"step": 7990
|
| 56061 |
+
},
|
| 56062 |
+
{
|
| 56063 |
+
"epoch": 0.0007991,
|
| 56064 |
+
"grad_norm": 0.29078409075737,
|
| 56065 |
+
"learning_rate": 7.989999999999999e-05,
|
| 56066 |
+
"loss": 0.1221,
|
| 56067 |
+
"step": 7991
|
| 56068 |
+
},
|
| 56069 |
+
{
|
| 56070 |
+
"epoch": 0.0007992,
|
| 56071 |
+
"grad_norm": 0.3446073830127716,
|
| 56072 |
+
"learning_rate": 7.991e-05,
|
| 56073 |
+
"loss": 0.1348,
|
| 56074 |
+
"step": 7992
|
| 56075 |
+
},
|
| 56076 |
+
{
|
| 56077 |
+
"epoch": 0.0007993,
|
| 56078 |
+
"grad_norm": 0.2769072949886322,
|
| 56079 |
+
"learning_rate": 7.992000000000001e-05,
|
| 56080 |
+
"loss": 0.1256,
|
| 56081 |
+
"step": 7993
|
| 56082 |
+
},
|
| 56083 |
+
{
|
| 56084 |
+
"epoch": 0.0007994,
|
| 56085 |
+
"grad_norm": 0.28110471367836,
|
| 56086 |
+
"learning_rate": 7.993e-05,
|
| 56087 |
+
"loss": 0.1293,
|
| 56088 |
+
"step": 7994
|
| 56089 |
+
},
|
| 56090 |
+
{
|
| 56091 |
+
"epoch": 0.0007995,
|
| 56092 |
+
"grad_norm": 0.3948468863964081,
|
| 56093 |
+
"learning_rate": 7.994e-05,
|
| 56094 |
+
"loss": 0.1466,
|
| 56095 |
+
"step": 7995
|
| 56096 |
+
},
|
| 56097 |
+
{
|
| 56098 |
+
"epoch": 0.0007996,
|
| 56099 |
+
"grad_norm": 0.2587652802467346,
|
| 56100 |
+
"learning_rate": 7.995e-05,
|
| 56101 |
+
"loss": 0.1201,
|
| 56102 |
+
"step": 7996
|
| 56103 |
+
},
|
| 56104 |
+
{
|
| 56105 |
+
"epoch": 0.0007997,
|
| 56106 |
+
"grad_norm": 0.2804921269416809,
|
| 56107 |
+
"learning_rate": 7.996e-05,
|
| 56108 |
+
"loss": 0.1254,
|
| 56109 |
+
"step": 7997
|
| 56110 |
+
},
|
| 56111 |
+
{
|
| 56112 |
+
"epoch": 0.0007998,
|
| 56113 |
+
"grad_norm": 0.31802529096603394,
|
| 56114 |
+
"learning_rate": 7.997e-05,
|
| 56115 |
+
"loss": 0.137,
|
| 56116 |
+
"step": 7998
|
| 56117 |
+
},
|
| 56118 |
+
{
|
| 56119 |
+
"epoch": 0.0007999,
|
| 56120 |
+
"grad_norm": 0.7691544890403748,
|
| 56121 |
+
"learning_rate": 7.998e-05,
|
| 56122 |
+
"loss": 0.1774,
|
| 56123 |
+
"step": 7999
|
| 56124 |
+
},
|
| 56125 |
+
{
|
| 56126 |
+
"epoch": 0.0008,
|
| 56127 |
+
"grad_norm": 3.40895938873291,
|
| 56128 |
+
"learning_rate": 7.999e-05,
|
| 56129 |
+
"loss": 0.1745,
|
| 56130 |
+
"step": 8000
|
| 56131 |
+
},
|
| 56132 |
+
{
|
| 56133 |
+
"epoch": 0.0008,
|
| 56134 |
+
"eval_loss": 0.016226764768362045,
|
| 56135 |
+
"eval_runtime": 389.6015,
|
| 56136 |
+
"eval_samples_per_second": 25.667,
|
| 56137 |
+
"eval_steps_per_second": 1.604,
|
| 56138 |
+
"step": 8000
|
| 56139 |
}
|
| 56140 |
],
|
| 56141 |
"logging_steps": 1,
|