Training in progress, step 7500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 91951912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d0ea80839e929f01d587d346a5063f556116f10160d7a618b25a6890b6348b9
|
| 3 |
size 91951912
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 183991627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e0f7054ca7f5b0561b8e89ef4fc4fd2219a1c6ad41e177c2ae0027f530382ea
|
| 3 |
size 183991627
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:718a0f3db00824213036a2c0441849791319b7d9cf189065873bb26a7020738e
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abe0ae6a1b71adb35a63cd5c8e14789fba5904a8caeca816820ecb246299a101
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -49120,6 +49120,3514 @@
|
|
| 49120 |
"eval_samples_per_second": 27.593,
|
| 49121 |
"eval_steps_per_second": 1.725,
|
| 49122 |
"step": 7000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49123 |
}
|
| 49124 |
],
|
| 49125 |
"logging_steps": 1,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.00075,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 7500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 49120 |
"eval_samples_per_second": 27.593,
|
| 49121 |
"eval_steps_per_second": 1.725,
|
| 49122 |
"step": 7000
|
| 49123 |
+
},
|
| 49124 |
+
{
|
| 49125 |
+
"epoch": 0.0007001,
|
| 49126 |
+
"grad_norm": 0.8307585716247559,
|
| 49127 |
+
"learning_rate": 7.000000000000001e-05,
|
| 49128 |
+
"loss": 0.2679,
|
| 49129 |
+
"step": 7001
|
| 49130 |
+
},
|
| 49131 |
+
{
|
| 49132 |
+
"epoch": 0.0007002,
|
| 49133 |
+
"grad_norm": 0.6910313367843628,
|
| 49134 |
+
"learning_rate": 7.001e-05,
|
| 49135 |
+
"loss": 0.2467,
|
| 49136 |
+
"step": 7002
|
| 49137 |
+
},
|
| 49138 |
+
{
|
| 49139 |
+
"epoch": 0.0007003,
|
| 49140 |
+
"grad_norm": 0.9133024215698242,
|
| 49141 |
+
"learning_rate": 7.002e-05,
|
| 49142 |
+
"loss": 0.3354,
|
| 49143 |
+
"step": 7003
|
| 49144 |
+
},
|
| 49145 |
+
{
|
| 49146 |
+
"epoch": 0.0007004,
|
| 49147 |
+
"grad_norm": 0.7538177967071533,
|
| 49148 |
+
"learning_rate": 7.003e-05,
|
| 49149 |
+
"loss": 0.2761,
|
| 49150 |
+
"step": 7004
|
| 49151 |
+
},
|
| 49152 |
+
{
|
| 49153 |
+
"epoch": 0.0007005,
|
| 49154 |
+
"grad_norm": 0.6836936473846436,
|
| 49155 |
+
"learning_rate": 7.004e-05,
|
| 49156 |
+
"loss": 0.2589,
|
| 49157 |
+
"step": 7005
|
| 49158 |
+
},
|
| 49159 |
+
{
|
| 49160 |
+
"epoch": 0.0007006,
|
| 49161 |
+
"grad_norm": 0.6986979246139526,
|
| 49162 |
+
"learning_rate": 7.005e-05,
|
| 49163 |
+
"loss": 0.2457,
|
| 49164 |
+
"step": 7006
|
| 49165 |
+
},
|
| 49166 |
+
{
|
| 49167 |
+
"epoch": 0.0007007,
|
| 49168 |
+
"grad_norm": 0.7378904223442078,
|
| 49169 |
+
"learning_rate": 7.006e-05,
|
| 49170 |
+
"loss": 0.272,
|
| 49171 |
+
"step": 7007
|
| 49172 |
+
},
|
| 49173 |
+
{
|
| 49174 |
+
"epoch": 0.0007008,
|
| 49175 |
+
"grad_norm": 0.6308010816574097,
|
| 49176 |
+
"learning_rate": 7.007000000000001e-05,
|
| 49177 |
+
"loss": 0.2618,
|
| 49178 |
+
"step": 7008
|
| 49179 |
+
},
|
| 49180 |
+
{
|
| 49181 |
+
"epoch": 0.0007009,
|
| 49182 |
+
"grad_norm": 1.998252034187317,
|
| 49183 |
+
"learning_rate": 7.007999999999999e-05,
|
| 49184 |
+
"loss": 0.2814,
|
| 49185 |
+
"step": 7009
|
| 49186 |
+
},
|
| 49187 |
+
{
|
| 49188 |
+
"epoch": 0.000701,
|
| 49189 |
+
"grad_norm": 0.829488217830658,
|
| 49190 |
+
"learning_rate": 7.009e-05,
|
| 49191 |
+
"loss": 0.2583,
|
| 49192 |
+
"step": 7010
|
| 49193 |
+
},
|
| 49194 |
+
{
|
| 49195 |
+
"epoch": 0.0007011,
|
| 49196 |
+
"grad_norm": 1.1512203216552734,
|
| 49197 |
+
"learning_rate": 7.010000000000001e-05,
|
| 49198 |
+
"loss": 0.3521,
|
| 49199 |
+
"step": 7011
|
| 49200 |
+
},
|
| 49201 |
+
{
|
| 49202 |
+
"epoch": 0.0007012,
|
| 49203 |
+
"grad_norm": 0.7830899953842163,
|
| 49204 |
+
"learning_rate": 7.010999999999999e-05,
|
| 49205 |
+
"loss": 0.2651,
|
| 49206 |
+
"step": 7012
|
| 49207 |
+
},
|
| 49208 |
+
{
|
| 49209 |
+
"epoch": 0.0007013,
|
| 49210 |
+
"grad_norm": 0.6934369802474976,
|
| 49211 |
+
"learning_rate": 7.012e-05,
|
| 49212 |
+
"loss": 0.261,
|
| 49213 |
+
"step": 7013
|
| 49214 |
+
},
|
| 49215 |
+
{
|
| 49216 |
+
"epoch": 0.0007014,
|
| 49217 |
+
"grad_norm": 0.668596625328064,
|
| 49218 |
+
"learning_rate": 7.013000000000001e-05,
|
| 49219 |
+
"loss": 0.2466,
|
| 49220 |
+
"step": 7014
|
| 49221 |
+
},
|
| 49222 |
+
{
|
| 49223 |
+
"epoch": 0.0007015,
|
| 49224 |
+
"grad_norm": 0.756309449672699,
|
| 49225 |
+
"learning_rate": 7.014e-05,
|
| 49226 |
+
"loss": 0.2596,
|
| 49227 |
+
"step": 7015
|
| 49228 |
+
},
|
| 49229 |
+
{
|
| 49230 |
+
"epoch": 0.0007016,
|
| 49231 |
+
"grad_norm": 0.6385685801506042,
|
| 49232 |
+
"learning_rate": 7.015e-05,
|
| 49233 |
+
"loss": 0.2443,
|
| 49234 |
+
"step": 7016
|
| 49235 |
+
},
|
| 49236 |
+
{
|
| 49237 |
+
"epoch": 0.0007017,
|
| 49238 |
+
"grad_norm": 0.7099800109863281,
|
| 49239 |
+
"learning_rate": 7.016e-05,
|
| 49240 |
+
"loss": 0.2765,
|
| 49241 |
+
"step": 7017
|
| 49242 |
+
},
|
| 49243 |
+
{
|
| 49244 |
+
"epoch": 0.0007018,
|
| 49245 |
+
"grad_norm": 0.6158483028411865,
|
| 49246 |
+
"learning_rate": 7.017e-05,
|
| 49247 |
+
"loss": 0.2483,
|
| 49248 |
+
"step": 7018
|
| 49249 |
+
},
|
| 49250 |
+
{
|
| 49251 |
+
"epoch": 0.0007019,
|
| 49252 |
+
"grad_norm": 0.8103829622268677,
|
| 49253 |
+
"learning_rate": 7.018e-05,
|
| 49254 |
+
"loss": 0.2808,
|
| 49255 |
+
"step": 7019
|
| 49256 |
+
},
|
| 49257 |
+
{
|
| 49258 |
+
"epoch": 0.000702,
|
| 49259 |
+
"grad_norm": 0.6360248923301697,
|
| 49260 |
+
"learning_rate": 7.019e-05,
|
| 49261 |
+
"loss": 0.2416,
|
| 49262 |
+
"step": 7020
|
| 49263 |
+
},
|
| 49264 |
+
{
|
| 49265 |
+
"epoch": 0.0007021,
|
| 49266 |
+
"grad_norm": 0.6225180625915527,
|
| 49267 |
+
"learning_rate": 7.02e-05,
|
| 49268 |
+
"loss": 0.2439,
|
| 49269 |
+
"step": 7021
|
| 49270 |
+
},
|
| 49271 |
+
{
|
| 49272 |
+
"epoch": 0.0007022,
|
| 49273 |
+
"grad_norm": 0.5726187825202942,
|
| 49274 |
+
"learning_rate": 7.021000000000001e-05,
|
| 49275 |
+
"loss": 0.2388,
|
| 49276 |
+
"step": 7022
|
| 49277 |
+
},
|
| 49278 |
+
{
|
| 49279 |
+
"epoch": 0.0007023,
|
| 49280 |
+
"grad_norm": 0.562736988067627,
|
| 49281 |
+
"learning_rate": 7.022e-05,
|
| 49282 |
+
"loss": 0.2426,
|
| 49283 |
+
"step": 7023
|
| 49284 |
+
},
|
| 49285 |
+
{
|
| 49286 |
+
"epoch": 0.0007024,
|
| 49287 |
+
"grad_norm": 0.6522629261016846,
|
| 49288 |
+
"learning_rate": 7.023e-05,
|
| 49289 |
+
"loss": 0.259,
|
| 49290 |
+
"step": 7024
|
| 49291 |
+
},
|
| 49292 |
+
{
|
| 49293 |
+
"epoch": 0.0007025,
|
| 49294 |
+
"grad_norm": 0.5590613484382629,
|
| 49295 |
+
"learning_rate": 7.024e-05,
|
| 49296 |
+
"loss": 0.2461,
|
| 49297 |
+
"step": 7025
|
| 49298 |
+
},
|
| 49299 |
+
{
|
| 49300 |
+
"epoch": 0.0007026,
|
| 49301 |
+
"grad_norm": 1.0506471395492554,
|
| 49302 |
+
"learning_rate": 7.025e-05,
|
| 49303 |
+
"loss": 0.3125,
|
| 49304 |
+
"step": 7026
|
| 49305 |
+
},
|
| 49306 |
+
{
|
| 49307 |
+
"epoch": 0.0007027,
|
| 49308 |
+
"grad_norm": 0.7463265061378479,
|
| 49309 |
+
"learning_rate": 7.026e-05,
|
| 49310 |
+
"loss": 0.2753,
|
| 49311 |
+
"step": 7027
|
| 49312 |
+
},
|
| 49313 |
+
{
|
| 49314 |
+
"epoch": 0.0007028,
|
| 49315 |
+
"grad_norm": 1.6071248054504395,
|
| 49316 |
+
"learning_rate": 7.027e-05,
|
| 49317 |
+
"loss": 0.2897,
|
| 49318 |
+
"step": 7028
|
| 49319 |
+
},
|
| 49320 |
+
{
|
| 49321 |
+
"epoch": 0.0007029,
|
| 49322 |
+
"grad_norm": 0.69710373878479,
|
| 49323 |
+
"learning_rate": 7.028e-05,
|
| 49324 |
+
"loss": 0.2585,
|
| 49325 |
+
"step": 7029
|
| 49326 |
+
},
|
| 49327 |
+
{
|
| 49328 |
+
"epoch": 0.000703,
|
| 49329 |
+
"grad_norm": 0.6717625260353088,
|
| 49330 |
+
"learning_rate": 7.029e-05,
|
| 49331 |
+
"loss": 0.2501,
|
| 49332 |
+
"step": 7030
|
| 49333 |
+
},
|
| 49334 |
+
{
|
| 49335 |
+
"epoch": 0.0007031,
|
| 49336 |
+
"grad_norm": 0.8978825807571411,
|
| 49337 |
+
"learning_rate": 7.03e-05,
|
| 49338 |
+
"loss": 0.2637,
|
| 49339 |
+
"step": 7031
|
| 49340 |
+
},
|
| 49341 |
+
{
|
| 49342 |
+
"epoch": 0.0007032,
|
| 49343 |
+
"grad_norm": 0.5704598426818848,
|
| 49344 |
+
"learning_rate": 7.031000000000001e-05,
|
| 49345 |
+
"loss": 0.2299,
|
| 49346 |
+
"step": 7032
|
| 49347 |
+
},
|
| 49348 |
+
{
|
| 49349 |
+
"epoch": 0.0007033,
|
| 49350 |
+
"grad_norm": 0.997661828994751,
|
| 49351 |
+
"learning_rate": 7.032e-05,
|
| 49352 |
+
"loss": 0.2888,
|
| 49353 |
+
"step": 7033
|
| 49354 |
+
},
|
| 49355 |
+
{
|
| 49356 |
+
"epoch": 0.0007034,
|
| 49357 |
+
"grad_norm": 0.6185047030448914,
|
| 49358 |
+
"learning_rate": 7.033e-05,
|
| 49359 |
+
"loss": 0.2429,
|
| 49360 |
+
"step": 7034
|
| 49361 |
+
},
|
| 49362 |
+
{
|
| 49363 |
+
"epoch": 0.0007035,
|
| 49364 |
+
"grad_norm": 0.7629363536834717,
|
| 49365 |
+
"learning_rate": 7.034000000000001e-05,
|
| 49366 |
+
"loss": 0.2982,
|
| 49367 |
+
"step": 7035
|
| 49368 |
+
},
|
| 49369 |
+
{
|
| 49370 |
+
"epoch": 0.0007036,
|
| 49371 |
+
"grad_norm": 2.4536244869232178,
|
| 49372 |
+
"learning_rate": 7.035e-05,
|
| 49373 |
+
"loss": 0.4689,
|
| 49374 |
+
"step": 7036
|
| 49375 |
+
},
|
| 49376 |
+
{
|
| 49377 |
+
"epoch": 0.0007037,
|
| 49378 |
+
"grad_norm": 0.7827295064926147,
|
| 49379 |
+
"learning_rate": 7.036e-05,
|
| 49380 |
+
"loss": 0.2858,
|
| 49381 |
+
"step": 7037
|
| 49382 |
+
},
|
| 49383 |
+
{
|
| 49384 |
+
"epoch": 0.0007038,
|
| 49385 |
+
"grad_norm": 0.6093035936355591,
|
| 49386 |
+
"learning_rate": 7.037e-05,
|
| 49387 |
+
"loss": 0.2321,
|
| 49388 |
+
"step": 7038
|
| 49389 |
+
},
|
| 49390 |
+
{
|
| 49391 |
+
"epoch": 0.0007039,
|
| 49392 |
+
"grad_norm": 0.6487656831741333,
|
| 49393 |
+
"learning_rate": 7.038e-05,
|
| 49394 |
+
"loss": 0.2454,
|
| 49395 |
+
"step": 7039
|
| 49396 |
+
},
|
| 49397 |
+
{
|
| 49398 |
+
"epoch": 0.000704,
|
| 49399 |
+
"grad_norm": 0.9772380590438843,
|
| 49400 |
+
"learning_rate": 7.039e-05,
|
| 49401 |
+
"loss": 0.2457,
|
| 49402 |
+
"step": 7040
|
| 49403 |
+
},
|
| 49404 |
+
{
|
| 49405 |
+
"epoch": 0.0007041,
|
| 49406 |
+
"grad_norm": 0.6265672445297241,
|
| 49407 |
+
"learning_rate": 7.04e-05,
|
| 49408 |
+
"loss": 0.2416,
|
| 49409 |
+
"step": 7041
|
| 49410 |
+
},
|
| 49411 |
+
{
|
| 49412 |
+
"epoch": 0.0007042,
|
| 49413 |
+
"grad_norm": 0.7439896464347839,
|
| 49414 |
+
"learning_rate": 7.041000000000001e-05,
|
| 49415 |
+
"loss": 0.2644,
|
| 49416 |
+
"step": 7042
|
| 49417 |
+
},
|
| 49418 |
+
{
|
| 49419 |
+
"epoch": 0.0007043,
|
| 49420 |
+
"grad_norm": 0.7880527973175049,
|
| 49421 |
+
"learning_rate": 7.042e-05,
|
| 49422 |
+
"loss": 0.2607,
|
| 49423 |
+
"step": 7043
|
| 49424 |
+
},
|
| 49425 |
+
{
|
| 49426 |
+
"epoch": 0.0007044,
|
| 49427 |
+
"grad_norm": 0.5661341547966003,
|
| 49428 |
+
"learning_rate": 7.043e-05,
|
| 49429 |
+
"loss": 0.2383,
|
| 49430 |
+
"step": 7044
|
| 49431 |
+
},
|
| 49432 |
+
{
|
| 49433 |
+
"epoch": 0.0007045,
|
| 49434 |
+
"grad_norm": 0.5856346487998962,
|
| 49435 |
+
"learning_rate": 7.044000000000001e-05,
|
| 49436 |
+
"loss": 0.2411,
|
| 49437 |
+
"step": 7045
|
| 49438 |
+
},
|
| 49439 |
+
{
|
| 49440 |
+
"epoch": 0.0007046,
|
| 49441 |
+
"grad_norm": 0.5938259959220886,
|
| 49442 |
+
"learning_rate": 7.044999999999999e-05,
|
| 49443 |
+
"loss": 0.2565,
|
| 49444 |
+
"step": 7046
|
| 49445 |
+
},
|
| 49446 |
+
{
|
| 49447 |
+
"epoch": 0.0007047,
|
| 49448 |
+
"grad_norm": 0.5852174758911133,
|
| 49449 |
+
"learning_rate": 7.046e-05,
|
| 49450 |
+
"loss": 0.2401,
|
| 49451 |
+
"step": 7047
|
| 49452 |
+
},
|
| 49453 |
+
{
|
| 49454 |
+
"epoch": 0.0007048,
|
| 49455 |
+
"grad_norm": 0.6033628582954407,
|
| 49456 |
+
"learning_rate": 7.047000000000001e-05,
|
| 49457 |
+
"loss": 0.2523,
|
| 49458 |
+
"step": 7048
|
| 49459 |
+
},
|
| 49460 |
+
{
|
| 49461 |
+
"epoch": 0.0007049,
|
| 49462 |
+
"grad_norm": 0.6606107950210571,
|
| 49463 |
+
"learning_rate": 7.048e-05,
|
| 49464 |
+
"loss": 0.2605,
|
| 49465 |
+
"step": 7049
|
| 49466 |
+
},
|
| 49467 |
+
{
|
| 49468 |
+
"epoch": 0.000705,
|
| 49469 |
+
"grad_norm": 0.9846977591514587,
|
| 49470 |
+
"learning_rate": 7.049e-05,
|
| 49471 |
+
"loss": 0.2983,
|
| 49472 |
+
"step": 7050
|
| 49473 |
+
},
|
| 49474 |
+
{
|
| 49475 |
+
"epoch": 0.0007051,
|
| 49476 |
+
"grad_norm": 0.6883967518806458,
|
| 49477 |
+
"learning_rate": 7.05e-05,
|
| 49478 |
+
"loss": 0.2708,
|
| 49479 |
+
"step": 7051
|
| 49480 |
+
},
|
| 49481 |
+
{
|
| 49482 |
+
"epoch": 0.0007052,
|
| 49483 |
+
"grad_norm": 1.5532714128494263,
|
| 49484 |
+
"learning_rate": 7.051e-05,
|
| 49485 |
+
"loss": 0.3688,
|
| 49486 |
+
"step": 7052
|
| 49487 |
+
},
|
| 49488 |
+
{
|
| 49489 |
+
"epoch": 0.0007053,
|
| 49490 |
+
"grad_norm": 0.6477527022361755,
|
| 49491 |
+
"learning_rate": 7.052e-05,
|
| 49492 |
+
"loss": 0.25,
|
| 49493 |
+
"step": 7053
|
| 49494 |
+
},
|
| 49495 |
+
{
|
| 49496 |
+
"epoch": 0.0007054,
|
| 49497 |
+
"grad_norm": 2.8588967323303223,
|
| 49498 |
+
"learning_rate": 7.053e-05,
|
| 49499 |
+
"loss": 0.4503,
|
| 49500 |
+
"step": 7054
|
| 49501 |
+
},
|
| 49502 |
+
{
|
| 49503 |
+
"epoch": 0.0007055,
|
| 49504 |
+
"grad_norm": 0.6326694488525391,
|
| 49505 |
+
"learning_rate": 7.054e-05,
|
| 49506 |
+
"loss": 0.2188,
|
| 49507 |
+
"step": 7055
|
| 49508 |
+
},
|
| 49509 |
+
{
|
| 49510 |
+
"epoch": 0.0007056,
|
| 49511 |
+
"grad_norm": 0.803915798664093,
|
| 49512 |
+
"learning_rate": 7.055000000000001e-05,
|
| 49513 |
+
"loss": 0.2584,
|
| 49514 |
+
"step": 7056
|
| 49515 |
+
},
|
| 49516 |
+
{
|
| 49517 |
+
"epoch": 0.0007057,
|
| 49518 |
+
"grad_norm": 0.9331216216087341,
|
| 49519 |
+
"learning_rate": 7.056e-05,
|
| 49520 |
+
"loss": 0.3292,
|
| 49521 |
+
"step": 7057
|
| 49522 |
+
},
|
| 49523 |
+
{
|
| 49524 |
+
"epoch": 0.0007058,
|
| 49525 |
+
"grad_norm": 0.5624357461929321,
|
| 49526 |
+
"learning_rate": 7.057e-05,
|
| 49527 |
+
"loss": 0.2285,
|
| 49528 |
+
"step": 7058
|
| 49529 |
+
},
|
| 49530 |
+
{
|
| 49531 |
+
"epoch": 0.0007059,
|
| 49532 |
+
"grad_norm": 0.7122544646263123,
|
| 49533 |
+
"learning_rate": 7.058e-05,
|
| 49534 |
+
"loss": 0.2476,
|
| 49535 |
+
"step": 7059
|
| 49536 |
+
},
|
| 49537 |
+
{
|
| 49538 |
+
"epoch": 0.000706,
|
| 49539 |
+
"grad_norm": 0.6760504841804504,
|
| 49540 |
+
"learning_rate": 7.059e-05,
|
| 49541 |
+
"loss": 0.2561,
|
| 49542 |
+
"step": 7060
|
| 49543 |
+
},
|
| 49544 |
+
{
|
| 49545 |
+
"epoch": 0.0007061,
|
| 49546 |
+
"grad_norm": 0.6072905659675598,
|
| 49547 |
+
"learning_rate": 7.06e-05,
|
| 49548 |
+
"loss": 0.2404,
|
| 49549 |
+
"step": 7061
|
| 49550 |
+
},
|
| 49551 |
+
{
|
| 49552 |
+
"epoch": 0.0007062,
|
| 49553 |
+
"grad_norm": 0.7852693796157837,
|
| 49554 |
+
"learning_rate": 7.061e-05,
|
| 49555 |
+
"loss": 0.2744,
|
| 49556 |
+
"step": 7062
|
| 49557 |
+
},
|
| 49558 |
+
{
|
| 49559 |
+
"epoch": 0.0007063,
|
| 49560 |
+
"grad_norm": 0.6445418000221252,
|
| 49561 |
+
"learning_rate": 7.062e-05,
|
| 49562 |
+
"loss": 0.2714,
|
| 49563 |
+
"step": 7063
|
| 49564 |
+
},
|
| 49565 |
+
{
|
| 49566 |
+
"epoch": 0.0007064,
|
| 49567 |
+
"grad_norm": 0.9340659976005554,
|
| 49568 |
+
"learning_rate": 7.063e-05,
|
| 49569 |
+
"loss": 0.2786,
|
| 49570 |
+
"step": 7064
|
| 49571 |
+
},
|
| 49572 |
+
{
|
| 49573 |
+
"epoch": 0.0007065,
|
| 49574 |
+
"grad_norm": 0.596617579460144,
|
| 49575 |
+
"learning_rate": 7.064e-05,
|
| 49576 |
+
"loss": 0.2374,
|
| 49577 |
+
"step": 7065
|
| 49578 |
+
},
|
| 49579 |
+
{
|
| 49580 |
+
"epoch": 0.0007066,
|
| 49581 |
+
"grad_norm": 0.5787464380264282,
|
| 49582 |
+
"learning_rate": 7.065000000000001e-05,
|
| 49583 |
+
"loss": 0.2371,
|
| 49584 |
+
"step": 7066
|
| 49585 |
+
},
|
| 49586 |
+
{
|
| 49587 |
+
"epoch": 0.0007067,
|
| 49588 |
+
"grad_norm": 0.598484456539154,
|
| 49589 |
+
"learning_rate": 7.066e-05,
|
| 49590 |
+
"loss": 0.2352,
|
| 49591 |
+
"step": 7067
|
| 49592 |
+
},
|
| 49593 |
+
{
|
| 49594 |
+
"epoch": 0.0007068,
|
| 49595 |
+
"grad_norm": 0.633368730545044,
|
| 49596 |
+
"learning_rate": 7.067e-05,
|
| 49597 |
+
"loss": 0.2314,
|
| 49598 |
+
"step": 7068
|
| 49599 |
+
},
|
| 49600 |
+
{
|
| 49601 |
+
"epoch": 0.0007069,
|
| 49602 |
+
"grad_norm": 0.5488775968551636,
|
| 49603 |
+
"learning_rate": 7.068000000000001e-05,
|
| 49604 |
+
"loss": 0.2299,
|
| 49605 |
+
"step": 7069
|
| 49606 |
+
},
|
| 49607 |
+
{
|
| 49608 |
+
"epoch": 0.000707,
|
| 49609 |
+
"grad_norm": 0.5350727438926697,
|
| 49610 |
+
"learning_rate": 7.069e-05,
|
| 49611 |
+
"loss": 0.2345,
|
| 49612 |
+
"step": 7070
|
| 49613 |
+
},
|
| 49614 |
+
{
|
| 49615 |
+
"epoch": 0.0007071,
|
| 49616 |
+
"grad_norm": 0.6432333588600159,
|
| 49617 |
+
"learning_rate": 7.07e-05,
|
| 49618 |
+
"loss": 0.2474,
|
| 49619 |
+
"step": 7071
|
| 49620 |
+
},
|
| 49621 |
+
{
|
| 49622 |
+
"epoch": 0.0007072,
|
| 49623 |
+
"grad_norm": 0.60258948802948,
|
| 49624 |
+
"learning_rate": 7.071e-05,
|
| 49625 |
+
"loss": 0.2483,
|
| 49626 |
+
"step": 7072
|
| 49627 |
+
},
|
| 49628 |
+
{
|
| 49629 |
+
"epoch": 0.0007073,
|
| 49630 |
+
"grad_norm": 0.7571836113929749,
|
| 49631 |
+
"learning_rate": 7.072e-05,
|
| 49632 |
+
"loss": 0.2815,
|
| 49633 |
+
"step": 7073
|
| 49634 |
+
},
|
| 49635 |
+
{
|
| 49636 |
+
"epoch": 0.0007074,
|
| 49637 |
+
"grad_norm": 0.8476506471633911,
|
| 49638 |
+
"learning_rate": 7.073e-05,
|
| 49639 |
+
"loss": 0.3021,
|
| 49640 |
+
"step": 7074
|
| 49641 |
+
},
|
| 49642 |
+
{
|
| 49643 |
+
"epoch": 0.0007075,
|
| 49644 |
+
"grad_norm": 0.5610617399215698,
|
| 49645 |
+
"learning_rate": 7.074e-05,
|
| 49646 |
+
"loss": 0.2454,
|
| 49647 |
+
"step": 7075
|
| 49648 |
+
},
|
| 49649 |
+
{
|
| 49650 |
+
"epoch": 0.0007076,
|
| 49651 |
+
"grad_norm": 0.7464528679847717,
|
| 49652 |
+
"learning_rate": 7.075000000000001e-05,
|
| 49653 |
+
"loss": 0.2898,
|
| 49654 |
+
"step": 7076
|
| 49655 |
+
},
|
| 49656 |
+
{
|
| 49657 |
+
"epoch": 0.0007077,
|
| 49658 |
+
"grad_norm": 0.5852903723716736,
|
| 49659 |
+
"learning_rate": 7.076e-05,
|
| 49660 |
+
"loss": 0.2452,
|
| 49661 |
+
"step": 7077
|
| 49662 |
+
},
|
| 49663 |
+
{
|
| 49664 |
+
"epoch": 0.0007078,
|
| 49665 |
+
"grad_norm": 0.5367820858955383,
|
| 49666 |
+
"learning_rate": 7.077e-05,
|
| 49667 |
+
"loss": 0.2278,
|
| 49668 |
+
"step": 7078
|
| 49669 |
+
},
|
| 49670 |
+
{
|
| 49671 |
+
"epoch": 0.0007079,
|
| 49672 |
+
"grad_norm": 0.547356903553009,
|
| 49673 |
+
"learning_rate": 7.078000000000001e-05,
|
| 49674 |
+
"loss": 0.2323,
|
| 49675 |
+
"step": 7079
|
| 49676 |
+
},
|
| 49677 |
+
{
|
| 49678 |
+
"epoch": 0.000708,
|
| 49679 |
+
"grad_norm": 0.5330636501312256,
|
| 49680 |
+
"learning_rate": 7.078999999999999e-05,
|
| 49681 |
+
"loss": 0.234,
|
| 49682 |
+
"step": 7080
|
| 49683 |
+
},
|
| 49684 |
+
{
|
| 49685 |
+
"epoch": 0.0007081,
|
| 49686 |
+
"grad_norm": 0.5424564480781555,
|
| 49687 |
+
"learning_rate": 7.08e-05,
|
| 49688 |
+
"loss": 0.2311,
|
| 49689 |
+
"step": 7081
|
| 49690 |
+
},
|
| 49691 |
+
{
|
| 49692 |
+
"epoch": 0.0007082,
|
| 49693 |
+
"grad_norm": 0.5401227474212646,
|
| 49694 |
+
"learning_rate": 7.081000000000001e-05,
|
| 49695 |
+
"loss": 0.2405,
|
| 49696 |
+
"step": 7082
|
| 49697 |
+
},
|
| 49698 |
+
{
|
| 49699 |
+
"epoch": 0.0007083,
|
| 49700 |
+
"grad_norm": 1.2282289266586304,
|
| 49701 |
+
"learning_rate": 7.082e-05,
|
| 49702 |
+
"loss": 0.2833,
|
| 49703 |
+
"step": 7083
|
| 49704 |
+
},
|
| 49705 |
+
{
|
| 49706 |
+
"epoch": 0.0007084,
|
| 49707 |
+
"grad_norm": 1.706687092781067,
|
| 49708 |
+
"learning_rate": 7.083e-05,
|
| 49709 |
+
"loss": 0.3715,
|
| 49710 |
+
"step": 7084
|
| 49711 |
+
},
|
| 49712 |
+
{
|
| 49713 |
+
"epoch": 0.0007085,
|
| 49714 |
+
"grad_norm": 0.6320382356643677,
|
| 49715 |
+
"learning_rate": 7.084e-05,
|
| 49716 |
+
"loss": 0.2365,
|
| 49717 |
+
"step": 7085
|
| 49718 |
+
},
|
| 49719 |
+
{
|
| 49720 |
+
"epoch": 0.0007086,
|
| 49721 |
+
"grad_norm": 1.0006563663482666,
|
| 49722 |
+
"learning_rate": 7.085e-05,
|
| 49723 |
+
"loss": 0.3013,
|
| 49724 |
+
"step": 7086
|
| 49725 |
+
},
|
| 49726 |
+
{
|
| 49727 |
+
"epoch": 0.0007087,
|
| 49728 |
+
"grad_norm": 0.591720461845398,
|
| 49729 |
+
"learning_rate": 7.086e-05,
|
| 49730 |
+
"loss": 0.2504,
|
| 49731 |
+
"step": 7087
|
| 49732 |
+
},
|
| 49733 |
+
{
|
| 49734 |
+
"epoch": 0.0007088,
|
| 49735 |
+
"grad_norm": 0.5655443668365479,
|
| 49736 |
+
"learning_rate": 7.087e-05,
|
| 49737 |
+
"loss": 0.2433,
|
| 49738 |
+
"step": 7088
|
| 49739 |
+
},
|
| 49740 |
+
{
|
| 49741 |
+
"epoch": 0.0007089,
|
| 49742 |
+
"grad_norm": 0.6129018068313599,
|
| 49743 |
+
"learning_rate": 7.088e-05,
|
| 49744 |
+
"loss": 0.2415,
|
| 49745 |
+
"step": 7089
|
| 49746 |
+
},
|
| 49747 |
+
{
|
| 49748 |
+
"epoch": 0.000709,
|
| 49749 |
+
"grad_norm": 0.8806246519088745,
|
| 49750 |
+
"learning_rate": 7.089000000000001e-05,
|
| 49751 |
+
"loss": 0.3131,
|
| 49752 |
+
"step": 7090
|
| 49753 |
+
},
|
| 49754 |
+
{
|
| 49755 |
+
"epoch": 0.0007091,
|
| 49756 |
+
"grad_norm": 3.66709041595459,
|
| 49757 |
+
"learning_rate": 7.09e-05,
|
| 49758 |
+
"loss": 0.4194,
|
| 49759 |
+
"step": 7091
|
| 49760 |
+
},
|
| 49761 |
+
{
|
| 49762 |
+
"epoch": 0.0007092,
|
| 49763 |
+
"grad_norm": 0.6629515290260315,
|
| 49764 |
+
"learning_rate": 7.091e-05,
|
| 49765 |
+
"loss": 0.2397,
|
| 49766 |
+
"step": 7092
|
| 49767 |
+
},
|
| 49768 |
+
{
|
| 49769 |
+
"epoch": 0.0007093,
|
| 49770 |
+
"grad_norm": 1.096230149269104,
|
| 49771 |
+
"learning_rate": 7.092e-05,
|
| 49772 |
+
"loss": 0.2515,
|
| 49773 |
+
"step": 7093
|
| 49774 |
+
},
|
| 49775 |
+
{
|
| 49776 |
+
"epoch": 0.0007094,
|
| 49777 |
+
"grad_norm": 0.6950176358222961,
|
| 49778 |
+
"learning_rate": 7.093e-05,
|
| 49779 |
+
"loss": 0.2548,
|
| 49780 |
+
"step": 7094
|
| 49781 |
+
},
|
| 49782 |
+
{
|
| 49783 |
+
"epoch": 0.0007095,
|
| 49784 |
+
"grad_norm": 1.2441452741622925,
|
| 49785 |
+
"learning_rate": 7.094e-05,
|
| 49786 |
+
"loss": 0.3682,
|
| 49787 |
+
"step": 7095
|
| 49788 |
+
},
|
| 49789 |
+
{
|
| 49790 |
+
"epoch": 0.0007096,
|
| 49791 |
+
"grad_norm": 0.716272234916687,
|
| 49792 |
+
"learning_rate": 7.095e-05,
|
| 49793 |
+
"loss": 0.2465,
|
| 49794 |
+
"step": 7096
|
| 49795 |
+
},
|
| 49796 |
+
{
|
| 49797 |
+
"epoch": 0.0007097,
|
| 49798 |
+
"grad_norm": 1.6676087379455566,
|
| 49799 |
+
"learning_rate": 7.096e-05,
|
| 49800 |
+
"loss": 0.3446,
|
| 49801 |
+
"step": 7097
|
| 49802 |
+
},
|
| 49803 |
+
{
|
| 49804 |
+
"epoch": 0.0007098,
|
| 49805 |
+
"grad_norm": 0.9634347558021545,
|
| 49806 |
+
"learning_rate": 7.097e-05,
|
| 49807 |
+
"loss": 0.2997,
|
| 49808 |
+
"step": 7098
|
| 49809 |
+
},
|
| 49810 |
+
{
|
| 49811 |
+
"epoch": 0.0007099,
|
| 49812 |
+
"grad_norm": 0.9347872138023376,
|
| 49813 |
+
"learning_rate": 7.098e-05,
|
| 49814 |
+
"loss": 0.2888,
|
| 49815 |
+
"step": 7099
|
| 49816 |
+
},
|
| 49817 |
+
{
|
| 49818 |
+
"epoch": 0.00071,
|
| 49819 |
+
"grad_norm": 0.8800885677337646,
|
| 49820 |
+
"learning_rate": 7.099000000000001e-05,
|
| 49821 |
+
"loss": 0.2628,
|
| 49822 |
+
"step": 7100
|
| 49823 |
+
},
|
| 49824 |
+
{
|
| 49825 |
+
"epoch": 0.0007101,
|
| 49826 |
+
"grad_norm": 3.0393545627593994,
|
| 49827 |
+
"learning_rate": 7.1e-05,
|
| 49828 |
+
"loss": 0.3553,
|
| 49829 |
+
"step": 7101
|
| 49830 |
+
},
|
| 49831 |
+
{
|
| 49832 |
+
"epoch": 0.0007102,
|
| 49833 |
+
"grad_norm": 0.7605924606323242,
|
| 49834 |
+
"learning_rate": 7.101e-05,
|
| 49835 |
+
"loss": 0.2283,
|
| 49836 |
+
"step": 7102
|
| 49837 |
+
},
|
| 49838 |
+
{
|
| 49839 |
+
"epoch": 0.0007103,
|
| 49840 |
+
"grad_norm": 0.9857915043830872,
|
| 49841 |
+
"learning_rate": 7.102000000000001e-05,
|
| 49842 |
+
"loss": 0.2615,
|
| 49843 |
+
"step": 7103
|
| 49844 |
+
},
|
| 49845 |
+
{
|
| 49846 |
+
"epoch": 0.0007104,
|
| 49847 |
+
"grad_norm": 0.6909942626953125,
|
| 49848 |
+
"learning_rate": 7.103e-05,
|
| 49849 |
+
"loss": 0.2242,
|
| 49850 |
+
"step": 7104
|
| 49851 |
+
},
|
| 49852 |
+
{
|
| 49853 |
+
"epoch": 0.0007105,
|
| 49854 |
+
"grad_norm": 0.8127199411392212,
|
| 49855 |
+
"learning_rate": 7.104e-05,
|
| 49856 |
+
"loss": 0.2698,
|
| 49857 |
+
"step": 7105
|
| 49858 |
+
},
|
| 49859 |
+
{
|
| 49860 |
+
"epoch": 0.0007106,
|
| 49861 |
+
"grad_norm": 0.6459398865699768,
|
| 49862 |
+
"learning_rate": 7.105e-05,
|
| 49863 |
+
"loss": 0.2235,
|
| 49864 |
+
"step": 7106
|
| 49865 |
+
},
|
| 49866 |
+
{
|
| 49867 |
+
"epoch": 0.0007107,
|
| 49868 |
+
"grad_norm": 0.9567082524299622,
|
| 49869 |
+
"learning_rate": 7.106e-05,
|
| 49870 |
+
"loss": 0.3097,
|
| 49871 |
+
"step": 7107
|
| 49872 |
+
},
|
| 49873 |
+
{
|
| 49874 |
+
"epoch": 0.0007108,
|
| 49875 |
+
"grad_norm": 0.6782042980194092,
|
| 49876 |
+
"learning_rate": 7.107e-05,
|
| 49877 |
+
"loss": 0.2263,
|
| 49878 |
+
"step": 7108
|
| 49879 |
+
},
|
| 49880 |
+
{
|
| 49881 |
+
"epoch": 0.0007109,
|
| 49882 |
+
"grad_norm": 0.6514846086502075,
|
| 49883 |
+
"learning_rate": 7.108e-05,
|
| 49884 |
+
"loss": 0.2249,
|
| 49885 |
+
"step": 7109
|
| 49886 |
+
},
|
| 49887 |
+
{
|
| 49888 |
+
"epoch": 0.000711,
|
| 49889 |
+
"grad_norm": 1.0211524963378906,
|
| 49890 |
+
"learning_rate": 7.109e-05,
|
| 49891 |
+
"loss": 0.308,
|
| 49892 |
+
"step": 7110
|
| 49893 |
+
},
|
| 49894 |
+
{
|
| 49895 |
+
"epoch": 0.0007111,
|
| 49896 |
+
"grad_norm": 0.6992805600166321,
|
| 49897 |
+
"learning_rate": 7.11e-05,
|
| 49898 |
+
"loss": 0.244,
|
| 49899 |
+
"step": 7111
|
| 49900 |
+
},
|
| 49901 |
+
{
|
| 49902 |
+
"epoch": 0.0007112,
|
| 49903 |
+
"grad_norm": 0.6797493696212769,
|
| 49904 |
+
"learning_rate": 7.111e-05,
|
| 49905 |
+
"loss": 0.2457,
|
| 49906 |
+
"step": 7112
|
| 49907 |
+
},
|
| 49908 |
+
{
|
| 49909 |
+
"epoch": 0.0007113,
|
| 49910 |
+
"grad_norm": 0.6276497840881348,
|
| 49911 |
+
"learning_rate": 7.112000000000001e-05,
|
| 49912 |
+
"loss": 0.2314,
|
| 49913 |
+
"step": 7113
|
| 49914 |
+
},
|
| 49915 |
+
{
|
| 49916 |
+
"epoch": 0.0007114,
|
| 49917 |
+
"grad_norm": 0.657174825668335,
|
| 49918 |
+
"learning_rate": 7.112999999999999e-05,
|
| 49919 |
+
"loss": 0.2533,
|
| 49920 |
+
"step": 7114
|
| 49921 |
+
},
|
| 49922 |
+
{
|
| 49923 |
+
"epoch": 0.0007115,
|
| 49924 |
+
"grad_norm": 1.1678730249404907,
|
| 49925 |
+
"learning_rate": 7.114e-05,
|
| 49926 |
+
"loss": 0.2463,
|
| 49927 |
+
"step": 7115
|
| 49928 |
+
},
|
| 49929 |
+
{
|
| 49930 |
+
"epoch": 0.0007116,
|
| 49931 |
+
"grad_norm": 0.6611289978027344,
|
| 49932 |
+
"learning_rate": 7.115000000000001e-05,
|
| 49933 |
+
"loss": 0.2554,
|
| 49934 |
+
"step": 7116
|
| 49935 |
+
},
|
| 49936 |
+
{
|
| 49937 |
+
"epoch": 0.0007117,
|
| 49938 |
+
"grad_norm": 2.7565414905548096,
|
| 49939 |
+
"learning_rate": 7.116e-05,
|
| 49940 |
+
"loss": 0.439,
|
| 49941 |
+
"step": 7117
|
| 49942 |
+
},
|
| 49943 |
+
{
|
| 49944 |
+
"epoch": 0.0007118,
|
| 49945 |
+
"grad_norm": 0.9948599338531494,
|
| 49946 |
+
"learning_rate": 7.117e-05,
|
| 49947 |
+
"loss": 0.3165,
|
| 49948 |
+
"step": 7118
|
| 49949 |
+
},
|
| 49950 |
+
{
|
| 49951 |
+
"epoch": 0.0007119,
|
| 49952 |
+
"grad_norm": 0.6232846975326538,
|
| 49953 |
+
"learning_rate": 7.118e-05,
|
| 49954 |
+
"loss": 0.2372,
|
| 49955 |
+
"step": 7119
|
| 49956 |
+
},
|
| 49957 |
+
{
|
| 49958 |
+
"epoch": 0.000712,
|
| 49959 |
+
"grad_norm": 0.6444941163063049,
|
| 49960 |
+
"learning_rate": 7.119e-05,
|
| 49961 |
+
"loss": 0.2466,
|
| 49962 |
+
"step": 7120
|
| 49963 |
+
},
|
| 49964 |
+
{
|
| 49965 |
+
"epoch": 0.0007121,
|
| 49966 |
+
"grad_norm": 0.6959469318389893,
|
| 49967 |
+
"learning_rate": 7.12e-05,
|
| 49968 |
+
"loss": 0.2446,
|
| 49969 |
+
"step": 7121
|
| 49970 |
+
},
|
| 49971 |
+
{
|
| 49972 |
+
"epoch": 0.0007122,
|
| 49973 |
+
"grad_norm": 0.6532331109046936,
|
| 49974 |
+
"learning_rate": 7.121e-05,
|
| 49975 |
+
"loss": 0.2543,
|
| 49976 |
+
"step": 7122
|
| 49977 |
+
},
|
| 49978 |
+
{
|
| 49979 |
+
"epoch": 0.0007123,
|
| 49980 |
+
"grad_norm": 1.486360788345337,
|
| 49981 |
+
"learning_rate": 7.122e-05,
|
| 49982 |
+
"loss": 0.338,
|
| 49983 |
+
"step": 7123
|
| 49984 |
+
},
|
| 49985 |
+
{
|
| 49986 |
+
"epoch": 0.0007124,
|
| 49987 |
+
"grad_norm": 0.580660343170166,
|
| 49988 |
+
"learning_rate": 7.123000000000001e-05,
|
| 49989 |
+
"loss": 0.2288,
|
| 49990 |
+
"step": 7124
|
| 49991 |
+
},
|
| 49992 |
+
{
|
| 49993 |
+
"epoch": 0.0007125,
|
| 49994 |
+
"grad_norm": 0.7176545858383179,
|
| 49995 |
+
"learning_rate": 7.124e-05,
|
| 49996 |
+
"loss": 0.2649,
|
| 49997 |
+
"step": 7125
|
| 49998 |
+
},
|
| 49999 |
+
{
|
| 50000 |
+
"epoch": 0.0007126,
|
| 50001 |
+
"grad_norm": 0.7158398032188416,
|
| 50002 |
+
"learning_rate": 7.125e-05,
|
| 50003 |
+
"loss": 0.2253,
|
| 50004 |
+
"step": 7126
|
| 50005 |
+
},
|
| 50006 |
+
{
|
| 50007 |
+
"epoch": 0.0007127,
|
| 50008 |
+
"grad_norm": 0.5990284085273743,
|
| 50009 |
+
"learning_rate": 7.126e-05,
|
| 50010 |
+
"loss": 0.2277,
|
| 50011 |
+
"step": 7127
|
| 50012 |
+
},
|
| 50013 |
+
{
|
| 50014 |
+
"epoch": 0.0007128,
|
| 50015 |
+
"grad_norm": 0.6648378372192383,
|
| 50016 |
+
"learning_rate": 7.127e-05,
|
| 50017 |
+
"loss": 0.2471,
|
| 50018 |
+
"step": 7128
|
| 50019 |
+
},
|
| 50020 |
+
{
|
| 50021 |
+
"epoch": 0.0007129,
|
| 50022 |
+
"grad_norm": 0.6481736302375793,
|
| 50023 |
+
"learning_rate": 7.128e-05,
|
| 50024 |
+
"loss": 0.2374,
|
| 50025 |
+
"step": 7129
|
| 50026 |
+
},
|
| 50027 |
+
{
|
| 50028 |
+
"epoch": 0.000713,
|
| 50029 |
+
"grad_norm": 7.419454097747803,
|
| 50030 |
+
"learning_rate": 7.129e-05,
|
| 50031 |
+
"loss": 0.6338,
|
| 50032 |
+
"step": 7130
|
| 50033 |
+
},
|
| 50034 |
+
{
|
| 50035 |
+
"epoch": 0.0007131,
|
| 50036 |
+
"grad_norm": 0.5747849345207214,
|
| 50037 |
+
"learning_rate": 7.13e-05,
|
| 50038 |
+
"loss": 0.2126,
|
| 50039 |
+
"step": 7131
|
| 50040 |
+
},
|
| 50041 |
+
{
|
| 50042 |
+
"epoch": 0.0007132,
|
| 50043 |
+
"grad_norm": 0.6667564511299133,
|
| 50044 |
+
"learning_rate": 7.130999999999999e-05,
|
| 50045 |
+
"loss": 0.2419,
|
| 50046 |
+
"step": 7132
|
| 50047 |
+
},
|
| 50048 |
+
{
|
| 50049 |
+
"epoch": 0.0007133,
|
| 50050 |
+
"grad_norm": 0.5959503650665283,
|
| 50051 |
+
"learning_rate": 7.132e-05,
|
| 50052 |
+
"loss": 0.2371,
|
| 50053 |
+
"step": 7133
|
| 50054 |
+
},
|
| 50055 |
+
{
|
| 50056 |
+
"epoch": 0.0007134,
|
| 50057 |
+
"grad_norm": 0.6175190210342407,
|
| 50058 |
+
"learning_rate": 7.133000000000001e-05,
|
| 50059 |
+
"loss": 0.2394,
|
| 50060 |
+
"step": 7134
|
| 50061 |
+
},
|
| 50062 |
+
{
|
| 50063 |
+
"epoch": 0.0007135,
|
| 50064 |
+
"grad_norm": 0.5732832551002502,
|
| 50065 |
+
"learning_rate": 7.134e-05,
|
| 50066 |
+
"loss": 0.2313,
|
| 50067 |
+
"step": 7135
|
| 50068 |
+
},
|
| 50069 |
+
{
|
| 50070 |
+
"epoch": 0.0007136,
|
| 50071 |
+
"grad_norm": 0.576556384563446,
|
| 50072 |
+
"learning_rate": 7.135e-05,
|
| 50073 |
+
"loss": 0.2311,
|
| 50074 |
+
"step": 7136
|
| 50075 |
+
},
|
| 50076 |
+
{
|
| 50077 |
+
"epoch": 0.0007137,
|
| 50078 |
+
"grad_norm": 0.5220944881439209,
|
| 50079 |
+
"learning_rate": 7.136000000000001e-05,
|
| 50080 |
+
"loss": 0.2162,
|
| 50081 |
+
"step": 7137
|
| 50082 |
+
},
|
| 50083 |
+
{
|
| 50084 |
+
"epoch": 0.0007138,
|
| 50085 |
+
"grad_norm": 0.5937696695327759,
|
| 50086 |
+
"learning_rate": 7.137e-05,
|
| 50087 |
+
"loss": 0.2471,
|
| 50088 |
+
"step": 7138
|
| 50089 |
+
},
|
| 50090 |
+
{
|
| 50091 |
+
"epoch": 0.0007139,
|
| 50092 |
+
"grad_norm": 0.6484452486038208,
|
| 50093 |
+
"learning_rate": 7.138e-05,
|
| 50094 |
+
"loss": 0.2611,
|
| 50095 |
+
"step": 7139
|
| 50096 |
+
},
|
| 50097 |
+
{
|
| 50098 |
+
"epoch": 0.000714,
|
| 50099 |
+
"grad_norm": 0.6111674904823303,
|
| 50100 |
+
"learning_rate": 7.139e-05,
|
| 50101 |
+
"loss": 0.2383,
|
| 50102 |
+
"step": 7140
|
| 50103 |
+
},
|
| 50104 |
+
{
|
| 50105 |
+
"epoch": 0.0007141,
|
| 50106 |
+
"grad_norm": 2.137749195098877,
|
| 50107 |
+
"learning_rate": 7.14e-05,
|
| 50108 |
+
"loss": 0.4233,
|
| 50109 |
+
"step": 7141
|
| 50110 |
+
},
|
| 50111 |
+
{
|
| 50112 |
+
"epoch": 0.0007142,
|
| 50113 |
+
"grad_norm": 0.727592945098877,
|
| 50114 |
+
"learning_rate": 7.141e-05,
|
| 50115 |
+
"loss": 0.2676,
|
| 50116 |
+
"step": 7142
|
| 50117 |
+
},
|
| 50118 |
+
{
|
| 50119 |
+
"epoch": 0.0007143,
|
| 50120 |
+
"grad_norm": 0.6484252214431763,
|
| 50121 |
+
"learning_rate": 7.142e-05,
|
| 50122 |
+
"loss": 0.2339,
|
| 50123 |
+
"step": 7143
|
| 50124 |
+
},
|
| 50125 |
+
{
|
| 50126 |
+
"epoch": 0.0007144,
|
| 50127 |
+
"grad_norm": 1.0977895259857178,
|
| 50128 |
+
"learning_rate": 7.143e-05,
|
| 50129 |
+
"loss": 0.2665,
|
| 50130 |
+
"step": 7144
|
| 50131 |
+
},
|
| 50132 |
+
{
|
| 50133 |
+
"epoch": 0.0007145,
|
| 50134 |
+
"grad_norm": 1.1723270416259766,
|
| 50135 |
+
"learning_rate": 7.144e-05,
|
| 50136 |
+
"loss": 0.3542,
|
| 50137 |
+
"step": 7145
|
| 50138 |
+
},
|
| 50139 |
+
{
|
| 50140 |
+
"epoch": 0.0007146,
|
| 50141 |
+
"grad_norm": 1.385094404220581,
|
| 50142 |
+
"learning_rate": 7.145e-05,
|
| 50143 |
+
"loss": 0.3069,
|
| 50144 |
+
"step": 7146
|
| 50145 |
+
},
|
| 50146 |
+
{
|
| 50147 |
+
"epoch": 0.0007147,
|
| 50148 |
+
"grad_norm": 0.8112697601318359,
|
| 50149 |
+
"learning_rate": 7.146000000000001e-05,
|
| 50150 |
+
"loss": 0.2416,
|
| 50151 |
+
"step": 7147
|
| 50152 |
+
},
|
| 50153 |
+
{
|
| 50154 |
+
"epoch": 0.0007148,
|
| 50155 |
+
"grad_norm": 0.8700363039970398,
|
| 50156 |
+
"learning_rate": 7.146999999999999e-05,
|
| 50157 |
+
"loss": 0.2625,
|
| 50158 |
+
"step": 7148
|
| 50159 |
+
},
|
| 50160 |
+
{
|
| 50161 |
+
"epoch": 0.0007149,
|
| 50162 |
+
"grad_norm": 0.7144835591316223,
|
| 50163 |
+
"learning_rate": 7.148e-05,
|
| 50164 |
+
"loss": 0.2773,
|
| 50165 |
+
"step": 7149
|
| 50166 |
+
},
|
| 50167 |
+
{
|
| 50168 |
+
"epoch": 0.000715,
|
| 50169 |
+
"grad_norm": 0.6032698750495911,
|
| 50170 |
+
"learning_rate": 7.149000000000001e-05,
|
| 50171 |
+
"loss": 0.2333,
|
| 50172 |
+
"step": 7150
|
| 50173 |
+
},
|
| 50174 |
+
{
|
| 50175 |
+
"epoch": 0.0007151,
|
| 50176 |
+
"grad_norm": 0.7318822741508484,
|
| 50177 |
+
"learning_rate": 7.15e-05,
|
| 50178 |
+
"loss": 0.2494,
|
| 50179 |
+
"step": 7151
|
| 50180 |
+
},
|
| 50181 |
+
{
|
| 50182 |
+
"epoch": 0.0007152,
|
| 50183 |
+
"grad_norm": 0.5837897658348083,
|
| 50184 |
+
"learning_rate": 7.151e-05,
|
| 50185 |
+
"loss": 0.2097,
|
| 50186 |
+
"step": 7152
|
| 50187 |
+
},
|
| 50188 |
+
{
|
| 50189 |
+
"epoch": 0.0007153,
|
| 50190 |
+
"grad_norm": 0.6239914298057556,
|
| 50191 |
+
"learning_rate": 7.152e-05,
|
| 50192 |
+
"loss": 0.2346,
|
| 50193 |
+
"step": 7153
|
| 50194 |
+
},
|
| 50195 |
+
{
|
| 50196 |
+
"epoch": 0.0007154,
|
| 50197 |
+
"grad_norm": 0.5616824626922607,
|
| 50198 |
+
"learning_rate": 7.153e-05,
|
| 50199 |
+
"loss": 0.2317,
|
| 50200 |
+
"step": 7154
|
| 50201 |
+
},
|
| 50202 |
+
{
|
| 50203 |
+
"epoch": 0.0007155,
|
| 50204 |
+
"grad_norm": 4.966375350952148,
|
| 50205 |
+
"learning_rate": 7.154e-05,
|
| 50206 |
+
"loss": 0.4221,
|
| 50207 |
+
"step": 7155
|
| 50208 |
+
},
|
| 50209 |
+
{
|
| 50210 |
+
"epoch": 0.0007156,
|
| 50211 |
+
"grad_norm": 0.6621120572090149,
|
| 50212 |
+
"learning_rate": 7.155e-05,
|
| 50213 |
+
"loss": 0.2323,
|
| 50214 |
+
"step": 7156
|
| 50215 |
+
},
|
| 50216 |
+
{
|
| 50217 |
+
"epoch": 0.0007157,
|
| 50218 |
+
"grad_norm": 0.7091697454452515,
|
| 50219 |
+
"learning_rate": 7.156e-05,
|
| 50220 |
+
"loss": 0.2356,
|
| 50221 |
+
"step": 7157
|
| 50222 |
+
},
|
| 50223 |
+
{
|
| 50224 |
+
"epoch": 0.0007158,
|
| 50225 |
+
"grad_norm": 0.6042245030403137,
|
| 50226 |
+
"learning_rate": 7.157000000000001e-05,
|
| 50227 |
+
"loss": 0.2158,
|
| 50228 |
+
"step": 7158
|
| 50229 |
+
},
|
| 50230 |
+
{
|
| 50231 |
+
"epoch": 0.0007159,
|
| 50232 |
+
"grad_norm": 0.6950005292892456,
|
| 50233 |
+
"learning_rate": 7.158e-05,
|
| 50234 |
+
"loss": 0.2396,
|
| 50235 |
+
"step": 7159
|
| 50236 |
+
},
|
| 50237 |
+
{
|
| 50238 |
+
"epoch": 0.000716,
|
| 50239 |
+
"grad_norm": 0.9411141276359558,
|
| 50240 |
+
"learning_rate": 7.159e-05,
|
| 50241 |
+
"loss": 0.2965,
|
| 50242 |
+
"step": 7160
|
| 50243 |
+
},
|
| 50244 |
+
{
|
| 50245 |
+
"epoch": 0.0007161,
|
| 50246 |
+
"grad_norm": 2.314314603805542,
|
| 50247 |
+
"learning_rate": 7.16e-05,
|
| 50248 |
+
"loss": 0.3401,
|
| 50249 |
+
"step": 7161
|
| 50250 |
+
},
|
| 50251 |
+
{
|
| 50252 |
+
"epoch": 0.0007162,
|
| 50253 |
+
"grad_norm": 0.6668578386306763,
|
| 50254 |
+
"learning_rate": 7.161e-05,
|
| 50255 |
+
"loss": 0.2271,
|
| 50256 |
+
"step": 7162
|
| 50257 |
+
},
|
| 50258 |
+
{
|
| 50259 |
+
"epoch": 0.0007163,
|
| 50260 |
+
"grad_norm": 1.7270629405975342,
|
| 50261 |
+
"learning_rate": 7.162e-05,
|
| 50262 |
+
"loss": 0.3015,
|
| 50263 |
+
"step": 7163
|
| 50264 |
+
},
|
| 50265 |
+
{
|
| 50266 |
+
"epoch": 0.0007164,
|
| 50267 |
+
"grad_norm": 0.7965278029441833,
|
| 50268 |
+
"learning_rate": 7.163e-05,
|
| 50269 |
+
"loss": 0.2498,
|
| 50270 |
+
"step": 7164
|
| 50271 |
+
},
|
| 50272 |
+
{
|
| 50273 |
+
"epoch": 0.0007165,
|
| 50274 |
+
"grad_norm": 0.7627633213996887,
|
| 50275 |
+
"learning_rate": 7.164e-05,
|
| 50276 |
+
"loss": 0.2413,
|
| 50277 |
+
"step": 7165
|
| 50278 |
+
},
|
| 50279 |
+
{
|
| 50280 |
+
"epoch": 0.0007166,
|
| 50281 |
+
"grad_norm": 0.6501511931419373,
|
| 50282 |
+
"learning_rate": 7.165e-05,
|
| 50283 |
+
"loss": 0.2357,
|
| 50284 |
+
"step": 7166
|
| 50285 |
+
},
|
| 50286 |
+
{
|
| 50287 |
+
"epoch": 0.0007167,
|
| 50288 |
+
"grad_norm": 0.5962719917297363,
|
| 50289 |
+
"learning_rate": 7.166e-05,
|
| 50290 |
+
"loss": 0.2275,
|
| 50291 |
+
"step": 7167
|
| 50292 |
+
},
|
| 50293 |
+
{
|
| 50294 |
+
"epoch": 0.0007168,
|
| 50295 |
+
"grad_norm": 0.585323691368103,
|
| 50296 |
+
"learning_rate": 7.167000000000001e-05,
|
| 50297 |
+
"loss": 0.2277,
|
| 50298 |
+
"step": 7168
|
| 50299 |
+
},
|
| 50300 |
+
{
|
| 50301 |
+
"epoch": 0.0007169,
|
| 50302 |
+
"grad_norm": 0.6851444840431213,
|
| 50303 |
+
"learning_rate": 7.168e-05,
|
| 50304 |
+
"loss": 0.2325,
|
| 50305 |
+
"step": 7169
|
| 50306 |
+
},
|
| 50307 |
+
{
|
| 50308 |
+
"epoch": 0.000717,
|
| 50309 |
+
"grad_norm": 0.6078755259513855,
|
| 50310 |
+
"learning_rate": 7.169e-05,
|
| 50311 |
+
"loss": 0.2395,
|
| 50312 |
+
"step": 7170
|
| 50313 |
+
},
|
| 50314 |
+
{
|
| 50315 |
+
"epoch": 0.0007171,
|
| 50316 |
+
"grad_norm": 0.574161946773529,
|
| 50317 |
+
"learning_rate": 7.170000000000001e-05,
|
| 50318 |
+
"loss": 0.2308,
|
| 50319 |
+
"step": 7171
|
| 50320 |
+
},
|
| 50321 |
+
{
|
| 50322 |
+
"epoch": 0.0007172,
|
| 50323 |
+
"grad_norm": 0.5724591016769409,
|
| 50324 |
+
"learning_rate": 7.171e-05,
|
| 50325 |
+
"loss": 0.2325,
|
| 50326 |
+
"step": 7172
|
| 50327 |
+
},
|
| 50328 |
+
{
|
| 50329 |
+
"epoch": 0.0007173,
|
| 50330 |
+
"grad_norm": 0.5637148022651672,
|
| 50331 |
+
"learning_rate": 7.172e-05,
|
| 50332 |
+
"loss": 0.2264,
|
| 50333 |
+
"step": 7173
|
| 50334 |
+
},
|
| 50335 |
+
{
|
| 50336 |
+
"epoch": 0.0007174,
|
| 50337 |
+
"grad_norm": 0.6590225100517273,
|
| 50338 |
+
"learning_rate": 7.173e-05,
|
| 50339 |
+
"loss": 0.2529,
|
| 50340 |
+
"step": 7174
|
| 50341 |
+
},
|
| 50342 |
+
{
|
| 50343 |
+
"epoch": 0.0007175,
|
| 50344 |
+
"grad_norm": 0.5135878324508667,
|
| 50345 |
+
"learning_rate": 7.174e-05,
|
| 50346 |
+
"loss": 0.2188,
|
| 50347 |
+
"step": 7175
|
| 50348 |
+
},
|
| 50349 |
+
{
|
| 50350 |
+
"epoch": 0.0007176,
|
| 50351 |
+
"grad_norm": 0.6357454657554626,
|
| 50352 |
+
"learning_rate": 7.175e-05,
|
| 50353 |
+
"loss": 0.2395,
|
| 50354 |
+
"step": 7176
|
| 50355 |
+
},
|
| 50356 |
+
{
|
| 50357 |
+
"epoch": 0.0007177,
|
| 50358 |
+
"grad_norm": 1.0965118408203125,
|
| 50359 |
+
"learning_rate": 7.176e-05,
|
| 50360 |
+
"loss": 0.2622,
|
| 50361 |
+
"step": 7177
|
| 50362 |
+
},
|
| 50363 |
+
{
|
| 50364 |
+
"epoch": 0.0007178,
|
| 50365 |
+
"grad_norm": 0.7161939740180969,
|
| 50366 |
+
"learning_rate": 7.177e-05,
|
| 50367 |
+
"loss": 0.2527,
|
| 50368 |
+
"step": 7178
|
| 50369 |
+
},
|
| 50370 |
+
{
|
| 50371 |
+
"epoch": 0.0007179,
|
| 50372 |
+
"grad_norm": 2.47273588180542,
|
| 50373 |
+
"learning_rate": 7.178e-05,
|
| 50374 |
+
"loss": 0.3,
|
| 50375 |
+
"step": 7179
|
| 50376 |
+
},
|
| 50377 |
+
{
|
| 50378 |
+
"epoch": 0.000718,
|
| 50379 |
+
"grad_norm": 0.6954152584075928,
|
| 50380 |
+
"learning_rate": 7.179e-05,
|
| 50381 |
+
"loss": 0.2368,
|
| 50382 |
+
"step": 7180
|
| 50383 |
+
},
|
| 50384 |
+
{
|
| 50385 |
+
"epoch": 0.0007181,
|
| 50386 |
+
"grad_norm": 0.7640717625617981,
|
| 50387 |
+
"learning_rate": 7.180000000000001e-05,
|
| 50388 |
+
"loss": 0.2489,
|
| 50389 |
+
"step": 7181
|
| 50390 |
+
},
|
| 50391 |
+
{
|
| 50392 |
+
"epoch": 0.0007182,
|
| 50393 |
+
"grad_norm": 0.6073949933052063,
|
| 50394 |
+
"learning_rate": 7.180999999999999e-05,
|
| 50395 |
+
"loss": 0.21,
|
| 50396 |
+
"step": 7182
|
| 50397 |
+
},
|
| 50398 |
+
{
|
| 50399 |
+
"epoch": 0.0007183,
|
| 50400 |
+
"grad_norm": 0.5503464937210083,
|
| 50401 |
+
"learning_rate": 7.182e-05,
|
| 50402 |
+
"loss": 0.2146,
|
| 50403 |
+
"step": 7183
|
| 50404 |
+
},
|
| 50405 |
+
{
|
| 50406 |
+
"epoch": 0.0007184,
|
| 50407 |
+
"grad_norm": 0.5253496766090393,
|
| 50408 |
+
"learning_rate": 7.183000000000001e-05,
|
| 50409 |
+
"loss": 0.2119,
|
| 50410 |
+
"step": 7184
|
| 50411 |
+
},
|
| 50412 |
+
{
|
| 50413 |
+
"epoch": 0.0007185,
|
| 50414 |
+
"grad_norm": 0.6075765490531921,
|
| 50415 |
+
"learning_rate": 7.184e-05,
|
| 50416 |
+
"loss": 0.2233,
|
| 50417 |
+
"step": 7185
|
| 50418 |
+
},
|
| 50419 |
+
{
|
| 50420 |
+
"epoch": 0.0007186,
|
| 50421 |
+
"grad_norm": 0.7229404449462891,
|
| 50422 |
+
"learning_rate": 7.185e-05,
|
| 50423 |
+
"loss": 0.2375,
|
| 50424 |
+
"step": 7186
|
| 50425 |
+
},
|
| 50426 |
+
{
|
| 50427 |
+
"epoch": 0.0007187,
|
| 50428 |
+
"grad_norm": 0.6995594501495361,
|
| 50429 |
+
"learning_rate": 7.186e-05,
|
| 50430 |
+
"loss": 0.2399,
|
| 50431 |
+
"step": 7187
|
| 50432 |
+
},
|
| 50433 |
+
{
|
| 50434 |
+
"epoch": 0.0007188,
|
| 50435 |
+
"grad_norm": 0.5980567336082458,
|
| 50436 |
+
"learning_rate": 7.187e-05,
|
| 50437 |
+
"loss": 0.2341,
|
| 50438 |
+
"step": 7188
|
| 50439 |
+
},
|
| 50440 |
+
{
|
| 50441 |
+
"epoch": 0.0007189,
|
| 50442 |
+
"grad_norm": 0.7101425528526306,
|
| 50443 |
+
"learning_rate": 7.188e-05,
|
| 50444 |
+
"loss": 0.2684,
|
| 50445 |
+
"step": 7189
|
| 50446 |
+
},
|
| 50447 |
+
{
|
| 50448 |
+
"epoch": 0.000719,
|
| 50449 |
+
"grad_norm": 0.6097431778907776,
|
| 50450 |
+
"learning_rate": 7.189e-05,
|
| 50451 |
+
"loss": 0.2347,
|
| 50452 |
+
"step": 7190
|
| 50453 |
+
},
|
| 50454 |
+
{
|
| 50455 |
+
"epoch": 0.0007191,
|
| 50456 |
+
"grad_norm": 0.5196309685707092,
|
| 50457 |
+
"learning_rate": 7.19e-05,
|
| 50458 |
+
"loss": 0.2109,
|
| 50459 |
+
"step": 7191
|
| 50460 |
+
},
|
| 50461 |
+
{
|
| 50462 |
+
"epoch": 0.0007192,
|
| 50463 |
+
"grad_norm": 0.6105871796607971,
|
| 50464 |
+
"learning_rate": 7.191000000000001e-05,
|
| 50465 |
+
"loss": 0.2308,
|
| 50466 |
+
"step": 7192
|
| 50467 |
+
},
|
| 50468 |
+
{
|
| 50469 |
+
"epoch": 0.0007193,
|
| 50470 |
+
"grad_norm": 1.5595483779907227,
|
| 50471 |
+
"learning_rate": 7.192e-05,
|
| 50472 |
+
"loss": 0.2831,
|
| 50473 |
+
"step": 7193
|
| 50474 |
+
},
|
| 50475 |
+
{
|
| 50476 |
+
"epoch": 0.0007194,
|
| 50477 |
+
"grad_norm": 0.5544551014900208,
|
| 50478 |
+
"learning_rate": 7.193e-05,
|
| 50479 |
+
"loss": 0.22,
|
| 50480 |
+
"step": 7194
|
| 50481 |
+
},
|
| 50482 |
+
{
|
| 50483 |
+
"epoch": 0.0007195,
|
| 50484 |
+
"grad_norm": 0.5593705177307129,
|
| 50485 |
+
"learning_rate": 7.194e-05,
|
| 50486 |
+
"loss": 0.2013,
|
| 50487 |
+
"step": 7195
|
| 50488 |
+
},
|
| 50489 |
+
{
|
| 50490 |
+
"epoch": 0.0007196,
|
| 50491 |
+
"grad_norm": 0.6130393743515015,
|
| 50492 |
+
"learning_rate": 7.195e-05,
|
| 50493 |
+
"loss": 0.2268,
|
| 50494 |
+
"step": 7196
|
| 50495 |
+
},
|
| 50496 |
+
{
|
| 50497 |
+
"epoch": 0.0007197,
|
| 50498 |
+
"grad_norm": 0.5510294437408447,
|
| 50499 |
+
"learning_rate": 7.196e-05,
|
| 50500 |
+
"loss": 0.2168,
|
| 50501 |
+
"step": 7197
|
| 50502 |
+
},
|
| 50503 |
+
{
|
| 50504 |
+
"epoch": 0.0007198,
|
| 50505 |
+
"grad_norm": 0.5207967162132263,
|
| 50506 |
+
"learning_rate": 7.197e-05,
|
| 50507 |
+
"loss": 0.2158,
|
| 50508 |
+
"step": 7198
|
| 50509 |
+
},
|
| 50510 |
+
{
|
| 50511 |
+
"epoch": 0.0007199,
|
| 50512 |
+
"grad_norm": 0.6211857795715332,
|
| 50513 |
+
"learning_rate": 7.198e-05,
|
| 50514 |
+
"loss": 0.2415,
|
| 50515 |
+
"step": 7199
|
| 50516 |
+
},
|
| 50517 |
+
{
|
| 50518 |
+
"epoch": 0.00072,
|
| 50519 |
+
"grad_norm": 0.564426839351654,
|
| 50520 |
+
"learning_rate": 7.199e-05,
|
| 50521 |
+
"loss": 0.2104,
|
| 50522 |
+
"step": 7200
|
| 50523 |
+
},
|
| 50524 |
+
{
|
| 50525 |
+
"epoch": 0.0007201,
|
| 50526 |
+
"grad_norm": 0.6109751462936401,
|
| 50527 |
+
"learning_rate": 7.2e-05,
|
| 50528 |
+
"loss": 0.231,
|
| 50529 |
+
"step": 7201
|
| 50530 |
+
},
|
| 50531 |
+
{
|
| 50532 |
+
"epoch": 0.0007202,
|
| 50533 |
+
"grad_norm": 0.5517870783805847,
|
| 50534 |
+
"learning_rate": 7.201000000000001e-05,
|
| 50535 |
+
"loss": 0.2203,
|
| 50536 |
+
"step": 7202
|
| 50537 |
+
},
|
| 50538 |
+
{
|
| 50539 |
+
"epoch": 0.0007203,
|
| 50540 |
+
"grad_norm": 1.919884443283081,
|
| 50541 |
+
"learning_rate": 7.202e-05,
|
| 50542 |
+
"loss": 0.2815,
|
| 50543 |
+
"step": 7203
|
| 50544 |
+
},
|
| 50545 |
+
{
|
| 50546 |
+
"epoch": 0.0007204,
|
| 50547 |
+
"grad_norm": 0.5402631163597107,
|
| 50548 |
+
"learning_rate": 7.203e-05,
|
| 50549 |
+
"loss": 0.2339,
|
| 50550 |
+
"step": 7204
|
| 50551 |
+
},
|
| 50552 |
+
{
|
| 50553 |
+
"epoch": 0.0007205,
|
| 50554 |
+
"grad_norm": 0.6419543623924255,
|
| 50555 |
+
"learning_rate": 7.204000000000001e-05,
|
| 50556 |
+
"loss": 0.235,
|
| 50557 |
+
"step": 7205
|
| 50558 |
+
},
|
| 50559 |
+
{
|
| 50560 |
+
"epoch": 0.0007206,
|
| 50561 |
+
"grad_norm": 6.634181499481201,
|
| 50562 |
+
"learning_rate": 7.205e-05,
|
| 50563 |
+
"loss": 0.5928,
|
| 50564 |
+
"step": 7206
|
| 50565 |
+
},
|
| 50566 |
+
{
|
| 50567 |
+
"epoch": 0.0007207,
|
| 50568 |
+
"grad_norm": 1.0302337408065796,
|
| 50569 |
+
"learning_rate": 7.206e-05,
|
| 50570 |
+
"loss": 0.2416,
|
| 50571 |
+
"step": 7207
|
| 50572 |
+
},
|
| 50573 |
+
{
|
| 50574 |
+
"epoch": 0.0007208,
|
| 50575 |
+
"grad_norm": 0.6726213097572327,
|
| 50576 |
+
"learning_rate": 7.207e-05,
|
| 50577 |
+
"loss": 0.2227,
|
| 50578 |
+
"step": 7208
|
| 50579 |
+
},
|
| 50580 |
+
{
|
| 50581 |
+
"epoch": 0.0007209,
|
| 50582 |
+
"grad_norm": 0.6477869153022766,
|
| 50583 |
+
"learning_rate": 7.208e-05,
|
| 50584 |
+
"loss": 0.241,
|
| 50585 |
+
"step": 7209
|
| 50586 |
+
},
|
| 50587 |
+
{
|
| 50588 |
+
"epoch": 0.000721,
|
| 50589 |
+
"grad_norm": 0.5784698128700256,
|
| 50590 |
+
"learning_rate": 7.209e-05,
|
| 50591 |
+
"loss": 0.2212,
|
| 50592 |
+
"step": 7210
|
| 50593 |
+
},
|
| 50594 |
+
{
|
| 50595 |
+
"epoch": 0.0007211,
|
| 50596 |
+
"grad_norm": 0.47859689593315125,
|
| 50597 |
+
"learning_rate": 7.21e-05,
|
| 50598 |
+
"loss": 0.2018,
|
| 50599 |
+
"step": 7211
|
| 50600 |
+
},
|
| 50601 |
+
{
|
| 50602 |
+
"epoch": 0.0007212,
|
| 50603 |
+
"grad_norm": 0.646013081073761,
|
| 50604 |
+
"learning_rate": 7.211e-05,
|
| 50605 |
+
"loss": 0.2505,
|
| 50606 |
+
"step": 7212
|
| 50607 |
+
},
|
| 50608 |
+
{
|
| 50609 |
+
"epoch": 0.0007213,
|
| 50610 |
+
"grad_norm": 0.615487277507782,
|
| 50611 |
+
"learning_rate": 7.212e-05,
|
| 50612 |
+
"loss": 0.2482,
|
| 50613 |
+
"step": 7213
|
| 50614 |
+
},
|
| 50615 |
+
{
|
| 50616 |
+
"epoch": 0.0007214,
|
| 50617 |
+
"grad_norm": 0.9184458255767822,
|
| 50618 |
+
"learning_rate": 7.213e-05,
|
| 50619 |
+
"loss": 0.2767,
|
| 50620 |
+
"step": 7214
|
| 50621 |
+
},
|
| 50622 |
+
{
|
| 50623 |
+
"epoch": 0.0007215,
|
| 50624 |
+
"grad_norm": 0.5351276993751526,
|
| 50625 |
+
"learning_rate": 7.214000000000001e-05,
|
| 50626 |
+
"loss": 0.2139,
|
| 50627 |
+
"step": 7215
|
| 50628 |
+
},
|
| 50629 |
+
{
|
| 50630 |
+
"epoch": 0.0007216,
|
| 50631 |
+
"grad_norm": 0.520902693271637,
|
| 50632 |
+
"learning_rate": 7.214999999999999e-05,
|
| 50633 |
+
"loss": 0.2069,
|
| 50634 |
+
"step": 7216
|
| 50635 |
+
},
|
| 50636 |
+
{
|
| 50637 |
+
"epoch": 0.0007217,
|
| 50638 |
+
"grad_norm": 0.8827000856399536,
|
| 50639 |
+
"learning_rate": 7.216e-05,
|
| 50640 |
+
"loss": 0.2527,
|
| 50641 |
+
"step": 7217
|
| 50642 |
+
},
|
| 50643 |
+
{
|
| 50644 |
+
"epoch": 0.0007218,
|
| 50645 |
+
"grad_norm": 0.5077048540115356,
|
| 50646 |
+
"learning_rate": 7.217000000000001e-05,
|
| 50647 |
+
"loss": 0.2145,
|
| 50648 |
+
"step": 7218
|
| 50649 |
+
},
|
| 50650 |
+
{
|
| 50651 |
+
"epoch": 0.0007219,
|
| 50652 |
+
"grad_norm": 0.5010983347892761,
|
| 50653 |
+
"learning_rate": 7.218e-05,
|
| 50654 |
+
"loss": 0.2025,
|
| 50655 |
+
"step": 7219
|
| 50656 |
+
},
|
| 50657 |
+
{
|
| 50658 |
+
"epoch": 0.000722,
|
| 50659 |
+
"grad_norm": 0.5093920230865479,
|
| 50660 |
+
"learning_rate": 7.219e-05,
|
| 50661 |
+
"loss": 0.2124,
|
| 50662 |
+
"step": 7220
|
| 50663 |
+
},
|
| 50664 |
+
{
|
| 50665 |
+
"epoch": 0.0007221,
|
| 50666 |
+
"grad_norm": 0.5570715069770813,
|
| 50667 |
+
"learning_rate": 7.22e-05,
|
| 50668 |
+
"loss": 0.218,
|
| 50669 |
+
"step": 7221
|
| 50670 |
+
},
|
| 50671 |
+
{
|
| 50672 |
+
"epoch": 0.0007222,
|
| 50673 |
+
"grad_norm": 0.5382811427116394,
|
| 50674 |
+
"learning_rate": 7.221e-05,
|
| 50675 |
+
"loss": 0.2222,
|
| 50676 |
+
"step": 7222
|
| 50677 |
+
},
|
| 50678 |
+
{
|
| 50679 |
+
"epoch": 0.0007223,
|
| 50680 |
+
"grad_norm": 0.8405284881591797,
|
| 50681 |
+
"learning_rate": 7.222e-05,
|
| 50682 |
+
"loss": 0.2488,
|
| 50683 |
+
"step": 7223
|
| 50684 |
+
},
|
| 50685 |
+
{
|
| 50686 |
+
"epoch": 0.0007224,
|
| 50687 |
+
"grad_norm": 0.5121170282363892,
|
| 50688 |
+
"learning_rate": 7.223e-05,
|
| 50689 |
+
"loss": 0.2166,
|
| 50690 |
+
"step": 7224
|
| 50691 |
+
},
|
| 50692 |
+
{
|
| 50693 |
+
"epoch": 0.0007225,
|
| 50694 |
+
"grad_norm": 1.4797457456588745,
|
| 50695 |
+
"learning_rate": 7.224e-05,
|
| 50696 |
+
"loss": 0.2417,
|
| 50697 |
+
"step": 7225
|
| 50698 |
+
},
|
| 50699 |
+
{
|
| 50700 |
+
"epoch": 0.0007226,
|
| 50701 |
+
"grad_norm": 0.6273139715194702,
|
| 50702 |
+
"learning_rate": 7.225000000000001e-05,
|
| 50703 |
+
"loss": 0.2262,
|
| 50704 |
+
"step": 7226
|
| 50705 |
+
},
|
| 50706 |
+
{
|
| 50707 |
+
"epoch": 0.0007227,
|
| 50708 |
+
"grad_norm": 0.6362743973731995,
|
| 50709 |
+
"learning_rate": 7.226e-05,
|
| 50710 |
+
"loss": 0.2286,
|
| 50711 |
+
"step": 7227
|
| 50712 |
+
},
|
| 50713 |
+
{
|
| 50714 |
+
"epoch": 0.0007228,
|
| 50715 |
+
"grad_norm": 0.6842048764228821,
|
| 50716 |
+
"learning_rate": 7.227e-05,
|
| 50717 |
+
"loss": 0.224,
|
| 50718 |
+
"step": 7228
|
| 50719 |
+
},
|
| 50720 |
+
{
|
| 50721 |
+
"epoch": 0.0007229,
|
| 50722 |
+
"grad_norm": 0.5175364017486572,
|
| 50723 |
+
"learning_rate": 7.228e-05,
|
| 50724 |
+
"loss": 0.2073,
|
| 50725 |
+
"step": 7229
|
| 50726 |
+
},
|
| 50727 |
+
{
|
| 50728 |
+
"epoch": 0.000723,
|
| 50729 |
+
"grad_norm": 0.48860543966293335,
|
| 50730 |
+
"learning_rate": 7.229e-05,
|
| 50731 |
+
"loss": 0.2068,
|
| 50732 |
+
"step": 7230
|
| 50733 |
+
},
|
| 50734 |
+
{
|
| 50735 |
+
"epoch": 0.0007231,
|
| 50736 |
+
"grad_norm": 0.5222052335739136,
|
| 50737 |
+
"learning_rate": 7.23e-05,
|
| 50738 |
+
"loss": 0.2159,
|
| 50739 |
+
"step": 7231
|
| 50740 |
+
},
|
| 50741 |
+
{
|
| 50742 |
+
"epoch": 0.0007232,
|
| 50743 |
+
"grad_norm": 0.5209774971008301,
|
| 50744 |
+
"learning_rate": 7.231e-05,
|
| 50745 |
+
"loss": 0.2061,
|
| 50746 |
+
"step": 7232
|
| 50747 |
+
},
|
| 50748 |
+
{
|
| 50749 |
+
"epoch": 0.0007233,
|
| 50750 |
+
"grad_norm": 1.7664886713027954,
|
| 50751 |
+
"learning_rate": 7.232e-05,
|
| 50752 |
+
"loss": 0.3177,
|
| 50753 |
+
"step": 7233
|
| 50754 |
+
},
|
| 50755 |
+
{
|
| 50756 |
+
"epoch": 0.0007234,
|
| 50757 |
+
"grad_norm": 0.524284839630127,
|
| 50758 |
+
"learning_rate": 7.233e-05,
|
| 50759 |
+
"loss": 0.2126,
|
| 50760 |
+
"step": 7234
|
| 50761 |
+
},
|
| 50762 |
+
{
|
| 50763 |
+
"epoch": 0.0007235,
|
| 50764 |
+
"grad_norm": 0.5629242658615112,
|
| 50765 |
+
"learning_rate": 7.234e-05,
|
| 50766 |
+
"loss": 0.2191,
|
| 50767 |
+
"step": 7235
|
| 50768 |
+
},
|
| 50769 |
+
{
|
| 50770 |
+
"epoch": 0.0007236,
|
| 50771 |
+
"grad_norm": 0.5035473108291626,
|
| 50772 |
+
"learning_rate": 7.235000000000001e-05,
|
| 50773 |
+
"loss": 0.2058,
|
| 50774 |
+
"step": 7236
|
| 50775 |
+
},
|
| 50776 |
+
{
|
| 50777 |
+
"epoch": 0.0007237,
|
| 50778 |
+
"grad_norm": 0.5276579856872559,
|
| 50779 |
+
"learning_rate": 7.236e-05,
|
| 50780 |
+
"loss": 0.2151,
|
| 50781 |
+
"step": 7237
|
| 50782 |
+
},
|
| 50783 |
+
{
|
| 50784 |
+
"epoch": 0.0007238,
|
| 50785 |
+
"grad_norm": 0.48516181111335754,
|
| 50786 |
+
"learning_rate": 7.237e-05,
|
| 50787 |
+
"loss": 0.2122,
|
| 50788 |
+
"step": 7238
|
| 50789 |
+
},
|
| 50790 |
+
{
|
| 50791 |
+
"epoch": 0.0007239,
|
| 50792 |
+
"grad_norm": 0.5754404664039612,
|
| 50793 |
+
"learning_rate": 7.238000000000001e-05,
|
| 50794 |
+
"loss": 0.2198,
|
| 50795 |
+
"step": 7239
|
| 50796 |
+
},
|
| 50797 |
+
{
|
| 50798 |
+
"epoch": 0.000724,
|
| 50799 |
+
"grad_norm": 0.4860302209854126,
|
| 50800 |
+
"learning_rate": 7.239e-05,
|
| 50801 |
+
"loss": 0.2046,
|
| 50802 |
+
"step": 7240
|
| 50803 |
+
},
|
| 50804 |
+
{
|
| 50805 |
+
"epoch": 0.0007241,
|
| 50806 |
+
"grad_norm": 0.6111292243003845,
|
| 50807 |
+
"learning_rate": 7.24e-05,
|
| 50808 |
+
"loss": 0.2305,
|
| 50809 |
+
"step": 7241
|
| 50810 |
+
},
|
| 50811 |
+
{
|
| 50812 |
+
"epoch": 0.0007242,
|
| 50813 |
+
"grad_norm": 0.49134159088134766,
|
| 50814 |
+
"learning_rate": 7.241e-05,
|
| 50815 |
+
"loss": 0.2047,
|
| 50816 |
+
"step": 7242
|
| 50817 |
+
},
|
| 50818 |
+
{
|
| 50819 |
+
"epoch": 0.0007243,
|
| 50820 |
+
"grad_norm": 0.5437736511230469,
|
| 50821 |
+
"learning_rate": 7.242e-05,
|
| 50822 |
+
"loss": 0.2113,
|
| 50823 |
+
"step": 7243
|
| 50824 |
+
},
|
| 50825 |
+
{
|
| 50826 |
+
"epoch": 0.0007244,
|
| 50827 |
+
"grad_norm": 0.7377773523330688,
|
| 50828 |
+
"learning_rate": 7.243e-05,
|
| 50829 |
+
"loss": 0.2406,
|
| 50830 |
+
"step": 7244
|
| 50831 |
+
},
|
| 50832 |
+
{
|
| 50833 |
+
"epoch": 0.0007245,
|
| 50834 |
+
"grad_norm": 0.5279169082641602,
|
| 50835 |
+
"learning_rate": 7.244e-05,
|
| 50836 |
+
"loss": 0.2158,
|
| 50837 |
+
"step": 7245
|
| 50838 |
+
},
|
| 50839 |
+
{
|
| 50840 |
+
"epoch": 0.0007246,
|
| 50841 |
+
"grad_norm": 0.550542950630188,
|
| 50842 |
+
"learning_rate": 7.245e-05,
|
| 50843 |
+
"loss": 0.2227,
|
| 50844 |
+
"step": 7246
|
| 50845 |
+
},
|
| 50846 |
+
{
|
| 50847 |
+
"epoch": 0.0007247,
|
| 50848 |
+
"grad_norm": 0.5191932916641235,
|
| 50849 |
+
"learning_rate": 7.246e-05,
|
| 50850 |
+
"loss": 0.2205,
|
| 50851 |
+
"step": 7247
|
| 50852 |
+
},
|
| 50853 |
+
{
|
| 50854 |
+
"epoch": 0.0007248,
|
| 50855 |
+
"grad_norm": 1.221814751625061,
|
| 50856 |
+
"learning_rate": 7.247e-05,
|
| 50857 |
+
"loss": 0.3137,
|
| 50858 |
+
"step": 7248
|
| 50859 |
+
},
|
| 50860 |
+
{
|
| 50861 |
+
"epoch": 0.0007249,
|
| 50862 |
+
"grad_norm": 1.6500520706176758,
|
| 50863 |
+
"learning_rate": 7.248000000000001e-05,
|
| 50864 |
+
"loss": 0.2714,
|
| 50865 |
+
"step": 7249
|
| 50866 |
+
},
|
| 50867 |
+
{
|
| 50868 |
+
"epoch": 0.000725,
|
| 50869 |
+
"grad_norm": 0.680809497833252,
|
| 50870 |
+
"learning_rate": 7.248999999999999e-05,
|
| 50871 |
+
"loss": 0.2228,
|
| 50872 |
+
"step": 7250
|
| 50873 |
+
},
|
| 50874 |
+
{
|
| 50875 |
+
"epoch": 0.0007251,
|
| 50876 |
+
"grad_norm": 0.5752734541893005,
|
| 50877 |
+
"learning_rate": 7.25e-05,
|
| 50878 |
+
"loss": 0.203,
|
| 50879 |
+
"step": 7251
|
| 50880 |
+
},
|
| 50881 |
+
{
|
| 50882 |
+
"epoch": 0.0007252,
|
| 50883 |
+
"grad_norm": 0.6003434062004089,
|
| 50884 |
+
"learning_rate": 7.251000000000001e-05,
|
| 50885 |
+
"loss": 0.2244,
|
| 50886 |
+
"step": 7252
|
| 50887 |
+
},
|
| 50888 |
+
{
|
| 50889 |
+
"epoch": 0.0007253,
|
| 50890 |
+
"grad_norm": 0.5211772322654724,
|
| 50891 |
+
"learning_rate": 7.252e-05,
|
| 50892 |
+
"loss": 0.2092,
|
| 50893 |
+
"step": 7253
|
| 50894 |
+
},
|
| 50895 |
+
{
|
| 50896 |
+
"epoch": 0.0007254,
|
| 50897 |
+
"grad_norm": 0.5306843519210815,
|
| 50898 |
+
"learning_rate": 7.253e-05,
|
| 50899 |
+
"loss": 0.2216,
|
| 50900 |
+
"step": 7254
|
| 50901 |
+
},
|
| 50902 |
+
{
|
| 50903 |
+
"epoch": 0.0007255,
|
| 50904 |
+
"grad_norm": 0.48355337977409363,
|
| 50905 |
+
"learning_rate": 7.254000000000001e-05,
|
| 50906 |
+
"loss": 0.2002,
|
| 50907 |
+
"step": 7255
|
| 50908 |
+
},
|
| 50909 |
+
{
|
| 50910 |
+
"epoch": 0.0007256,
|
| 50911 |
+
"grad_norm": 0.7639783620834351,
|
| 50912 |
+
"learning_rate": 7.255e-05,
|
| 50913 |
+
"loss": 0.2357,
|
| 50914 |
+
"step": 7256
|
| 50915 |
+
},
|
| 50916 |
+
{
|
| 50917 |
+
"epoch": 0.0007257,
|
| 50918 |
+
"grad_norm": 0.6967151165008545,
|
| 50919 |
+
"learning_rate": 7.256e-05,
|
| 50920 |
+
"loss": 0.2445,
|
| 50921 |
+
"step": 7257
|
| 50922 |
+
},
|
| 50923 |
+
{
|
| 50924 |
+
"epoch": 0.0007258,
|
| 50925 |
+
"grad_norm": 0.5458248853683472,
|
| 50926 |
+
"learning_rate": 7.257e-05,
|
| 50927 |
+
"loss": 0.2205,
|
| 50928 |
+
"step": 7258
|
| 50929 |
+
},
|
| 50930 |
+
{
|
| 50931 |
+
"epoch": 0.0007259,
|
| 50932 |
+
"grad_norm": 0.6922258734703064,
|
| 50933 |
+
"learning_rate": 7.258e-05,
|
| 50934 |
+
"loss": 0.2216,
|
| 50935 |
+
"step": 7259
|
| 50936 |
+
},
|
| 50937 |
+
{
|
| 50938 |
+
"epoch": 0.000726,
|
| 50939 |
+
"grad_norm": 0.4929615557193756,
|
| 50940 |
+
"learning_rate": 7.259e-05,
|
| 50941 |
+
"loss": 0.2106,
|
| 50942 |
+
"step": 7260
|
| 50943 |
+
},
|
| 50944 |
+
{
|
| 50945 |
+
"epoch": 0.0007261,
|
| 50946 |
+
"grad_norm": 0.5160805583000183,
|
| 50947 |
+
"learning_rate": 7.26e-05,
|
| 50948 |
+
"loss": 0.2074,
|
| 50949 |
+
"step": 7261
|
| 50950 |
+
},
|
| 50951 |
+
{
|
| 50952 |
+
"epoch": 0.0007262,
|
| 50953 |
+
"grad_norm": 1.9402644634246826,
|
| 50954 |
+
"learning_rate": 7.261e-05,
|
| 50955 |
+
"loss": 0.2408,
|
| 50956 |
+
"step": 7262
|
| 50957 |
+
},
|
| 50958 |
+
{
|
| 50959 |
+
"epoch": 0.0007263,
|
| 50960 |
+
"grad_norm": 0.46201208233833313,
|
| 50961 |
+
"learning_rate": 7.262e-05,
|
| 50962 |
+
"loss": 0.1937,
|
| 50963 |
+
"step": 7263
|
| 50964 |
+
},
|
| 50965 |
+
{
|
| 50966 |
+
"epoch": 0.0007264,
|
| 50967 |
+
"grad_norm": 0.5219488143920898,
|
| 50968 |
+
"learning_rate": 7.263e-05,
|
| 50969 |
+
"loss": 0.2086,
|
| 50970 |
+
"step": 7264
|
| 50971 |
+
},
|
| 50972 |
+
{
|
| 50973 |
+
"epoch": 0.0007265,
|
| 50974 |
+
"grad_norm": 0.4784381687641144,
|
| 50975 |
+
"learning_rate": 7.264e-05,
|
| 50976 |
+
"loss": 0.2035,
|
| 50977 |
+
"step": 7265
|
| 50978 |
+
},
|
| 50979 |
+
{
|
| 50980 |
+
"epoch": 0.0007266,
|
| 50981 |
+
"grad_norm": 0.4866734743118286,
|
| 50982 |
+
"learning_rate": 7.265e-05,
|
| 50983 |
+
"loss": 0.2037,
|
| 50984 |
+
"step": 7266
|
| 50985 |
+
},
|
| 50986 |
+
{
|
| 50987 |
+
"epoch": 0.0007267,
|
| 50988 |
+
"grad_norm": 0.5169983506202698,
|
| 50989 |
+
"learning_rate": 7.266e-05,
|
| 50990 |
+
"loss": 0.2137,
|
| 50991 |
+
"step": 7267
|
| 50992 |
+
},
|
| 50993 |
+
{
|
| 50994 |
+
"epoch": 0.0007268,
|
| 50995 |
+
"grad_norm": 0.4718962013721466,
|
| 50996 |
+
"learning_rate": 7.267e-05,
|
| 50997 |
+
"loss": 0.199,
|
| 50998 |
+
"step": 7268
|
| 50999 |
+
},
|
| 51000 |
+
{
|
| 51001 |
+
"epoch": 0.0007269,
|
| 51002 |
+
"grad_norm": 0.521872341632843,
|
| 51003 |
+
"learning_rate": 7.268e-05,
|
| 51004 |
+
"loss": 0.2175,
|
| 51005 |
+
"step": 7269
|
| 51006 |
+
},
|
| 51007 |
+
{
|
| 51008 |
+
"epoch": 0.000727,
|
| 51009 |
+
"grad_norm": 0.49978336691856384,
|
| 51010 |
+
"learning_rate": 7.269000000000001e-05,
|
| 51011 |
+
"loss": 0.213,
|
| 51012 |
+
"step": 7270
|
| 51013 |
+
},
|
| 51014 |
+
{
|
| 51015 |
+
"epoch": 0.0007271,
|
| 51016 |
+
"grad_norm": 0.4791599214076996,
|
| 51017 |
+
"learning_rate": 7.27e-05,
|
| 51018 |
+
"loss": 0.2037,
|
| 51019 |
+
"step": 7271
|
| 51020 |
+
},
|
| 51021 |
+
{
|
| 51022 |
+
"epoch": 0.0007272,
|
| 51023 |
+
"grad_norm": 0.4703919589519501,
|
| 51024 |
+
"learning_rate": 7.271e-05,
|
| 51025 |
+
"loss": 0.1992,
|
| 51026 |
+
"step": 7272
|
| 51027 |
+
},
|
| 51028 |
+
{
|
| 51029 |
+
"epoch": 0.0007273,
|
| 51030 |
+
"grad_norm": 0.439566433429718,
|
| 51031 |
+
"learning_rate": 7.272000000000001e-05,
|
| 51032 |
+
"loss": 0.2018,
|
| 51033 |
+
"step": 7273
|
| 51034 |
+
},
|
| 51035 |
+
{
|
| 51036 |
+
"epoch": 0.0007274,
|
| 51037 |
+
"grad_norm": 0.47832438349723816,
|
| 51038 |
+
"learning_rate": 7.273e-05,
|
| 51039 |
+
"loss": 0.2087,
|
| 51040 |
+
"step": 7274
|
| 51041 |
+
},
|
| 51042 |
+
{
|
| 51043 |
+
"epoch": 0.0007275,
|
| 51044 |
+
"grad_norm": 0.9735659956932068,
|
| 51045 |
+
"learning_rate": 7.274e-05,
|
| 51046 |
+
"loss": 0.2181,
|
| 51047 |
+
"step": 7275
|
| 51048 |
+
},
|
| 51049 |
+
{
|
| 51050 |
+
"epoch": 0.0007276,
|
| 51051 |
+
"grad_norm": 0.4623475968837738,
|
| 51052 |
+
"learning_rate": 7.275e-05,
|
| 51053 |
+
"loss": 0.1947,
|
| 51054 |
+
"step": 7276
|
| 51055 |
+
},
|
| 51056 |
+
{
|
| 51057 |
+
"epoch": 0.0007277,
|
| 51058 |
+
"grad_norm": 0.4709840714931488,
|
| 51059 |
+
"learning_rate": 7.276e-05,
|
| 51060 |
+
"loss": 0.2057,
|
| 51061 |
+
"step": 7277
|
| 51062 |
+
},
|
| 51063 |
+
{
|
| 51064 |
+
"epoch": 0.0007278,
|
| 51065 |
+
"grad_norm": 0.49509432911872864,
|
| 51066 |
+
"learning_rate": 7.277e-05,
|
| 51067 |
+
"loss": 0.2023,
|
| 51068 |
+
"step": 7278
|
| 51069 |
+
},
|
| 51070 |
+
{
|
| 51071 |
+
"epoch": 0.0007279,
|
| 51072 |
+
"grad_norm": 0.5068236589431763,
|
| 51073 |
+
"learning_rate": 7.278e-05,
|
| 51074 |
+
"loss": 0.201,
|
| 51075 |
+
"step": 7279
|
| 51076 |
+
},
|
| 51077 |
+
{
|
| 51078 |
+
"epoch": 0.000728,
|
| 51079 |
+
"grad_norm": 0.7884330749511719,
|
| 51080 |
+
"learning_rate": 7.279e-05,
|
| 51081 |
+
"loss": 0.226,
|
| 51082 |
+
"step": 7280
|
| 51083 |
+
},
|
| 51084 |
+
{
|
| 51085 |
+
"epoch": 0.0007281,
|
| 51086 |
+
"grad_norm": 5.262802600860596,
|
| 51087 |
+
"learning_rate": 7.28e-05,
|
| 51088 |
+
"loss": 0.2993,
|
| 51089 |
+
"step": 7281
|
| 51090 |
+
},
|
| 51091 |
+
{
|
| 51092 |
+
"epoch": 0.0007282,
|
| 51093 |
+
"grad_norm": 0.5736418962478638,
|
| 51094 |
+
"learning_rate": 7.281e-05,
|
| 51095 |
+
"loss": 0.2168,
|
| 51096 |
+
"step": 7282
|
| 51097 |
+
},
|
| 51098 |
+
{
|
| 51099 |
+
"epoch": 0.0007283,
|
| 51100 |
+
"grad_norm": 3.2191948890686035,
|
| 51101 |
+
"learning_rate": 7.282000000000001e-05,
|
| 51102 |
+
"loss": 0.4016,
|
| 51103 |
+
"step": 7283
|
| 51104 |
+
},
|
| 51105 |
+
{
|
| 51106 |
+
"epoch": 0.0007284,
|
| 51107 |
+
"grad_norm": 0.6173503398895264,
|
| 51108 |
+
"learning_rate": 7.282999999999999e-05,
|
| 51109 |
+
"loss": 0.1929,
|
| 51110 |
+
"step": 7284
|
| 51111 |
+
},
|
| 51112 |
+
{
|
| 51113 |
+
"epoch": 0.0007285,
|
| 51114 |
+
"grad_norm": 0.8881523609161377,
|
| 51115 |
+
"learning_rate": 7.284e-05,
|
| 51116 |
+
"loss": 0.2728,
|
| 51117 |
+
"step": 7285
|
| 51118 |
+
},
|
| 51119 |
+
{
|
| 51120 |
+
"epoch": 0.0007286,
|
| 51121 |
+
"grad_norm": 0.500853419303894,
|
| 51122 |
+
"learning_rate": 7.285000000000001e-05,
|
| 51123 |
+
"loss": 0.1998,
|
| 51124 |
+
"step": 7286
|
| 51125 |
+
},
|
| 51126 |
+
{
|
| 51127 |
+
"epoch": 0.0007287,
|
| 51128 |
+
"grad_norm": 0.49271973967552185,
|
| 51129 |
+
"learning_rate": 7.286e-05,
|
| 51130 |
+
"loss": 0.1963,
|
| 51131 |
+
"step": 7287
|
| 51132 |
+
},
|
| 51133 |
+
{
|
| 51134 |
+
"epoch": 0.0007288,
|
| 51135 |
+
"grad_norm": 0.5352133512496948,
|
| 51136 |
+
"learning_rate": 7.287e-05,
|
| 51137 |
+
"loss": 0.208,
|
| 51138 |
+
"step": 7288
|
| 51139 |
+
},
|
| 51140 |
+
{
|
| 51141 |
+
"epoch": 0.0007289,
|
| 51142 |
+
"grad_norm": 0.7467477917671204,
|
| 51143 |
+
"learning_rate": 7.288000000000001e-05,
|
| 51144 |
+
"loss": 0.2314,
|
| 51145 |
+
"step": 7289
|
| 51146 |
+
},
|
| 51147 |
+
{
|
| 51148 |
+
"epoch": 0.000729,
|
| 51149 |
+
"grad_norm": 1.1657520532608032,
|
| 51150 |
+
"learning_rate": 7.289e-05,
|
| 51151 |
+
"loss": 0.2361,
|
| 51152 |
+
"step": 7290
|
| 51153 |
+
},
|
| 51154 |
+
{
|
| 51155 |
+
"epoch": 0.0007291,
|
| 51156 |
+
"grad_norm": 0.7892240881919861,
|
| 51157 |
+
"learning_rate": 7.29e-05,
|
| 51158 |
+
"loss": 0.2517,
|
| 51159 |
+
"step": 7291
|
| 51160 |
+
},
|
| 51161 |
+
{
|
| 51162 |
+
"epoch": 0.0007292,
|
| 51163 |
+
"grad_norm": 0.5541574358940125,
|
| 51164 |
+
"learning_rate": 7.291e-05,
|
| 51165 |
+
"loss": 0.1995,
|
| 51166 |
+
"step": 7292
|
| 51167 |
+
},
|
| 51168 |
+
{
|
| 51169 |
+
"epoch": 0.0007293,
|
| 51170 |
+
"grad_norm": 0.6174006462097168,
|
| 51171 |
+
"learning_rate": 7.292e-05,
|
| 51172 |
+
"loss": 0.1985,
|
| 51173 |
+
"step": 7293
|
| 51174 |
+
},
|
| 51175 |
+
{
|
| 51176 |
+
"epoch": 0.0007294,
|
| 51177 |
+
"grad_norm": 0.5258652567863464,
|
| 51178 |
+
"learning_rate": 7.293e-05,
|
| 51179 |
+
"loss": 0.1866,
|
| 51180 |
+
"step": 7294
|
| 51181 |
+
},
|
| 51182 |
+
{
|
| 51183 |
+
"epoch": 0.0007295,
|
| 51184 |
+
"grad_norm": 0.5781332850456238,
|
| 51185 |
+
"learning_rate": 7.294e-05,
|
| 51186 |
+
"loss": 0.208,
|
| 51187 |
+
"step": 7295
|
| 51188 |
+
},
|
| 51189 |
+
{
|
| 51190 |
+
"epoch": 0.0007296,
|
| 51191 |
+
"grad_norm": 0.5011702179908752,
|
| 51192 |
+
"learning_rate": 7.295e-05,
|
| 51193 |
+
"loss": 0.208,
|
| 51194 |
+
"step": 7296
|
| 51195 |
+
},
|
| 51196 |
+
{
|
| 51197 |
+
"epoch": 0.0007297,
|
| 51198 |
+
"grad_norm": 0.7021820545196533,
|
| 51199 |
+
"learning_rate": 7.296e-05,
|
| 51200 |
+
"loss": 0.2271,
|
| 51201 |
+
"step": 7297
|
| 51202 |
+
},
|
| 51203 |
+
{
|
| 51204 |
+
"epoch": 0.0007298,
|
| 51205 |
+
"grad_norm": 0.5188300609588623,
|
| 51206 |
+
"learning_rate": 7.297e-05,
|
| 51207 |
+
"loss": 0.2019,
|
| 51208 |
+
"step": 7298
|
| 51209 |
+
},
|
| 51210 |
+
{
|
| 51211 |
+
"epoch": 0.0007299,
|
| 51212 |
+
"grad_norm": 0.5135867595672607,
|
| 51213 |
+
"learning_rate": 7.298e-05,
|
| 51214 |
+
"loss": 0.1907,
|
| 51215 |
+
"step": 7299
|
| 51216 |
+
},
|
| 51217 |
+
{
|
| 51218 |
+
"epoch": 0.00073,
|
| 51219 |
+
"grad_norm": 0.47076326608657837,
|
| 51220 |
+
"learning_rate": 7.299e-05,
|
| 51221 |
+
"loss": 0.1943,
|
| 51222 |
+
"step": 7300
|
| 51223 |
+
},
|
| 51224 |
+
{
|
| 51225 |
+
"epoch": 0.0007301,
|
| 51226 |
+
"grad_norm": 0.8561511039733887,
|
| 51227 |
+
"learning_rate": 7.3e-05,
|
| 51228 |
+
"loss": 0.2634,
|
| 51229 |
+
"step": 7301
|
| 51230 |
+
},
|
| 51231 |
+
{
|
| 51232 |
+
"epoch": 0.0007302,
|
| 51233 |
+
"grad_norm": 0.41544100642204285,
|
| 51234 |
+
"learning_rate": 7.301e-05,
|
| 51235 |
+
"loss": 0.1854,
|
| 51236 |
+
"step": 7302
|
| 51237 |
+
},
|
| 51238 |
+
{
|
| 51239 |
+
"epoch": 0.0007303,
|
| 51240 |
+
"grad_norm": 0.4582894444465637,
|
| 51241 |
+
"learning_rate": 7.302e-05,
|
| 51242 |
+
"loss": 0.1927,
|
| 51243 |
+
"step": 7303
|
| 51244 |
+
},
|
| 51245 |
+
{
|
| 51246 |
+
"epoch": 0.0007304,
|
| 51247 |
+
"grad_norm": 0.5104141235351562,
|
| 51248 |
+
"learning_rate": 7.303000000000001e-05,
|
| 51249 |
+
"loss": 0.2021,
|
| 51250 |
+
"step": 7304
|
| 51251 |
+
},
|
| 51252 |
+
{
|
| 51253 |
+
"epoch": 0.0007305,
|
| 51254 |
+
"grad_norm": 0.6090421080589294,
|
| 51255 |
+
"learning_rate": 7.304e-05,
|
| 51256 |
+
"loss": 0.2289,
|
| 51257 |
+
"step": 7305
|
| 51258 |
+
},
|
| 51259 |
+
{
|
| 51260 |
+
"epoch": 0.0007306,
|
| 51261 |
+
"grad_norm": 0.485851526260376,
|
| 51262 |
+
"learning_rate": 7.305e-05,
|
| 51263 |
+
"loss": 0.1978,
|
| 51264 |
+
"step": 7306
|
| 51265 |
+
},
|
| 51266 |
+
{
|
| 51267 |
+
"epoch": 0.0007307,
|
| 51268 |
+
"grad_norm": 0.41749894618988037,
|
| 51269 |
+
"learning_rate": 7.306000000000001e-05,
|
| 51270 |
+
"loss": 0.1914,
|
| 51271 |
+
"step": 7307
|
| 51272 |
+
},
|
| 51273 |
+
{
|
| 51274 |
+
"epoch": 0.0007308,
|
| 51275 |
+
"grad_norm": 0.4175671935081482,
|
| 51276 |
+
"learning_rate": 7.307e-05,
|
| 51277 |
+
"loss": 0.1859,
|
| 51278 |
+
"step": 7308
|
| 51279 |
+
},
|
| 51280 |
+
{
|
| 51281 |
+
"epoch": 0.0007309,
|
| 51282 |
+
"grad_norm": 0.465059757232666,
|
| 51283 |
+
"learning_rate": 7.308e-05,
|
| 51284 |
+
"loss": 0.1995,
|
| 51285 |
+
"step": 7309
|
| 51286 |
+
},
|
| 51287 |
+
{
|
| 51288 |
+
"epoch": 0.000731,
|
| 51289 |
+
"grad_norm": 0.44294998049736023,
|
| 51290 |
+
"learning_rate": 7.309e-05,
|
| 51291 |
+
"loss": 0.198,
|
| 51292 |
+
"step": 7310
|
| 51293 |
+
},
|
| 51294 |
+
{
|
| 51295 |
+
"epoch": 0.0007311,
|
| 51296 |
+
"grad_norm": 0.4414581060409546,
|
| 51297 |
+
"learning_rate": 7.31e-05,
|
| 51298 |
+
"loss": 0.1803,
|
| 51299 |
+
"step": 7311
|
| 51300 |
+
},
|
| 51301 |
+
{
|
| 51302 |
+
"epoch": 0.0007312,
|
| 51303 |
+
"grad_norm": 0.4441189467906952,
|
| 51304 |
+
"learning_rate": 7.311e-05,
|
| 51305 |
+
"loss": 0.1949,
|
| 51306 |
+
"step": 7312
|
| 51307 |
+
},
|
| 51308 |
+
{
|
| 51309 |
+
"epoch": 0.0007313,
|
| 51310 |
+
"grad_norm": 0.4735046625137329,
|
| 51311 |
+
"learning_rate": 7.312e-05,
|
| 51312 |
+
"loss": 0.2164,
|
| 51313 |
+
"step": 7313
|
| 51314 |
+
},
|
| 51315 |
+
{
|
| 51316 |
+
"epoch": 0.0007314,
|
| 51317 |
+
"grad_norm": 0.4368090033531189,
|
| 51318 |
+
"learning_rate": 7.313e-05,
|
| 51319 |
+
"loss": 0.1993,
|
| 51320 |
+
"step": 7314
|
| 51321 |
+
},
|
| 51322 |
+
{
|
| 51323 |
+
"epoch": 0.0007315,
|
| 51324 |
+
"grad_norm": 0.42844894528388977,
|
| 51325 |
+
"learning_rate": 7.314e-05,
|
| 51326 |
+
"loss": 0.1925,
|
| 51327 |
+
"step": 7315
|
| 51328 |
+
},
|
| 51329 |
+
{
|
| 51330 |
+
"epoch": 0.0007316,
|
| 51331 |
+
"grad_norm": 0.39331182837486267,
|
| 51332 |
+
"learning_rate": 7.315e-05,
|
| 51333 |
+
"loss": 0.1896,
|
| 51334 |
+
"step": 7316
|
| 51335 |
+
},
|
| 51336 |
+
{
|
| 51337 |
+
"epoch": 0.0007317,
|
| 51338 |
+
"grad_norm": 0.4788910746574402,
|
| 51339 |
+
"learning_rate": 7.316000000000001e-05,
|
| 51340 |
+
"loss": 0.2067,
|
| 51341 |
+
"step": 7317
|
| 51342 |
+
},
|
| 51343 |
+
{
|
| 51344 |
+
"epoch": 0.0007318,
|
| 51345 |
+
"grad_norm": 0.4292828142642975,
|
| 51346 |
+
"learning_rate": 7.316999999999999e-05,
|
| 51347 |
+
"loss": 0.1964,
|
| 51348 |
+
"step": 7318
|
| 51349 |
+
},
|
| 51350 |
+
{
|
| 51351 |
+
"epoch": 0.0007319,
|
| 51352 |
+
"grad_norm": 0.4058398902416229,
|
| 51353 |
+
"learning_rate": 7.318e-05,
|
| 51354 |
+
"loss": 0.1982,
|
| 51355 |
+
"step": 7319
|
| 51356 |
+
},
|
| 51357 |
+
{
|
| 51358 |
+
"epoch": 0.000732,
|
| 51359 |
+
"grad_norm": 0.43642178177833557,
|
| 51360 |
+
"learning_rate": 7.319000000000001e-05,
|
| 51361 |
+
"loss": 0.2009,
|
| 51362 |
+
"step": 7320
|
| 51363 |
+
},
|
| 51364 |
+
{
|
| 51365 |
+
"epoch": 0.0007321,
|
| 51366 |
+
"grad_norm": 0.43849149346351624,
|
| 51367 |
+
"learning_rate": 7.32e-05,
|
| 51368 |
+
"loss": 0.1888,
|
| 51369 |
+
"step": 7321
|
| 51370 |
+
},
|
| 51371 |
+
{
|
| 51372 |
+
"epoch": 0.0007322,
|
| 51373 |
+
"grad_norm": 0.43522223830223083,
|
| 51374 |
+
"learning_rate": 7.321e-05,
|
| 51375 |
+
"loss": 0.2036,
|
| 51376 |
+
"step": 7322
|
| 51377 |
+
},
|
| 51378 |
+
{
|
| 51379 |
+
"epoch": 0.0007323,
|
| 51380 |
+
"grad_norm": 1.0789283514022827,
|
| 51381 |
+
"learning_rate": 7.322000000000001e-05,
|
| 51382 |
+
"loss": 0.2266,
|
| 51383 |
+
"step": 7323
|
| 51384 |
+
},
|
| 51385 |
+
{
|
| 51386 |
+
"epoch": 0.0007324,
|
| 51387 |
+
"grad_norm": 0.8794465065002441,
|
| 51388 |
+
"learning_rate": 7.323e-05,
|
| 51389 |
+
"loss": 0.255,
|
| 51390 |
+
"step": 7324
|
| 51391 |
+
},
|
| 51392 |
+
{
|
| 51393 |
+
"epoch": 0.0007325,
|
| 51394 |
+
"grad_norm": 0.5397830009460449,
|
| 51395 |
+
"learning_rate": 7.324e-05,
|
| 51396 |
+
"loss": 0.191,
|
| 51397 |
+
"step": 7325
|
| 51398 |
+
},
|
| 51399 |
+
{
|
| 51400 |
+
"epoch": 0.0007326,
|
| 51401 |
+
"grad_norm": 0.5121053457260132,
|
| 51402 |
+
"learning_rate": 7.325e-05,
|
| 51403 |
+
"loss": 0.2021,
|
| 51404 |
+
"step": 7326
|
| 51405 |
+
},
|
| 51406 |
+
{
|
| 51407 |
+
"epoch": 0.0007327,
|
| 51408 |
+
"grad_norm": 1.1845084428787231,
|
| 51409 |
+
"learning_rate": 7.326e-05,
|
| 51410 |
+
"loss": 0.2732,
|
| 51411 |
+
"step": 7327
|
| 51412 |
+
},
|
| 51413 |
+
{
|
| 51414 |
+
"epoch": 0.0007328,
|
| 51415 |
+
"grad_norm": 0.5448693037033081,
|
| 51416 |
+
"learning_rate": 7.327e-05,
|
| 51417 |
+
"loss": 0.2064,
|
| 51418 |
+
"step": 7328
|
| 51419 |
+
},
|
| 51420 |
+
{
|
| 51421 |
+
"epoch": 0.0007329,
|
| 51422 |
+
"grad_norm": 0.49568578600883484,
|
| 51423 |
+
"learning_rate": 7.328e-05,
|
| 51424 |
+
"loss": 0.1957,
|
| 51425 |
+
"step": 7329
|
| 51426 |
+
},
|
| 51427 |
+
{
|
| 51428 |
+
"epoch": 0.000733,
|
| 51429 |
+
"grad_norm": 0.5749213695526123,
|
| 51430 |
+
"learning_rate": 7.329e-05,
|
| 51431 |
+
"loss": 0.2073,
|
| 51432 |
+
"step": 7330
|
| 51433 |
+
},
|
| 51434 |
+
{
|
| 51435 |
+
"epoch": 0.0007331,
|
| 51436 |
+
"grad_norm": 0.653283953666687,
|
| 51437 |
+
"learning_rate": 7.33e-05,
|
| 51438 |
+
"loss": 0.2335,
|
| 51439 |
+
"step": 7331
|
| 51440 |
+
},
|
| 51441 |
+
{
|
| 51442 |
+
"epoch": 0.0007332,
|
| 51443 |
+
"grad_norm": 0.6415607929229736,
|
| 51444 |
+
"learning_rate": 7.331e-05,
|
| 51445 |
+
"loss": 0.2091,
|
| 51446 |
+
"step": 7332
|
| 51447 |
+
},
|
| 51448 |
+
{
|
| 51449 |
+
"epoch": 0.0007333,
|
| 51450 |
+
"grad_norm": 0.49434953927993774,
|
| 51451 |
+
"learning_rate": 7.332e-05,
|
| 51452 |
+
"loss": 0.208,
|
| 51453 |
+
"step": 7333
|
| 51454 |
+
},
|
| 51455 |
+
{
|
| 51456 |
+
"epoch": 0.0007334,
|
| 51457 |
+
"grad_norm": 0.5622429251670837,
|
| 51458 |
+
"learning_rate": 7.333e-05,
|
| 51459 |
+
"loss": 0.194,
|
| 51460 |
+
"step": 7334
|
| 51461 |
+
},
|
| 51462 |
+
{
|
| 51463 |
+
"epoch": 0.0007335,
|
| 51464 |
+
"grad_norm": 0.5433710813522339,
|
| 51465 |
+
"learning_rate": 7.334e-05,
|
| 51466 |
+
"loss": 0.196,
|
| 51467 |
+
"step": 7335
|
| 51468 |
+
},
|
| 51469 |
+
{
|
| 51470 |
+
"epoch": 0.0007336,
|
| 51471 |
+
"grad_norm": 0.5698725581169128,
|
| 51472 |
+
"learning_rate": 7.335e-05,
|
| 51473 |
+
"loss": 0.1993,
|
| 51474 |
+
"step": 7336
|
| 51475 |
+
},
|
| 51476 |
+
{
|
| 51477 |
+
"epoch": 0.0007337,
|
| 51478 |
+
"grad_norm": 0.6964457631111145,
|
| 51479 |
+
"learning_rate": 7.336e-05,
|
| 51480 |
+
"loss": 0.2349,
|
| 51481 |
+
"step": 7337
|
| 51482 |
+
},
|
| 51483 |
+
{
|
| 51484 |
+
"epoch": 0.0007338,
|
| 51485 |
+
"grad_norm": 0.44615158438682556,
|
| 51486 |
+
"learning_rate": 7.337000000000001e-05,
|
| 51487 |
+
"loss": 0.1914,
|
| 51488 |
+
"step": 7338
|
| 51489 |
+
},
|
| 51490 |
+
{
|
| 51491 |
+
"epoch": 0.0007339,
|
| 51492 |
+
"grad_norm": 0.5419948697090149,
|
| 51493 |
+
"learning_rate": 7.338e-05,
|
| 51494 |
+
"loss": 0.1934,
|
| 51495 |
+
"step": 7339
|
| 51496 |
+
},
|
| 51497 |
+
{
|
| 51498 |
+
"epoch": 0.000734,
|
| 51499 |
+
"grad_norm": 0.44522324204444885,
|
| 51500 |
+
"learning_rate": 7.339e-05,
|
| 51501 |
+
"loss": 0.1965,
|
| 51502 |
+
"step": 7340
|
| 51503 |
+
},
|
| 51504 |
+
{
|
| 51505 |
+
"epoch": 0.0007341,
|
| 51506 |
+
"grad_norm": 0.42922475934028625,
|
| 51507 |
+
"learning_rate": 7.340000000000001e-05,
|
| 51508 |
+
"loss": 0.1924,
|
| 51509 |
+
"step": 7341
|
| 51510 |
+
},
|
| 51511 |
+
{
|
| 51512 |
+
"epoch": 0.0007342,
|
| 51513 |
+
"grad_norm": 0.46745359897613525,
|
| 51514 |
+
"learning_rate": 7.341e-05,
|
| 51515 |
+
"loss": 0.1975,
|
| 51516 |
+
"step": 7342
|
| 51517 |
+
},
|
| 51518 |
+
{
|
| 51519 |
+
"epoch": 0.0007343,
|
| 51520 |
+
"grad_norm": 0.42670372128486633,
|
| 51521 |
+
"learning_rate": 7.342e-05,
|
| 51522 |
+
"loss": 0.1805,
|
| 51523 |
+
"step": 7343
|
| 51524 |
+
},
|
| 51525 |
+
{
|
| 51526 |
+
"epoch": 0.0007344,
|
| 51527 |
+
"grad_norm": 0.6460187435150146,
|
| 51528 |
+
"learning_rate": 7.343e-05,
|
| 51529 |
+
"loss": 0.213,
|
| 51530 |
+
"step": 7344
|
| 51531 |
+
},
|
| 51532 |
+
{
|
| 51533 |
+
"epoch": 0.0007345,
|
| 51534 |
+
"grad_norm": 1.687685251235962,
|
| 51535 |
+
"learning_rate": 7.344e-05,
|
| 51536 |
+
"loss": 0.2593,
|
| 51537 |
+
"step": 7345
|
| 51538 |
+
},
|
| 51539 |
+
{
|
| 51540 |
+
"epoch": 0.0007346,
|
| 51541 |
+
"grad_norm": 0.42223459482192993,
|
| 51542 |
+
"learning_rate": 7.345e-05,
|
| 51543 |
+
"loss": 0.1827,
|
| 51544 |
+
"step": 7346
|
| 51545 |
+
},
|
| 51546 |
+
{
|
| 51547 |
+
"epoch": 0.0007347,
|
| 51548 |
+
"grad_norm": 0.4287385046482086,
|
| 51549 |
+
"learning_rate": 7.346e-05,
|
| 51550 |
+
"loss": 0.1843,
|
| 51551 |
+
"step": 7347
|
| 51552 |
+
},
|
| 51553 |
+
{
|
| 51554 |
+
"epoch": 0.0007348,
|
| 51555 |
+
"grad_norm": 0.4938461184501648,
|
| 51556 |
+
"learning_rate": 7.347e-05,
|
| 51557 |
+
"loss": 0.1989,
|
| 51558 |
+
"step": 7348
|
| 51559 |
+
},
|
| 51560 |
+
{
|
| 51561 |
+
"epoch": 0.0007349,
|
| 51562 |
+
"grad_norm": 1.1912397146224976,
|
| 51563 |
+
"learning_rate": 7.348e-05,
|
| 51564 |
+
"loss": 0.2338,
|
| 51565 |
+
"step": 7349
|
| 51566 |
+
},
|
| 51567 |
+
{
|
| 51568 |
+
"epoch": 0.000735,
|
| 51569 |
+
"grad_norm": 0.525805652141571,
|
| 51570 |
+
"learning_rate": 7.349e-05,
|
| 51571 |
+
"loss": 0.2101,
|
| 51572 |
+
"step": 7350
|
| 51573 |
+
},
|
| 51574 |
+
{
|
| 51575 |
+
"epoch": 0.0007351,
|
| 51576 |
+
"grad_norm": 0.48151376843452454,
|
| 51577 |
+
"learning_rate": 7.35e-05,
|
| 51578 |
+
"loss": 0.2007,
|
| 51579 |
+
"step": 7351
|
| 51580 |
+
},
|
| 51581 |
+
{
|
| 51582 |
+
"epoch": 0.0007352,
|
| 51583 |
+
"grad_norm": 0.4943440556526184,
|
| 51584 |
+
"learning_rate": 7.350999999999999e-05,
|
| 51585 |
+
"loss": 0.1957,
|
| 51586 |
+
"step": 7352
|
| 51587 |
+
},
|
| 51588 |
+
{
|
| 51589 |
+
"epoch": 0.0007353,
|
| 51590 |
+
"grad_norm": 0.6150953769683838,
|
| 51591 |
+
"learning_rate": 7.352e-05,
|
| 51592 |
+
"loss": 0.1945,
|
| 51593 |
+
"step": 7353
|
| 51594 |
+
},
|
| 51595 |
+
{
|
| 51596 |
+
"epoch": 0.0007354,
|
| 51597 |
+
"grad_norm": 0.4299049377441406,
|
| 51598 |
+
"learning_rate": 7.353000000000001e-05,
|
| 51599 |
+
"loss": 0.181,
|
| 51600 |
+
"step": 7354
|
| 51601 |
+
},
|
| 51602 |
+
{
|
| 51603 |
+
"epoch": 0.0007355,
|
| 51604 |
+
"grad_norm": 0.46449828147888184,
|
| 51605 |
+
"learning_rate": 7.354e-05,
|
| 51606 |
+
"loss": 0.1918,
|
| 51607 |
+
"step": 7355
|
| 51608 |
+
},
|
| 51609 |
+
{
|
| 51610 |
+
"epoch": 0.0007356,
|
| 51611 |
+
"grad_norm": 0.4400832951068878,
|
| 51612 |
+
"learning_rate": 7.355e-05,
|
| 51613 |
+
"loss": 0.1927,
|
| 51614 |
+
"step": 7356
|
| 51615 |
+
},
|
| 51616 |
+
{
|
| 51617 |
+
"epoch": 0.0007357,
|
| 51618 |
+
"grad_norm": 0.5461811423301697,
|
| 51619 |
+
"learning_rate": 7.356000000000001e-05,
|
| 51620 |
+
"loss": 0.2039,
|
| 51621 |
+
"step": 7357
|
| 51622 |
+
},
|
| 51623 |
+
{
|
| 51624 |
+
"epoch": 0.0007358,
|
| 51625 |
+
"grad_norm": 0.48086267709732056,
|
| 51626 |
+
"learning_rate": 7.357e-05,
|
| 51627 |
+
"loss": 0.1926,
|
| 51628 |
+
"step": 7358
|
| 51629 |
+
},
|
| 51630 |
+
{
|
| 51631 |
+
"epoch": 0.0007359,
|
| 51632 |
+
"grad_norm": 0.47575855255126953,
|
| 51633 |
+
"learning_rate": 7.358e-05,
|
| 51634 |
+
"loss": 0.2006,
|
| 51635 |
+
"step": 7359
|
| 51636 |
+
},
|
| 51637 |
+
{
|
| 51638 |
+
"epoch": 0.000736,
|
| 51639 |
+
"grad_norm": 0.47889426350593567,
|
| 51640 |
+
"learning_rate": 7.359e-05,
|
| 51641 |
+
"loss": 0.2018,
|
| 51642 |
+
"step": 7360
|
| 51643 |
+
},
|
| 51644 |
+
{
|
| 51645 |
+
"epoch": 0.0007361,
|
| 51646 |
+
"grad_norm": 0.5811017155647278,
|
| 51647 |
+
"learning_rate": 7.36e-05,
|
| 51648 |
+
"loss": 0.2306,
|
| 51649 |
+
"step": 7361
|
| 51650 |
+
},
|
| 51651 |
+
{
|
| 51652 |
+
"epoch": 0.0007362,
|
| 51653 |
+
"grad_norm": 0.39591217041015625,
|
| 51654 |
+
"learning_rate": 7.361e-05,
|
| 51655 |
+
"loss": 0.1869,
|
| 51656 |
+
"step": 7362
|
| 51657 |
+
},
|
| 51658 |
+
{
|
| 51659 |
+
"epoch": 0.0007363,
|
| 51660 |
+
"grad_norm": 0.4870452582836151,
|
| 51661 |
+
"learning_rate": 7.362e-05,
|
| 51662 |
+
"loss": 0.2189,
|
| 51663 |
+
"step": 7363
|
| 51664 |
+
},
|
| 51665 |
+
{
|
| 51666 |
+
"epoch": 0.0007364,
|
| 51667 |
+
"grad_norm": 0.8045810461044312,
|
| 51668 |
+
"learning_rate": 7.363e-05,
|
| 51669 |
+
"loss": 0.2463,
|
| 51670 |
+
"step": 7364
|
| 51671 |
+
},
|
| 51672 |
+
{
|
| 51673 |
+
"epoch": 0.0007365,
|
| 51674 |
+
"grad_norm": 0.8989725708961487,
|
| 51675 |
+
"learning_rate": 7.364e-05,
|
| 51676 |
+
"loss": 0.2142,
|
| 51677 |
+
"step": 7365
|
| 51678 |
+
},
|
| 51679 |
+
{
|
| 51680 |
+
"epoch": 0.0007366,
|
| 51681 |
+
"grad_norm": 0.7122185230255127,
|
| 51682 |
+
"learning_rate": 7.365e-05,
|
| 51683 |
+
"loss": 0.2395,
|
| 51684 |
+
"step": 7366
|
| 51685 |
+
},
|
| 51686 |
+
{
|
| 51687 |
+
"epoch": 0.0007367,
|
| 51688 |
+
"grad_norm": 0.5073297619819641,
|
| 51689 |
+
"learning_rate": 7.366e-05,
|
| 51690 |
+
"loss": 0.1963,
|
| 51691 |
+
"step": 7367
|
| 51692 |
+
},
|
| 51693 |
+
{
|
| 51694 |
+
"epoch": 0.0007368,
|
| 51695 |
+
"grad_norm": 0.509922444820404,
|
| 51696 |
+
"learning_rate": 7.367e-05,
|
| 51697 |
+
"loss": 0.1998,
|
| 51698 |
+
"step": 7368
|
| 51699 |
+
},
|
| 51700 |
+
{
|
| 51701 |
+
"epoch": 0.0007369,
|
| 51702 |
+
"grad_norm": 0.5014863014221191,
|
| 51703 |
+
"learning_rate": 7.368e-05,
|
| 51704 |
+
"loss": 0.1973,
|
| 51705 |
+
"step": 7369
|
| 51706 |
+
},
|
| 51707 |
+
{
|
| 51708 |
+
"epoch": 0.000737,
|
| 51709 |
+
"grad_norm": 0.4400615692138672,
|
| 51710 |
+
"learning_rate": 7.369e-05,
|
| 51711 |
+
"loss": 0.1881,
|
| 51712 |
+
"step": 7370
|
| 51713 |
+
},
|
| 51714 |
+
{
|
| 51715 |
+
"epoch": 0.0007371,
|
| 51716 |
+
"grad_norm": 0.5283257961273193,
|
| 51717 |
+
"learning_rate": 7.37e-05,
|
| 51718 |
+
"loss": 0.1947,
|
| 51719 |
+
"step": 7371
|
| 51720 |
+
},
|
| 51721 |
+
{
|
| 51722 |
+
"epoch": 0.0007372,
|
| 51723 |
+
"grad_norm": 0.4372621774673462,
|
| 51724 |
+
"learning_rate": 7.371000000000001e-05,
|
| 51725 |
+
"loss": 0.1917,
|
| 51726 |
+
"step": 7372
|
| 51727 |
+
},
|
| 51728 |
+
{
|
| 51729 |
+
"epoch": 0.0007373,
|
| 51730 |
+
"grad_norm": 0.43310028314590454,
|
| 51731 |
+
"learning_rate": 7.371999999999999e-05,
|
| 51732 |
+
"loss": 0.1907,
|
| 51733 |
+
"step": 7373
|
| 51734 |
+
},
|
| 51735 |
+
{
|
| 51736 |
+
"epoch": 0.0007374,
|
| 51737 |
+
"grad_norm": 0.6451244950294495,
|
| 51738 |
+
"learning_rate": 7.373e-05,
|
| 51739 |
+
"loss": 0.2101,
|
| 51740 |
+
"step": 7374
|
| 51741 |
+
},
|
| 51742 |
+
{
|
| 51743 |
+
"epoch": 0.0007375,
|
| 51744 |
+
"grad_norm": 0.47620660066604614,
|
| 51745 |
+
"learning_rate": 7.374000000000001e-05,
|
| 51746 |
+
"loss": 0.1891,
|
| 51747 |
+
"step": 7375
|
| 51748 |
+
},
|
| 51749 |
+
{
|
| 51750 |
+
"epoch": 0.0007376,
|
| 51751 |
+
"grad_norm": 0.5217645168304443,
|
| 51752 |
+
"learning_rate": 7.375e-05,
|
| 51753 |
+
"loss": 0.2037,
|
| 51754 |
+
"step": 7376
|
| 51755 |
+
},
|
| 51756 |
+
{
|
| 51757 |
+
"epoch": 0.0007377,
|
| 51758 |
+
"grad_norm": 0.46922340989112854,
|
| 51759 |
+
"learning_rate": 7.376e-05,
|
| 51760 |
+
"loss": 0.1991,
|
| 51761 |
+
"step": 7377
|
| 51762 |
+
},
|
| 51763 |
+
{
|
| 51764 |
+
"epoch": 0.0007378,
|
| 51765 |
+
"grad_norm": 0.42827391624450684,
|
| 51766 |
+
"learning_rate": 7.377000000000001e-05,
|
| 51767 |
+
"loss": 0.1909,
|
| 51768 |
+
"step": 7378
|
| 51769 |
+
},
|
| 51770 |
+
{
|
| 51771 |
+
"epoch": 0.0007379,
|
| 51772 |
+
"grad_norm": 0.637048065662384,
|
| 51773 |
+
"learning_rate": 7.378e-05,
|
| 51774 |
+
"loss": 0.2262,
|
| 51775 |
+
"step": 7379
|
| 51776 |
+
},
|
| 51777 |
+
{
|
| 51778 |
+
"epoch": 0.000738,
|
| 51779 |
+
"grad_norm": 0.4661789834499359,
|
| 51780 |
+
"learning_rate": 7.379e-05,
|
| 51781 |
+
"loss": 0.1899,
|
| 51782 |
+
"step": 7380
|
| 51783 |
+
},
|
| 51784 |
+
{
|
| 51785 |
+
"epoch": 0.0007381,
|
| 51786 |
+
"grad_norm": 0.6267582774162292,
|
| 51787 |
+
"learning_rate": 7.38e-05,
|
| 51788 |
+
"loss": 0.1986,
|
| 51789 |
+
"step": 7381
|
| 51790 |
+
},
|
| 51791 |
+
{
|
| 51792 |
+
"epoch": 0.0007382,
|
| 51793 |
+
"grad_norm": 0.42628756165504456,
|
| 51794 |
+
"learning_rate": 7.381e-05,
|
| 51795 |
+
"loss": 0.1986,
|
| 51796 |
+
"step": 7382
|
| 51797 |
+
},
|
| 51798 |
+
{
|
| 51799 |
+
"epoch": 0.0007383,
|
| 51800 |
+
"grad_norm": 0.7806330919265747,
|
| 51801 |
+
"learning_rate": 7.382e-05,
|
| 51802 |
+
"loss": 0.219,
|
| 51803 |
+
"step": 7383
|
| 51804 |
+
},
|
| 51805 |
+
{
|
| 51806 |
+
"epoch": 0.0007384,
|
| 51807 |
+
"grad_norm": 0.5062658786773682,
|
| 51808 |
+
"learning_rate": 7.383e-05,
|
| 51809 |
+
"loss": 0.2126,
|
| 51810 |
+
"step": 7384
|
| 51811 |
+
},
|
| 51812 |
+
{
|
| 51813 |
+
"epoch": 0.0007385,
|
| 51814 |
+
"grad_norm": 0.5987305045127869,
|
| 51815 |
+
"learning_rate": 7.384e-05,
|
| 51816 |
+
"loss": 0.2074,
|
| 51817 |
+
"step": 7385
|
| 51818 |
+
},
|
| 51819 |
+
{
|
| 51820 |
+
"epoch": 0.0007386,
|
| 51821 |
+
"grad_norm": 0.4769977629184723,
|
| 51822 |
+
"learning_rate": 7.384999999999999e-05,
|
| 51823 |
+
"loss": 0.192,
|
| 51824 |
+
"step": 7386
|
| 51825 |
+
},
|
| 51826 |
+
{
|
| 51827 |
+
"epoch": 0.0007387,
|
| 51828 |
+
"grad_norm": 0.4302308261394501,
|
| 51829 |
+
"learning_rate": 7.386e-05,
|
| 51830 |
+
"loss": 0.1853,
|
| 51831 |
+
"step": 7387
|
| 51832 |
+
},
|
| 51833 |
+
{
|
| 51834 |
+
"epoch": 0.0007388,
|
| 51835 |
+
"grad_norm": 0.48651114106178284,
|
| 51836 |
+
"learning_rate": 7.387000000000001e-05,
|
| 51837 |
+
"loss": 0.1923,
|
| 51838 |
+
"step": 7388
|
| 51839 |
+
},
|
| 51840 |
+
{
|
| 51841 |
+
"epoch": 0.0007389,
|
| 51842 |
+
"grad_norm": 0.42370152473449707,
|
| 51843 |
+
"learning_rate": 7.388e-05,
|
| 51844 |
+
"loss": 0.1874,
|
| 51845 |
+
"step": 7389
|
| 51846 |
+
},
|
| 51847 |
+
{
|
| 51848 |
+
"epoch": 0.000739,
|
| 51849 |
+
"grad_norm": 0.3867861330509186,
|
| 51850 |
+
"learning_rate": 7.389e-05,
|
| 51851 |
+
"loss": 0.1808,
|
| 51852 |
+
"step": 7390
|
| 51853 |
+
},
|
| 51854 |
+
{
|
| 51855 |
+
"epoch": 0.0007391,
|
| 51856 |
+
"grad_norm": 0.4189610481262207,
|
| 51857 |
+
"learning_rate": 7.390000000000001e-05,
|
| 51858 |
+
"loss": 0.1919,
|
| 51859 |
+
"step": 7391
|
| 51860 |
+
},
|
| 51861 |
+
{
|
| 51862 |
+
"epoch": 0.0007392,
|
| 51863 |
+
"grad_norm": 0.5503688454627991,
|
| 51864 |
+
"learning_rate": 7.391e-05,
|
| 51865 |
+
"loss": 0.1958,
|
| 51866 |
+
"step": 7392
|
| 51867 |
+
},
|
| 51868 |
+
{
|
| 51869 |
+
"epoch": 0.0007393,
|
| 51870 |
+
"grad_norm": 0.48368340730667114,
|
| 51871 |
+
"learning_rate": 7.392e-05,
|
| 51872 |
+
"loss": 0.1897,
|
| 51873 |
+
"step": 7393
|
| 51874 |
+
},
|
| 51875 |
+
{
|
| 51876 |
+
"epoch": 0.0007394,
|
| 51877 |
+
"grad_norm": 0.404477596282959,
|
| 51878 |
+
"learning_rate": 7.393e-05,
|
| 51879 |
+
"loss": 0.1832,
|
| 51880 |
+
"step": 7394
|
| 51881 |
+
},
|
| 51882 |
+
{
|
| 51883 |
+
"epoch": 0.0007395,
|
| 51884 |
+
"grad_norm": 0.5074638724327087,
|
| 51885 |
+
"learning_rate": 7.394e-05,
|
| 51886 |
+
"loss": 0.1979,
|
| 51887 |
+
"step": 7395
|
| 51888 |
+
},
|
| 51889 |
+
{
|
| 51890 |
+
"epoch": 0.0007396,
|
| 51891 |
+
"grad_norm": 0.45375561714172363,
|
| 51892 |
+
"learning_rate": 7.395e-05,
|
| 51893 |
+
"loss": 0.1978,
|
| 51894 |
+
"step": 7396
|
| 51895 |
+
},
|
| 51896 |
+
{
|
| 51897 |
+
"epoch": 0.0007397,
|
| 51898 |
+
"grad_norm": 0.38874441385269165,
|
| 51899 |
+
"learning_rate": 7.396e-05,
|
| 51900 |
+
"loss": 0.1836,
|
| 51901 |
+
"step": 7397
|
| 51902 |
+
},
|
| 51903 |
+
{
|
| 51904 |
+
"epoch": 0.0007398,
|
| 51905 |
+
"grad_norm": 0.4363499581813812,
|
| 51906 |
+
"learning_rate": 7.397e-05,
|
| 51907 |
+
"loss": 0.1925,
|
| 51908 |
+
"step": 7398
|
| 51909 |
+
},
|
| 51910 |
+
{
|
| 51911 |
+
"epoch": 0.0007399,
|
| 51912 |
+
"grad_norm": 0.44526228308677673,
|
| 51913 |
+
"learning_rate": 7.398e-05,
|
| 51914 |
+
"loss": 0.1915,
|
| 51915 |
+
"step": 7399
|
| 51916 |
+
},
|
| 51917 |
+
{
|
| 51918 |
+
"epoch": 0.00074,
|
| 51919 |
+
"grad_norm": 0.7410265207290649,
|
| 51920 |
+
"learning_rate": 7.399e-05,
|
| 51921 |
+
"loss": 0.2211,
|
| 51922 |
+
"step": 7400
|
| 51923 |
+
},
|
| 51924 |
+
{
|
| 51925 |
+
"epoch": 0.0007401,
|
| 51926 |
+
"grad_norm": 0.39623939990997314,
|
| 51927 |
+
"learning_rate": 7.4e-05,
|
| 51928 |
+
"loss": 0.1835,
|
| 51929 |
+
"step": 7401
|
| 51930 |
+
},
|
| 51931 |
+
{
|
| 51932 |
+
"epoch": 0.0007402,
|
| 51933 |
+
"grad_norm": 0.42353275418281555,
|
| 51934 |
+
"learning_rate": 7.401e-05,
|
| 51935 |
+
"loss": 0.1909,
|
| 51936 |
+
"step": 7402
|
| 51937 |
+
},
|
| 51938 |
+
{
|
| 51939 |
+
"epoch": 0.0007403,
|
| 51940 |
+
"grad_norm": 0.4267234206199646,
|
| 51941 |
+
"learning_rate": 7.402e-05,
|
| 51942 |
+
"loss": 0.1969,
|
| 51943 |
+
"step": 7403
|
| 51944 |
+
},
|
| 51945 |
+
{
|
| 51946 |
+
"epoch": 0.0007404,
|
| 51947 |
+
"grad_norm": 0.4426077902317047,
|
| 51948 |
+
"learning_rate": 7.403e-05,
|
| 51949 |
+
"loss": 0.1924,
|
| 51950 |
+
"step": 7404
|
| 51951 |
+
},
|
| 51952 |
+
{
|
| 51953 |
+
"epoch": 0.0007405,
|
| 51954 |
+
"grad_norm": 0.6750543713569641,
|
| 51955 |
+
"learning_rate": 7.404e-05,
|
| 51956 |
+
"loss": 0.2429,
|
| 51957 |
+
"step": 7405
|
| 51958 |
+
},
|
| 51959 |
+
{
|
| 51960 |
+
"epoch": 0.0007406,
|
| 51961 |
+
"grad_norm": 0.39575839042663574,
|
| 51962 |
+
"learning_rate": 7.405000000000001e-05,
|
| 51963 |
+
"loss": 0.1823,
|
| 51964 |
+
"step": 7406
|
| 51965 |
+
},
|
| 51966 |
+
{
|
| 51967 |
+
"epoch": 0.0007407,
|
| 51968 |
+
"grad_norm": 0.4387655258178711,
|
| 51969 |
+
"learning_rate": 7.405999999999999e-05,
|
| 51970 |
+
"loss": 0.1881,
|
| 51971 |
+
"step": 7407
|
| 51972 |
+
},
|
| 51973 |
+
{
|
| 51974 |
+
"epoch": 0.0007408,
|
| 51975 |
+
"grad_norm": 0.47220295667648315,
|
| 51976 |
+
"learning_rate": 7.407e-05,
|
| 51977 |
+
"loss": 0.1996,
|
| 51978 |
+
"step": 7408
|
| 51979 |
+
},
|
| 51980 |
+
{
|
| 51981 |
+
"epoch": 0.0007409,
|
| 51982 |
+
"grad_norm": 0.3826233446598053,
|
| 51983 |
+
"learning_rate": 7.408000000000001e-05,
|
| 51984 |
+
"loss": 0.179,
|
| 51985 |
+
"step": 7409
|
| 51986 |
+
},
|
| 51987 |
+
{
|
| 51988 |
+
"epoch": 0.000741,
|
| 51989 |
+
"grad_norm": 0.5239911079406738,
|
| 51990 |
+
"learning_rate": 7.409e-05,
|
| 51991 |
+
"loss": 0.1937,
|
| 51992 |
+
"step": 7410
|
| 51993 |
+
},
|
| 51994 |
+
{
|
| 51995 |
+
"epoch": 0.0007411,
|
| 51996 |
+
"grad_norm": 0.37321022152900696,
|
| 51997 |
+
"learning_rate": 7.41e-05,
|
| 51998 |
+
"loss": 0.1775,
|
| 51999 |
+
"step": 7411
|
| 52000 |
+
},
|
| 52001 |
+
{
|
| 52002 |
+
"epoch": 0.0007412,
|
| 52003 |
+
"grad_norm": 0.7720602750778198,
|
| 52004 |
+
"learning_rate": 7.411000000000001e-05,
|
| 52005 |
+
"loss": 0.2292,
|
| 52006 |
+
"step": 7412
|
| 52007 |
+
},
|
| 52008 |
+
{
|
| 52009 |
+
"epoch": 0.0007413,
|
| 52010 |
+
"grad_norm": 0.5750840306282043,
|
| 52011 |
+
"learning_rate": 7.412e-05,
|
| 52012 |
+
"loss": 0.1973,
|
| 52013 |
+
"step": 7413
|
| 52014 |
+
},
|
| 52015 |
+
{
|
| 52016 |
+
"epoch": 0.0007414,
|
| 52017 |
+
"grad_norm": 4.924017906188965,
|
| 52018 |
+
"learning_rate": 7.413e-05,
|
| 52019 |
+
"loss": 0.3469,
|
| 52020 |
+
"step": 7414
|
| 52021 |
+
},
|
| 52022 |
+
{
|
| 52023 |
+
"epoch": 0.0007415,
|
| 52024 |
+
"grad_norm": 0.47601503133773804,
|
| 52025 |
+
"learning_rate": 7.414e-05,
|
| 52026 |
+
"loss": 0.1858,
|
| 52027 |
+
"step": 7415
|
| 52028 |
+
},
|
| 52029 |
+
{
|
| 52030 |
+
"epoch": 0.0007416,
|
| 52031 |
+
"grad_norm": 0.4628167748451233,
|
| 52032 |
+
"learning_rate": 7.415e-05,
|
| 52033 |
+
"loss": 0.1874,
|
| 52034 |
+
"step": 7416
|
| 52035 |
+
},
|
| 52036 |
+
{
|
| 52037 |
+
"epoch": 0.0007417,
|
| 52038 |
+
"grad_norm": 0.6044511198997498,
|
| 52039 |
+
"learning_rate": 7.416e-05,
|
| 52040 |
+
"loss": 0.2205,
|
| 52041 |
+
"step": 7417
|
| 52042 |
+
},
|
| 52043 |
+
{
|
| 52044 |
+
"epoch": 0.0007418,
|
| 52045 |
+
"grad_norm": 0.7673957943916321,
|
| 52046 |
+
"learning_rate": 7.417e-05,
|
| 52047 |
+
"loss": 0.1954,
|
| 52048 |
+
"step": 7418
|
| 52049 |
+
},
|
| 52050 |
+
{
|
| 52051 |
+
"epoch": 0.0007419,
|
| 52052 |
+
"grad_norm": 0.45705947279930115,
|
| 52053 |
+
"learning_rate": 7.418e-05,
|
| 52054 |
+
"loss": 0.1969,
|
| 52055 |
+
"step": 7419
|
| 52056 |
+
},
|
| 52057 |
+
{
|
| 52058 |
+
"epoch": 0.000742,
|
| 52059 |
+
"grad_norm": 0.5442333221435547,
|
| 52060 |
+
"learning_rate": 7.418999999999999e-05,
|
| 52061 |
+
"loss": 0.2039,
|
| 52062 |
+
"step": 7420
|
| 52063 |
+
},
|
| 52064 |
+
{
|
| 52065 |
+
"epoch": 0.0007421,
|
| 52066 |
+
"grad_norm": 0.7553530335426331,
|
| 52067 |
+
"learning_rate": 7.42e-05,
|
| 52068 |
+
"loss": 0.2479,
|
| 52069 |
+
"step": 7421
|
| 52070 |
+
},
|
| 52071 |
+
{
|
| 52072 |
+
"epoch": 0.0007422,
|
| 52073 |
+
"grad_norm": 1.0189530849456787,
|
| 52074 |
+
"learning_rate": 7.421000000000001e-05,
|
| 52075 |
+
"loss": 0.2329,
|
| 52076 |
+
"step": 7422
|
| 52077 |
+
},
|
| 52078 |
+
{
|
| 52079 |
+
"epoch": 0.0007423,
|
| 52080 |
+
"grad_norm": 0.5386321544647217,
|
| 52081 |
+
"learning_rate": 7.422e-05,
|
| 52082 |
+
"loss": 0.1906,
|
| 52083 |
+
"step": 7423
|
| 52084 |
+
},
|
| 52085 |
+
{
|
| 52086 |
+
"epoch": 0.0007424,
|
| 52087 |
+
"grad_norm": 1.569715976715088,
|
| 52088 |
+
"learning_rate": 7.423e-05,
|
| 52089 |
+
"loss": 0.3202,
|
| 52090 |
+
"step": 7424
|
| 52091 |
+
},
|
| 52092 |
+
{
|
| 52093 |
+
"epoch": 0.0007425,
|
| 52094 |
+
"grad_norm": 0.5454429388046265,
|
| 52095 |
+
"learning_rate": 7.424000000000001e-05,
|
| 52096 |
+
"loss": 0.1985,
|
| 52097 |
+
"step": 7425
|
| 52098 |
+
},
|
| 52099 |
+
{
|
| 52100 |
+
"epoch": 0.0007426,
|
| 52101 |
+
"grad_norm": 0.5449747443199158,
|
| 52102 |
+
"learning_rate": 7.425e-05,
|
| 52103 |
+
"loss": 0.1947,
|
| 52104 |
+
"step": 7426
|
| 52105 |
+
},
|
| 52106 |
+
{
|
| 52107 |
+
"epoch": 0.0007427,
|
| 52108 |
+
"grad_norm": 0.7405964136123657,
|
| 52109 |
+
"learning_rate": 7.426e-05,
|
| 52110 |
+
"loss": 0.2125,
|
| 52111 |
+
"step": 7427
|
| 52112 |
+
},
|
| 52113 |
+
{
|
| 52114 |
+
"epoch": 0.0007428,
|
| 52115 |
+
"grad_norm": 0.45297539234161377,
|
| 52116 |
+
"learning_rate": 7.427e-05,
|
| 52117 |
+
"loss": 0.1805,
|
| 52118 |
+
"step": 7428
|
| 52119 |
+
},
|
| 52120 |
+
{
|
| 52121 |
+
"epoch": 0.0007429,
|
| 52122 |
+
"grad_norm": 0.4657517671585083,
|
| 52123 |
+
"learning_rate": 7.428e-05,
|
| 52124 |
+
"loss": 0.1866,
|
| 52125 |
+
"step": 7429
|
| 52126 |
+
},
|
| 52127 |
+
{
|
| 52128 |
+
"epoch": 0.000743,
|
| 52129 |
+
"grad_norm": 0.45083001255989075,
|
| 52130 |
+
"learning_rate": 7.429e-05,
|
| 52131 |
+
"loss": 0.179,
|
| 52132 |
+
"step": 7430
|
| 52133 |
+
},
|
| 52134 |
+
{
|
| 52135 |
+
"epoch": 0.0007431,
|
| 52136 |
+
"grad_norm": 0.5598366856575012,
|
| 52137 |
+
"learning_rate": 7.43e-05,
|
| 52138 |
+
"loss": 0.1898,
|
| 52139 |
+
"step": 7431
|
| 52140 |
+
},
|
| 52141 |
+
{
|
| 52142 |
+
"epoch": 0.0007432,
|
| 52143 |
+
"grad_norm": 0.4710606336593628,
|
| 52144 |
+
"learning_rate": 7.431e-05,
|
| 52145 |
+
"loss": 0.1908,
|
| 52146 |
+
"step": 7432
|
| 52147 |
+
},
|
| 52148 |
+
{
|
| 52149 |
+
"epoch": 0.0007433,
|
| 52150 |
+
"grad_norm": 0.42426031827926636,
|
| 52151 |
+
"learning_rate": 7.432e-05,
|
| 52152 |
+
"loss": 0.1807,
|
| 52153 |
+
"step": 7433
|
| 52154 |
+
},
|
| 52155 |
+
{
|
| 52156 |
+
"epoch": 0.0007434,
|
| 52157 |
+
"grad_norm": 0.5185211300849915,
|
| 52158 |
+
"learning_rate": 7.433e-05,
|
| 52159 |
+
"loss": 0.1958,
|
| 52160 |
+
"step": 7434
|
| 52161 |
+
},
|
| 52162 |
+
{
|
| 52163 |
+
"epoch": 0.0007435,
|
| 52164 |
+
"grad_norm": 0.546903133392334,
|
| 52165 |
+
"learning_rate": 7.434e-05,
|
| 52166 |
+
"loss": 0.192,
|
| 52167 |
+
"step": 7435
|
| 52168 |
+
},
|
| 52169 |
+
{
|
| 52170 |
+
"epoch": 0.0007436,
|
| 52171 |
+
"grad_norm": 0.501542866230011,
|
| 52172 |
+
"learning_rate": 7.435e-05,
|
| 52173 |
+
"loss": 0.1984,
|
| 52174 |
+
"step": 7436
|
| 52175 |
+
},
|
| 52176 |
+
{
|
| 52177 |
+
"epoch": 0.0007437,
|
| 52178 |
+
"grad_norm": 0.4432472288608551,
|
| 52179 |
+
"learning_rate": 7.436e-05,
|
| 52180 |
+
"loss": 0.1846,
|
| 52181 |
+
"step": 7437
|
| 52182 |
+
},
|
| 52183 |
+
{
|
| 52184 |
+
"epoch": 0.0007438,
|
| 52185 |
+
"grad_norm": 0.4535142481327057,
|
| 52186 |
+
"learning_rate": 7.437e-05,
|
| 52187 |
+
"loss": 0.1908,
|
| 52188 |
+
"step": 7438
|
| 52189 |
+
},
|
| 52190 |
+
{
|
| 52191 |
+
"epoch": 0.0007439,
|
| 52192 |
+
"grad_norm": 0.49497199058532715,
|
| 52193 |
+
"learning_rate": 7.438e-05,
|
| 52194 |
+
"loss": 0.1914,
|
| 52195 |
+
"step": 7439
|
| 52196 |
+
},
|
| 52197 |
+
{
|
| 52198 |
+
"epoch": 0.000744,
|
| 52199 |
+
"grad_norm": 0.6516047716140747,
|
| 52200 |
+
"learning_rate": 7.439000000000001e-05,
|
| 52201 |
+
"loss": 0.2054,
|
| 52202 |
+
"step": 7440
|
| 52203 |
+
},
|
| 52204 |
+
{
|
| 52205 |
+
"epoch": 0.0007441,
|
| 52206 |
+
"grad_norm": 0.5227893590927124,
|
| 52207 |
+
"learning_rate": 7.439999999999999e-05,
|
| 52208 |
+
"loss": 0.2042,
|
| 52209 |
+
"step": 7441
|
| 52210 |
+
},
|
| 52211 |
+
{
|
| 52212 |
+
"epoch": 0.0007442,
|
| 52213 |
+
"grad_norm": 0.4323962926864624,
|
| 52214 |
+
"learning_rate": 7.441e-05,
|
| 52215 |
+
"loss": 0.1847,
|
| 52216 |
+
"step": 7442
|
| 52217 |
+
},
|
| 52218 |
+
{
|
| 52219 |
+
"epoch": 0.0007443,
|
| 52220 |
+
"grad_norm": 0.4462810754776001,
|
| 52221 |
+
"learning_rate": 7.442000000000001e-05,
|
| 52222 |
+
"loss": 0.186,
|
| 52223 |
+
"step": 7443
|
| 52224 |
+
},
|
| 52225 |
+
{
|
| 52226 |
+
"epoch": 0.0007444,
|
| 52227 |
+
"grad_norm": 0.44686514139175415,
|
| 52228 |
+
"learning_rate": 7.443e-05,
|
| 52229 |
+
"loss": 0.1833,
|
| 52230 |
+
"step": 7444
|
| 52231 |
+
},
|
| 52232 |
+
{
|
| 52233 |
+
"epoch": 0.0007445,
|
| 52234 |
+
"grad_norm": 0.42378124594688416,
|
| 52235 |
+
"learning_rate": 7.444e-05,
|
| 52236 |
+
"loss": 0.178,
|
| 52237 |
+
"step": 7445
|
| 52238 |
+
},
|
| 52239 |
+
{
|
| 52240 |
+
"epoch": 0.0007446,
|
| 52241 |
+
"grad_norm": 0.4270733594894409,
|
| 52242 |
+
"learning_rate": 7.445000000000001e-05,
|
| 52243 |
+
"loss": 0.1819,
|
| 52244 |
+
"step": 7446
|
| 52245 |
+
},
|
| 52246 |
+
{
|
| 52247 |
+
"epoch": 0.0007447,
|
| 52248 |
+
"grad_norm": 0.7717322111129761,
|
| 52249 |
+
"learning_rate": 7.446e-05,
|
| 52250 |
+
"loss": 0.2372,
|
| 52251 |
+
"step": 7447
|
| 52252 |
+
},
|
| 52253 |
+
{
|
| 52254 |
+
"epoch": 0.0007448,
|
| 52255 |
+
"grad_norm": 0.4648915231227875,
|
| 52256 |
+
"learning_rate": 7.447e-05,
|
| 52257 |
+
"loss": 0.1921,
|
| 52258 |
+
"step": 7448
|
| 52259 |
+
},
|
| 52260 |
+
{
|
| 52261 |
+
"epoch": 0.0007449,
|
| 52262 |
+
"grad_norm": 0.46609944105148315,
|
| 52263 |
+
"learning_rate": 7.448e-05,
|
| 52264 |
+
"loss": 0.1882,
|
| 52265 |
+
"step": 7449
|
| 52266 |
+
},
|
| 52267 |
+
{
|
| 52268 |
+
"epoch": 0.000745,
|
| 52269 |
+
"grad_norm": 0.589912474155426,
|
| 52270 |
+
"learning_rate": 7.449e-05,
|
| 52271 |
+
"loss": 0.2164,
|
| 52272 |
+
"step": 7450
|
| 52273 |
+
},
|
| 52274 |
+
{
|
| 52275 |
+
"epoch": 0.0007451,
|
| 52276 |
+
"grad_norm": 0.48248955607414246,
|
| 52277 |
+
"learning_rate": 7.45e-05,
|
| 52278 |
+
"loss": 0.1825,
|
| 52279 |
+
"step": 7451
|
| 52280 |
+
},
|
| 52281 |
+
{
|
| 52282 |
+
"epoch": 0.0007452,
|
| 52283 |
+
"grad_norm": 0.4037376344203949,
|
| 52284 |
+
"learning_rate": 7.451e-05,
|
| 52285 |
+
"loss": 0.1868,
|
| 52286 |
+
"step": 7452
|
| 52287 |
+
},
|
| 52288 |
+
{
|
| 52289 |
+
"epoch": 0.0007453,
|
| 52290 |
+
"grad_norm": 0.4312678277492523,
|
| 52291 |
+
"learning_rate": 7.452e-05,
|
| 52292 |
+
"loss": 0.1851,
|
| 52293 |
+
"step": 7453
|
| 52294 |
+
},
|
| 52295 |
+
{
|
| 52296 |
+
"epoch": 0.0007454,
|
| 52297 |
+
"grad_norm": 0.40998610854148865,
|
| 52298 |
+
"learning_rate": 7.452999999999999e-05,
|
| 52299 |
+
"loss": 0.1848,
|
| 52300 |
+
"step": 7454
|
| 52301 |
+
},
|
| 52302 |
+
{
|
| 52303 |
+
"epoch": 0.0007455,
|
| 52304 |
+
"grad_norm": 0.4464742839336395,
|
| 52305 |
+
"learning_rate": 7.454e-05,
|
| 52306 |
+
"loss": 0.1913,
|
| 52307 |
+
"step": 7455
|
| 52308 |
+
},
|
| 52309 |
+
{
|
| 52310 |
+
"epoch": 0.0007456,
|
| 52311 |
+
"grad_norm": 0.4490673243999481,
|
| 52312 |
+
"learning_rate": 7.455000000000001e-05,
|
| 52313 |
+
"loss": 0.1864,
|
| 52314 |
+
"step": 7456
|
| 52315 |
+
},
|
| 52316 |
+
{
|
| 52317 |
+
"epoch": 0.0007457,
|
| 52318 |
+
"grad_norm": 0.39138734340667725,
|
| 52319 |
+
"learning_rate": 7.456e-05,
|
| 52320 |
+
"loss": 0.1748,
|
| 52321 |
+
"step": 7457
|
| 52322 |
+
},
|
| 52323 |
+
{
|
| 52324 |
+
"epoch": 0.0007458,
|
| 52325 |
+
"grad_norm": 0.5053826570510864,
|
| 52326 |
+
"learning_rate": 7.457e-05,
|
| 52327 |
+
"loss": 0.2015,
|
| 52328 |
+
"step": 7458
|
| 52329 |
+
},
|
| 52330 |
+
{
|
| 52331 |
+
"epoch": 0.0007459,
|
| 52332 |
+
"grad_norm": 0.4277768135070801,
|
| 52333 |
+
"learning_rate": 7.458000000000001e-05,
|
| 52334 |
+
"loss": 0.1869,
|
| 52335 |
+
"step": 7459
|
| 52336 |
+
},
|
| 52337 |
+
{
|
| 52338 |
+
"epoch": 0.000746,
|
| 52339 |
+
"grad_norm": 0.37439417839050293,
|
| 52340 |
+
"learning_rate": 7.459e-05,
|
| 52341 |
+
"loss": 0.1818,
|
| 52342 |
+
"step": 7460
|
| 52343 |
+
},
|
| 52344 |
+
{
|
| 52345 |
+
"epoch": 0.0007461,
|
| 52346 |
+
"grad_norm": 0.4284929931163788,
|
| 52347 |
+
"learning_rate": 7.46e-05,
|
| 52348 |
+
"loss": 0.1865,
|
| 52349 |
+
"step": 7461
|
| 52350 |
+
},
|
| 52351 |
+
{
|
| 52352 |
+
"epoch": 0.0007462,
|
| 52353 |
+
"grad_norm": 7.139459609985352,
|
| 52354 |
+
"learning_rate": 7.461e-05,
|
| 52355 |
+
"loss": 0.6917,
|
| 52356 |
+
"step": 7462
|
| 52357 |
+
},
|
| 52358 |
+
{
|
| 52359 |
+
"epoch": 0.0007463,
|
| 52360 |
+
"grad_norm": 1.4773494005203247,
|
| 52361 |
+
"learning_rate": 7.462e-05,
|
| 52362 |
+
"loss": 0.2689,
|
| 52363 |
+
"step": 7463
|
| 52364 |
+
},
|
| 52365 |
+
{
|
| 52366 |
+
"epoch": 0.0007464,
|
| 52367 |
+
"grad_norm": 0.5157195329666138,
|
| 52368 |
+
"learning_rate": 7.463e-05,
|
| 52369 |
+
"loss": 0.1873,
|
| 52370 |
+
"step": 7464
|
| 52371 |
+
},
|
| 52372 |
+
{
|
| 52373 |
+
"epoch": 0.0007465,
|
| 52374 |
+
"grad_norm": 0.4672258496284485,
|
| 52375 |
+
"learning_rate": 7.464e-05,
|
| 52376 |
+
"loss": 0.1758,
|
| 52377 |
+
"step": 7465
|
| 52378 |
+
},
|
| 52379 |
+
{
|
| 52380 |
+
"epoch": 0.0007466,
|
| 52381 |
+
"grad_norm": 0.6287294626235962,
|
| 52382 |
+
"learning_rate": 7.465e-05,
|
| 52383 |
+
"loss": 0.203,
|
| 52384 |
+
"step": 7466
|
| 52385 |
+
},
|
| 52386 |
+
{
|
| 52387 |
+
"epoch": 0.0007467,
|
| 52388 |
+
"grad_norm": 0.5864905118942261,
|
| 52389 |
+
"learning_rate": 7.466000000000001e-05,
|
| 52390 |
+
"loss": 0.1952,
|
| 52391 |
+
"step": 7467
|
| 52392 |
+
},
|
| 52393 |
+
{
|
| 52394 |
+
"epoch": 0.0007468,
|
| 52395 |
+
"grad_norm": 0.5101815462112427,
|
| 52396 |
+
"learning_rate": 7.467e-05,
|
| 52397 |
+
"loss": 0.1755,
|
| 52398 |
+
"step": 7468
|
| 52399 |
+
},
|
| 52400 |
+
{
|
| 52401 |
+
"epoch": 0.0007469,
|
| 52402 |
+
"grad_norm": 0.9012991786003113,
|
| 52403 |
+
"learning_rate": 7.468e-05,
|
| 52404 |
+
"loss": 0.2595,
|
| 52405 |
+
"step": 7469
|
| 52406 |
+
},
|
| 52407 |
+
{
|
| 52408 |
+
"epoch": 0.000747,
|
| 52409 |
+
"grad_norm": 0.42554858326911926,
|
| 52410 |
+
"learning_rate": 7.469e-05,
|
| 52411 |
+
"loss": 0.1702,
|
| 52412 |
+
"step": 7470
|
| 52413 |
+
},
|
| 52414 |
+
{
|
| 52415 |
+
"epoch": 0.0007471,
|
| 52416 |
+
"grad_norm": 0.5329145193099976,
|
| 52417 |
+
"learning_rate": 7.47e-05,
|
| 52418 |
+
"loss": 0.1949,
|
| 52419 |
+
"step": 7471
|
| 52420 |
+
},
|
| 52421 |
+
{
|
| 52422 |
+
"epoch": 0.0007472,
|
| 52423 |
+
"grad_norm": 0.4959746301174164,
|
| 52424 |
+
"learning_rate": 7.471e-05,
|
| 52425 |
+
"loss": 0.179,
|
| 52426 |
+
"step": 7472
|
| 52427 |
+
},
|
| 52428 |
+
{
|
| 52429 |
+
"epoch": 0.0007473,
|
| 52430 |
+
"grad_norm": 1.148716688156128,
|
| 52431 |
+
"learning_rate": 7.472e-05,
|
| 52432 |
+
"loss": 0.223,
|
| 52433 |
+
"step": 7473
|
| 52434 |
+
},
|
| 52435 |
+
{
|
| 52436 |
+
"epoch": 0.0007474,
|
| 52437 |
+
"grad_norm": 0.5317788124084473,
|
| 52438 |
+
"learning_rate": 7.473000000000001e-05,
|
| 52439 |
+
"loss": 0.2101,
|
| 52440 |
+
"step": 7474
|
| 52441 |
+
},
|
| 52442 |
+
{
|
| 52443 |
+
"epoch": 0.0007475,
|
| 52444 |
+
"grad_norm": 0.5204247832298279,
|
| 52445 |
+
"learning_rate": 7.473999999999999e-05,
|
| 52446 |
+
"loss": 0.1957,
|
| 52447 |
+
"step": 7475
|
| 52448 |
+
},
|
| 52449 |
+
{
|
| 52450 |
+
"epoch": 0.0007476,
|
| 52451 |
+
"grad_norm": 0.6172888875007629,
|
| 52452 |
+
"learning_rate": 7.475e-05,
|
| 52453 |
+
"loss": 0.1917,
|
| 52454 |
+
"step": 7476
|
| 52455 |
+
},
|
| 52456 |
+
{
|
| 52457 |
+
"epoch": 0.0007477,
|
| 52458 |
+
"grad_norm": 0.6861792802810669,
|
| 52459 |
+
"learning_rate": 7.476000000000001e-05,
|
| 52460 |
+
"loss": 0.1942,
|
| 52461 |
+
"step": 7477
|
| 52462 |
+
},
|
| 52463 |
+
{
|
| 52464 |
+
"epoch": 0.0007478,
|
| 52465 |
+
"grad_norm": 0.4458335340023041,
|
| 52466 |
+
"learning_rate": 7.477e-05,
|
| 52467 |
+
"loss": 0.1857,
|
| 52468 |
+
"step": 7478
|
| 52469 |
+
},
|
| 52470 |
+
{
|
| 52471 |
+
"epoch": 0.0007479,
|
| 52472 |
+
"grad_norm": 0.43245258927345276,
|
| 52473 |
+
"learning_rate": 7.478e-05,
|
| 52474 |
+
"loss": 0.1737,
|
| 52475 |
+
"step": 7479
|
| 52476 |
+
},
|
| 52477 |
+
{
|
| 52478 |
+
"epoch": 0.000748,
|
| 52479 |
+
"grad_norm": 0.4693397283554077,
|
| 52480 |
+
"learning_rate": 7.479000000000001e-05,
|
| 52481 |
+
"loss": 0.1851,
|
| 52482 |
+
"step": 7480
|
| 52483 |
+
},
|
| 52484 |
+
{
|
| 52485 |
+
"epoch": 0.0007481,
|
| 52486 |
+
"grad_norm": 0.5341662168502808,
|
| 52487 |
+
"learning_rate": 7.48e-05,
|
| 52488 |
+
"loss": 0.1902,
|
| 52489 |
+
"step": 7481
|
| 52490 |
+
},
|
| 52491 |
+
{
|
| 52492 |
+
"epoch": 0.0007482,
|
| 52493 |
+
"grad_norm": 0.6064165830612183,
|
| 52494 |
+
"learning_rate": 7.481e-05,
|
| 52495 |
+
"loss": 0.2173,
|
| 52496 |
+
"step": 7482
|
| 52497 |
+
},
|
| 52498 |
+
{
|
| 52499 |
+
"epoch": 0.0007483,
|
| 52500 |
+
"grad_norm": 0.39724573493003845,
|
| 52501 |
+
"learning_rate": 7.482e-05,
|
| 52502 |
+
"loss": 0.1735,
|
| 52503 |
+
"step": 7483
|
| 52504 |
+
},
|
| 52505 |
+
{
|
| 52506 |
+
"epoch": 0.0007484,
|
| 52507 |
+
"grad_norm": 0.3705470860004425,
|
| 52508 |
+
"learning_rate": 7.483e-05,
|
| 52509 |
+
"loss": 0.1703,
|
| 52510 |
+
"step": 7484
|
| 52511 |
+
},
|
| 52512 |
+
{
|
| 52513 |
+
"epoch": 0.0007485,
|
| 52514 |
+
"grad_norm": 0.43255889415740967,
|
| 52515 |
+
"learning_rate": 7.484e-05,
|
| 52516 |
+
"loss": 0.1763,
|
| 52517 |
+
"step": 7485
|
| 52518 |
+
},
|
| 52519 |
+
{
|
| 52520 |
+
"epoch": 0.0007486,
|
| 52521 |
+
"grad_norm": 0.4192142188549042,
|
| 52522 |
+
"learning_rate": 7.485e-05,
|
| 52523 |
+
"loss": 0.1801,
|
| 52524 |
+
"step": 7486
|
| 52525 |
+
},
|
| 52526 |
+
{
|
| 52527 |
+
"epoch": 0.0007487,
|
| 52528 |
+
"grad_norm": 0.7527932524681091,
|
| 52529 |
+
"learning_rate": 7.486e-05,
|
| 52530 |
+
"loss": 0.2267,
|
| 52531 |
+
"step": 7487
|
| 52532 |
+
},
|
| 52533 |
+
{
|
| 52534 |
+
"epoch": 0.0007488,
|
| 52535 |
+
"grad_norm": 0.39235246181488037,
|
| 52536 |
+
"learning_rate": 7.486999999999999e-05,
|
| 52537 |
+
"loss": 0.1724,
|
| 52538 |
+
"step": 7488
|
| 52539 |
+
},
|
| 52540 |
+
{
|
| 52541 |
+
"epoch": 0.0007489,
|
| 52542 |
+
"grad_norm": 0.42596572637557983,
|
| 52543 |
+
"learning_rate": 7.488e-05,
|
| 52544 |
+
"loss": 0.1805,
|
| 52545 |
+
"step": 7489
|
| 52546 |
+
},
|
| 52547 |
+
{
|
| 52548 |
+
"epoch": 0.000749,
|
| 52549 |
+
"grad_norm": 0.43126609921455383,
|
| 52550 |
+
"learning_rate": 7.489000000000001e-05,
|
| 52551 |
+
"loss": 0.1859,
|
| 52552 |
+
"step": 7490
|
| 52553 |
+
},
|
| 52554 |
+
{
|
| 52555 |
+
"epoch": 0.0007491,
|
| 52556 |
+
"grad_norm": 0.42793038487434387,
|
| 52557 |
+
"learning_rate": 7.49e-05,
|
| 52558 |
+
"loss": 0.1788,
|
| 52559 |
+
"step": 7491
|
| 52560 |
+
},
|
| 52561 |
+
{
|
| 52562 |
+
"epoch": 0.0007492,
|
| 52563 |
+
"grad_norm": 0.44571396708488464,
|
| 52564 |
+
"learning_rate": 7.491e-05,
|
| 52565 |
+
"loss": 0.1943,
|
| 52566 |
+
"step": 7492
|
| 52567 |
+
},
|
| 52568 |
+
{
|
| 52569 |
+
"epoch": 0.0007493,
|
| 52570 |
+
"grad_norm": 0.5444081425666809,
|
| 52571 |
+
"learning_rate": 7.492000000000001e-05,
|
| 52572 |
+
"loss": 0.197,
|
| 52573 |
+
"step": 7493
|
| 52574 |
+
},
|
| 52575 |
+
{
|
| 52576 |
+
"epoch": 0.0007494,
|
| 52577 |
+
"grad_norm": 0.4795020520687103,
|
| 52578 |
+
"learning_rate": 7.493e-05,
|
| 52579 |
+
"loss": 0.204,
|
| 52580 |
+
"step": 7494
|
| 52581 |
+
},
|
| 52582 |
+
{
|
| 52583 |
+
"epoch": 0.0007495,
|
| 52584 |
+
"grad_norm": 0.4522680342197418,
|
| 52585 |
+
"learning_rate": 7.494e-05,
|
| 52586 |
+
"loss": 0.194,
|
| 52587 |
+
"step": 7495
|
| 52588 |
+
},
|
| 52589 |
+
{
|
| 52590 |
+
"epoch": 0.0007496,
|
| 52591 |
+
"grad_norm": 0.3937327563762665,
|
| 52592 |
+
"learning_rate": 7.495e-05,
|
| 52593 |
+
"loss": 0.1844,
|
| 52594 |
+
"step": 7496
|
| 52595 |
+
},
|
| 52596 |
+
{
|
| 52597 |
+
"epoch": 0.0007497,
|
| 52598 |
+
"grad_norm": 1.3814177513122559,
|
| 52599 |
+
"learning_rate": 7.496e-05,
|
| 52600 |
+
"loss": 0.2533,
|
| 52601 |
+
"step": 7497
|
| 52602 |
+
},
|
| 52603 |
+
{
|
| 52604 |
+
"epoch": 0.0007498,
|
| 52605 |
+
"grad_norm": 0.48238804936408997,
|
| 52606 |
+
"learning_rate": 7.497e-05,
|
| 52607 |
+
"loss": 0.1885,
|
| 52608 |
+
"step": 7498
|
| 52609 |
+
},
|
| 52610 |
+
{
|
| 52611 |
+
"epoch": 0.0007499,
|
| 52612 |
+
"grad_norm": 0.45935240387916565,
|
| 52613 |
+
"learning_rate": 7.498e-05,
|
| 52614 |
+
"loss": 0.1896,
|
| 52615 |
+
"step": 7499
|
| 52616 |
+
},
|
| 52617 |
+
{
|
| 52618 |
+
"epoch": 0.00075,
|
| 52619 |
+
"grad_norm": 0.4718035161495209,
|
| 52620 |
+
"learning_rate": 7.499e-05,
|
| 52621 |
+
"loss": 0.1913,
|
| 52622 |
+
"step": 7500
|
| 52623 |
+
},
|
| 52624 |
+
{
|
| 52625 |
+
"epoch": 0.00075,
|
| 52626 |
+
"eval_loss": 0.021516965702176094,
|
| 52627 |
+
"eval_runtime": 362.7437,
|
| 52628 |
+
"eval_samples_per_second": 27.568,
|
| 52629 |
+
"eval_steps_per_second": 1.723,
|
| 52630 |
+
"step": 7500
|
| 52631 |
}
|
| 52632 |
],
|
| 52633 |
"logging_steps": 1,
|