Training in progress, step 15900, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:748bdfc97a3ac0f2471894b6cbf39214e6c8dc68577e4aba95ff73fff3551251
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dce88233f32100ccf6f46e7e8203889854494cdd441403ca132b236dbe7bfe95
|
| 3 |
size 173247691
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e51a42ec5ffac256f3cd0ee0ccd7a6d3befe58914ad400c0cc707bc4a5ee283
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98bced96b32ec6ff4a0577a7b2dc72669103ca93f6b93a912e060bbde6e6a869
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -109208,6 +109208,2106 @@
|
|
| 109208 |
"learning_rate": 1.651068466509187e-06,
|
| 109209 |
"loss": 0.5212,
|
| 109210 |
"step": 15600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109211 |
}
|
| 109212 |
],
|
| 109213 |
"logging_steps": 1,
|
|
@@ -109227,7 +111327,7 @@
|
|
| 109227 |
"attributes": {}
|
| 109228 |
}
|
| 109229 |
},
|
| 109230 |
-
"total_flos": 8.
|
| 109231 |
"train_batch_size": 8,
|
| 109232 |
"trial_name": null,
|
| 109233 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 18.10940170940171,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15900,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 109208 |
"learning_rate": 1.651068466509187e-06,
|
| 109209 |
"loss": 0.5212,
|
| 109210 |
"step": 15600
|
| 109211 |
+
},
|
| 109212 |
+
{
|
| 109213 |
+
"epoch": 17.76923076923077,
|
| 109214 |
+
"grad_norm": 0.24282784759998322,
|
| 109215 |
+
"learning_rate": 1.6494037518875988e-06,
|
| 109216 |
+
"loss": 0.5624,
|
| 109217 |
+
"step": 15601
|
| 109218 |
+
},
|
| 109219 |
+
{
|
| 109220 |
+
"epoch": 17.77037037037037,
|
| 109221 |
+
"grad_norm": 0.25488370656967163,
|
| 109222 |
+
"learning_rate": 1.6477398482964423e-06,
|
| 109223 |
+
"loss": 0.4,
|
| 109224 |
+
"step": 15602
|
| 109225 |
+
},
|
| 109226 |
+
{
|
| 109227 |
+
"epoch": 17.77150997150997,
|
| 109228 |
+
"grad_norm": 0.16509465873241425,
|
| 109229 |
+
"learning_rate": 1.6460767557934965e-06,
|
| 109230 |
+
"loss": 0.879,
|
| 109231 |
+
"step": 15603
|
| 109232 |
+
},
|
| 109233 |
+
{
|
| 109234 |
+
"epoch": 17.772649572649573,
|
| 109235 |
+
"grad_norm": 0.17876017093658447,
|
| 109236 |
+
"learning_rate": 1.644414474436526e-06,
|
| 109237 |
+
"loss": 0.7514,
|
| 109238 |
+
"step": 15604
|
| 109239 |
+
},
|
| 109240 |
+
{
|
| 109241 |
+
"epoch": 17.773789173789172,
|
| 109242 |
+
"grad_norm": 0.190785214304924,
|
| 109243 |
+
"learning_rate": 1.6427530042832706e-06,
|
| 109244 |
+
"loss": 0.8318,
|
| 109245 |
+
"step": 15605
|
| 109246 |
+
},
|
| 109247 |
+
{
|
| 109248 |
+
"epoch": 17.774928774928775,
|
| 109249 |
+
"grad_norm": 0.19601422548294067,
|
| 109250 |
+
"learning_rate": 1.6410923453914346e-06,
|
| 109251 |
+
"loss": 0.7242,
|
| 109252 |
+
"step": 15606
|
| 109253 |
+
},
|
| 109254 |
+
{
|
| 109255 |
+
"epoch": 17.776068376068377,
|
| 109256 |
+
"grad_norm": 0.19514648616313934,
|
| 109257 |
+
"learning_rate": 1.639432497818702e-06,
|
| 109258 |
+
"loss": 0.4328,
|
| 109259 |
+
"step": 15607
|
| 109260 |
+
},
|
| 109261 |
+
{
|
| 109262 |
+
"epoch": 17.777207977207976,
|
| 109263 |
+
"grad_norm": 0.16870303452014923,
|
| 109264 |
+
"learning_rate": 1.6377734616227187e-06,
|
| 109265 |
+
"loss": 0.8108,
|
| 109266 |
+
"step": 15608
|
| 109267 |
+
},
|
| 109268 |
+
{
|
| 109269 |
+
"epoch": 17.778347578347578,
|
| 109270 |
+
"grad_norm": 0.23923449218273163,
|
| 109271 |
+
"learning_rate": 1.6361152368611078e-06,
|
| 109272 |
+
"loss": 0.6936,
|
| 109273 |
+
"step": 15609
|
| 109274 |
+
},
|
| 109275 |
+
{
|
| 109276 |
+
"epoch": 17.77948717948718,
|
| 109277 |
+
"grad_norm": 0.16083301603794098,
|
| 109278 |
+
"learning_rate": 1.6344578235914676e-06,
|
| 109279 |
+
"loss": 0.5575,
|
| 109280 |
+
"step": 15610
|
| 109281 |
+
},
|
| 109282 |
+
{
|
| 109283 |
+
"epoch": 17.78062678062678,
|
| 109284 |
+
"grad_norm": 0.2206811010837555,
|
| 109285 |
+
"learning_rate": 1.6328012218713662e-06,
|
| 109286 |
+
"loss": 0.5659,
|
| 109287 |
+
"step": 15611
|
| 109288 |
+
},
|
| 109289 |
+
{
|
| 109290 |
+
"epoch": 17.78176638176638,
|
| 109291 |
+
"grad_norm": 0.17298242449760437,
|
| 109292 |
+
"learning_rate": 1.6311454317583296e-06,
|
| 109293 |
+
"loss": 0.9719,
|
| 109294 |
+
"step": 15612
|
| 109295 |
+
},
|
| 109296 |
+
{
|
| 109297 |
+
"epoch": 17.782905982905984,
|
| 109298 |
+
"grad_norm": 0.22341564297676086,
|
| 109299 |
+
"learning_rate": 1.6294904533098815e-06,
|
| 109300 |
+
"loss": 0.7255,
|
| 109301 |
+
"step": 15613
|
| 109302 |
+
},
|
| 109303 |
+
{
|
| 109304 |
+
"epoch": 17.784045584045582,
|
| 109305 |
+
"grad_norm": 0.22084711492061615,
|
| 109306 |
+
"learning_rate": 1.6278362865835007e-06,
|
| 109307 |
+
"loss": 0.6097,
|
| 109308 |
+
"step": 15614
|
| 109309 |
+
},
|
| 109310 |
+
{
|
| 109311 |
+
"epoch": 17.785185185185185,
|
| 109312 |
+
"grad_norm": 0.16300833225250244,
|
| 109313 |
+
"learning_rate": 1.6261829316366384e-06,
|
| 109314 |
+
"loss": 0.6971,
|
| 109315 |
+
"step": 15615
|
| 109316 |
+
},
|
| 109317 |
+
{
|
| 109318 |
+
"epoch": 17.786324786324787,
|
| 109319 |
+
"grad_norm": 0.17927750945091248,
|
| 109320 |
+
"learning_rate": 1.624530388526721e-06,
|
| 109321 |
+
"loss": 0.734,
|
| 109322 |
+
"step": 15616
|
| 109323 |
+
},
|
| 109324 |
+
{
|
| 109325 |
+
"epoch": 17.787464387464386,
|
| 109326 |
+
"grad_norm": 0.23114407062530518,
|
| 109327 |
+
"learning_rate": 1.6228786573111444e-06,
|
| 109328 |
+
"loss": 0.574,
|
| 109329 |
+
"step": 15617
|
| 109330 |
+
},
|
| 109331 |
+
{
|
| 109332 |
+
"epoch": 17.788603988603988,
|
| 109333 |
+
"grad_norm": 0.21871796250343323,
|
| 109334 |
+
"learning_rate": 1.6212277380472846e-06,
|
| 109335 |
+
"loss": 0.6249,
|
| 109336 |
+
"step": 15618
|
| 109337 |
+
},
|
| 109338 |
+
{
|
| 109339 |
+
"epoch": 17.78974358974359,
|
| 109340 |
+
"grad_norm": 0.16673311591148376,
|
| 109341 |
+
"learning_rate": 1.619577630792471e-06,
|
| 109342 |
+
"loss": 0.7506,
|
| 109343 |
+
"step": 15619
|
| 109344 |
+
},
|
| 109345 |
+
{
|
| 109346 |
+
"epoch": 17.79088319088319,
|
| 109347 |
+
"grad_norm": 0.18423733115196228,
|
| 109348 |
+
"learning_rate": 1.6179283356040243e-06,
|
| 109349 |
+
"loss": 0.536,
|
| 109350 |
+
"step": 15620
|
| 109351 |
+
},
|
| 109352 |
+
{
|
| 109353 |
+
"epoch": 17.79202279202279,
|
| 109354 |
+
"grad_norm": 0.21031668782234192,
|
| 109355 |
+
"learning_rate": 1.6162798525392293e-06,
|
| 109356 |
+
"loss": 0.4542,
|
| 109357 |
+
"step": 15621
|
| 109358 |
+
},
|
| 109359 |
+
{
|
| 109360 |
+
"epoch": 17.793162393162394,
|
| 109361 |
+
"grad_norm": 0.1846408098936081,
|
| 109362 |
+
"learning_rate": 1.61463218165534e-06,
|
| 109363 |
+
"loss": 0.3919,
|
| 109364 |
+
"step": 15622
|
| 109365 |
+
},
|
| 109366 |
+
{
|
| 109367 |
+
"epoch": 17.794301994301993,
|
| 109368 |
+
"grad_norm": 0.1976088583469391,
|
| 109369 |
+
"learning_rate": 1.6129853230095804e-06,
|
| 109370 |
+
"loss": 0.5886,
|
| 109371 |
+
"step": 15623
|
| 109372 |
+
},
|
| 109373 |
+
{
|
| 109374 |
+
"epoch": 17.795441595441595,
|
| 109375 |
+
"grad_norm": 0.17543578147888184,
|
| 109376 |
+
"learning_rate": 1.61133927665916e-06,
|
| 109377 |
+
"loss": 0.8148,
|
| 109378 |
+
"step": 15624
|
| 109379 |
+
},
|
| 109380 |
+
{
|
| 109381 |
+
"epoch": 17.796581196581197,
|
| 109382 |
+
"grad_norm": 0.17480209469795227,
|
| 109383 |
+
"learning_rate": 1.6096940426612473e-06,
|
| 109384 |
+
"loss": 0.7421,
|
| 109385 |
+
"step": 15625
|
| 109386 |
+
},
|
| 109387 |
+
{
|
| 109388 |
+
"epoch": 17.797720797720796,
|
| 109389 |
+
"grad_norm": 0.22213301062583923,
|
| 109390 |
+
"learning_rate": 1.6080496210729795e-06,
|
| 109391 |
+
"loss": 0.4293,
|
| 109392 |
+
"step": 15626
|
| 109393 |
+
},
|
| 109394 |
+
{
|
| 109395 |
+
"epoch": 17.7988603988604,
|
| 109396 |
+
"grad_norm": 0.21744494140148163,
|
| 109397 |
+
"learning_rate": 1.606406011951478e-06,
|
| 109398 |
+
"loss": 0.6143,
|
| 109399 |
+
"step": 15627
|
| 109400 |
+
},
|
| 109401 |
+
{
|
| 109402 |
+
"epoch": 17.8,
|
| 109403 |
+
"grad_norm": 0.19951052963733673,
|
| 109404 |
+
"learning_rate": 1.60476321535383e-06,
|
| 109405 |
+
"loss": 0.6161,
|
| 109406 |
+
"step": 15628
|
| 109407 |
+
},
|
| 109408 |
+
{
|
| 109409 |
+
"epoch": 17.8011396011396,
|
| 109410 |
+
"grad_norm": 0.1538543403148651,
|
| 109411 |
+
"learning_rate": 1.603121231337093e-06,
|
| 109412 |
+
"loss": 0.7263,
|
| 109413 |
+
"step": 15629
|
| 109414 |
+
},
|
| 109415 |
+
{
|
| 109416 |
+
"epoch": 17.802279202279202,
|
| 109417 |
+
"grad_norm": 0.2472684532403946,
|
| 109418 |
+
"learning_rate": 1.6014800599582963e-06,
|
| 109419 |
+
"loss": 0.555,
|
| 109420 |
+
"step": 15630
|
| 109421 |
+
},
|
| 109422 |
+
{
|
| 109423 |
+
"epoch": 17.803418803418804,
|
| 109424 |
+
"grad_norm": 0.19252094626426697,
|
| 109425 |
+
"learning_rate": 1.5998397012744415e-06,
|
| 109426 |
+
"loss": 0.5233,
|
| 109427 |
+
"step": 15631
|
| 109428 |
+
},
|
| 109429 |
+
{
|
| 109430 |
+
"epoch": 17.804558404558403,
|
| 109431 |
+
"grad_norm": 0.20167063176631927,
|
| 109432 |
+
"learning_rate": 1.5982001553425052e-06,
|
| 109433 |
+
"loss": 0.7915,
|
| 109434 |
+
"step": 15632
|
| 109435 |
+
},
|
| 109436 |
+
{
|
| 109437 |
+
"epoch": 17.805698005698005,
|
| 109438 |
+
"grad_norm": 0.1946546733379364,
|
| 109439 |
+
"learning_rate": 1.5965614222194363e-06,
|
| 109440 |
+
"loss": 0.5434,
|
| 109441 |
+
"step": 15633
|
| 109442 |
+
},
|
| 109443 |
+
{
|
| 109444 |
+
"epoch": 17.806837606837608,
|
| 109445 |
+
"grad_norm": 0.26714178919792175,
|
| 109446 |
+
"learning_rate": 1.5949235019621422e-06,
|
| 109447 |
+
"loss": 0.4186,
|
| 109448 |
+
"step": 15634
|
| 109449 |
+
},
|
| 109450 |
+
{
|
| 109451 |
+
"epoch": 17.807977207977206,
|
| 109452 |
+
"grad_norm": 0.22108449041843414,
|
| 109453 |
+
"learning_rate": 1.5932863946275216e-06,
|
| 109454 |
+
"loss": 0.6067,
|
| 109455 |
+
"step": 15635
|
| 109456 |
+
},
|
| 109457 |
+
{
|
| 109458 |
+
"epoch": 17.80911680911681,
|
| 109459 |
+
"grad_norm": 0.1902119368314743,
|
| 109460 |
+
"learning_rate": 1.5916501002724378e-06,
|
| 109461 |
+
"loss": 0.5094,
|
| 109462 |
+
"step": 15636
|
| 109463 |
+
},
|
| 109464 |
+
{
|
| 109465 |
+
"epoch": 17.81025641025641,
|
| 109466 |
+
"grad_norm": 0.18795819580554962,
|
| 109467 |
+
"learning_rate": 1.590014618953714e-06,
|
| 109468 |
+
"loss": 0.8118,
|
| 109469 |
+
"step": 15637
|
| 109470 |
+
},
|
| 109471 |
+
{
|
| 109472 |
+
"epoch": 17.81139601139601,
|
| 109473 |
+
"grad_norm": 0.17217494547367096,
|
| 109474 |
+
"learning_rate": 1.5883799507281637e-06,
|
| 109475 |
+
"loss": 0.5974,
|
| 109476 |
+
"step": 15638
|
| 109477 |
+
},
|
| 109478 |
+
{
|
| 109479 |
+
"epoch": 17.812535612535612,
|
| 109480 |
+
"grad_norm": 0.2681303918361664,
|
| 109481 |
+
"learning_rate": 1.5867460956525555e-06,
|
| 109482 |
+
"loss": 0.4903,
|
| 109483 |
+
"step": 15639
|
| 109484 |
+
},
|
| 109485 |
+
{
|
| 109486 |
+
"epoch": 17.813675213675214,
|
| 109487 |
+
"grad_norm": 0.2004638910293579,
|
| 109488 |
+
"learning_rate": 1.585113053783649e-06,
|
| 109489 |
+
"loss": 0.6443,
|
| 109490 |
+
"step": 15640
|
| 109491 |
+
},
|
| 109492 |
+
{
|
| 109493 |
+
"epoch": 17.814814814814813,
|
| 109494 |
+
"grad_norm": 0.15981297194957733,
|
| 109495 |
+
"learning_rate": 1.583480825178152e-06,
|
| 109496 |
+
"loss": 0.7571,
|
| 109497 |
+
"step": 15641
|
| 109498 |
+
},
|
| 109499 |
+
{
|
| 109500 |
+
"epoch": 17.815954415954415,
|
| 109501 |
+
"grad_norm": 0.233234241604805,
|
| 109502 |
+
"learning_rate": 1.5818494098927632e-06,
|
| 109503 |
+
"loss": 0.6427,
|
| 109504 |
+
"step": 15642
|
| 109505 |
+
},
|
| 109506 |
+
{
|
| 109507 |
+
"epoch": 17.817094017094018,
|
| 109508 |
+
"grad_norm": 0.15844886004924774,
|
| 109509 |
+
"learning_rate": 1.5802188079841435e-06,
|
| 109510 |
+
"loss": 0.6918,
|
| 109511 |
+
"step": 15643
|
| 109512 |
+
},
|
| 109513 |
+
{
|
| 109514 |
+
"epoch": 17.81823361823362,
|
| 109515 |
+
"grad_norm": 0.21250970661640167,
|
| 109516 |
+
"learning_rate": 1.578589019508933e-06,
|
| 109517 |
+
"loss": 0.5825,
|
| 109518 |
+
"step": 15644
|
| 109519 |
+
},
|
| 109520 |
+
{
|
| 109521 |
+
"epoch": 17.81937321937322,
|
| 109522 |
+
"grad_norm": 0.22062163054943085,
|
| 109523 |
+
"learning_rate": 1.576960044523726e-06,
|
| 109524 |
+
"loss": 0.4389,
|
| 109525 |
+
"step": 15645
|
| 109526 |
+
},
|
| 109527 |
+
{
|
| 109528 |
+
"epoch": 17.82051282051282,
|
| 109529 |
+
"grad_norm": 0.2203861027956009,
|
| 109530 |
+
"learning_rate": 1.5753318830851155e-06,
|
| 109531 |
+
"loss": 0.5551,
|
| 109532 |
+
"step": 15646
|
| 109533 |
+
},
|
| 109534 |
+
{
|
| 109535 |
+
"epoch": 17.821652421652423,
|
| 109536 |
+
"grad_norm": 0.1712387502193451,
|
| 109537 |
+
"learning_rate": 1.5737045352496482e-06,
|
| 109538 |
+
"loss": 0.7451,
|
| 109539 |
+
"step": 15647
|
| 109540 |
+
},
|
| 109541 |
+
{
|
| 109542 |
+
"epoch": 17.822792022792022,
|
| 109543 |
+
"grad_norm": 0.17983700335025787,
|
| 109544 |
+
"learning_rate": 1.5720780010738484e-06,
|
| 109545 |
+
"loss": 0.7685,
|
| 109546 |
+
"step": 15648
|
| 109547 |
+
},
|
| 109548 |
+
{
|
| 109549 |
+
"epoch": 17.823931623931625,
|
| 109550 |
+
"grad_norm": 0.1772400438785553,
|
| 109551 |
+
"learning_rate": 1.5704522806142013e-06,
|
| 109552 |
+
"loss": 0.482,
|
| 109553 |
+
"step": 15649
|
| 109554 |
+
},
|
| 109555 |
+
{
|
| 109556 |
+
"epoch": 17.825071225071227,
|
| 109557 |
+
"grad_norm": 0.22852641344070435,
|
| 109558 |
+
"learning_rate": 1.5688273739271786e-06,
|
| 109559 |
+
"loss": 0.4757,
|
| 109560 |
+
"step": 15650
|
| 109561 |
+
},
|
| 109562 |
+
{
|
| 109563 |
+
"epoch": 17.826210826210826,
|
| 109564 |
+
"grad_norm": 0.17057965695858002,
|
| 109565 |
+
"learning_rate": 1.5672032810692155e-06,
|
| 109566 |
+
"loss": 0.6854,
|
| 109567 |
+
"step": 15651
|
| 109568 |
+
},
|
| 109569 |
+
{
|
| 109570 |
+
"epoch": 17.827350427350428,
|
| 109571 |
+
"grad_norm": 0.19366249442100525,
|
| 109572 |
+
"learning_rate": 1.5655800020967282e-06,
|
| 109573 |
+
"loss": 0.8097,
|
| 109574 |
+
"step": 15652
|
| 109575 |
+
},
|
| 109576 |
+
{
|
| 109577 |
+
"epoch": 17.82849002849003,
|
| 109578 |
+
"grad_norm": 0.197560653090477,
|
| 109579 |
+
"learning_rate": 1.5639575370660908e-06,
|
| 109580 |
+
"loss": 0.5299,
|
| 109581 |
+
"step": 15653
|
| 109582 |
+
},
|
| 109583 |
+
{
|
| 109584 |
+
"epoch": 17.82962962962963,
|
| 109585 |
+
"grad_norm": 0.15306705236434937,
|
| 109586 |
+
"learning_rate": 1.5623358860336528e-06,
|
| 109587 |
+
"loss": 0.7513,
|
| 109588 |
+
"step": 15654
|
| 109589 |
+
},
|
| 109590 |
+
{
|
| 109591 |
+
"epoch": 17.83076923076923,
|
| 109592 |
+
"grad_norm": 0.16111943125724792,
|
| 109593 |
+
"learning_rate": 1.560715049055747e-06,
|
| 109594 |
+
"loss": 0.6649,
|
| 109595 |
+
"step": 15655
|
| 109596 |
+
},
|
| 109597 |
+
{
|
| 109598 |
+
"epoch": 17.831908831908834,
|
| 109599 |
+
"grad_norm": 0.1969628632068634,
|
| 109600 |
+
"learning_rate": 1.5590950261886645e-06,
|
| 109601 |
+
"loss": 0.5163,
|
| 109602 |
+
"step": 15656
|
| 109603 |
+
},
|
| 109604 |
+
{
|
| 109605 |
+
"epoch": 17.833048433048432,
|
| 109606 |
+
"grad_norm": 0.16821090877056122,
|
| 109607 |
+
"learning_rate": 1.5574758174886738e-06,
|
| 109608 |
+
"loss": 0.4574,
|
| 109609 |
+
"step": 15657
|
| 109610 |
+
},
|
| 109611 |
+
{
|
| 109612 |
+
"epoch": 17.834188034188035,
|
| 109613 |
+
"grad_norm": 0.22840365767478943,
|
| 109614 |
+
"learning_rate": 1.5558574230120165e-06,
|
| 109615 |
+
"loss": 0.6578,
|
| 109616 |
+
"step": 15658
|
| 109617 |
+
},
|
| 109618 |
+
{
|
| 109619 |
+
"epoch": 17.835327635327637,
|
| 109620 |
+
"grad_norm": 0.17995165288448334,
|
| 109621 |
+
"learning_rate": 1.5542398428149058e-06,
|
| 109622 |
+
"loss": 0.6351,
|
| 109623 |
+
"step": 15659
|
| 109624 |
+
},
|
| 109625 |
+
{
|
| 109626 |
+
"epoch": 17.836467236467236,
|
| 109627 |
+
"grad_norm": 0.17947416007518768,
|
| 109628 |
+
"learning_rate": 1.5526230769535188e-06,
|
| 109629 |
+
"loss": 0.7299,
|
| 109630 |
+
"step": 15660
|
| 109631 |
+
},
|
| 109632 |
+
{
|
| 109633 |
+
"epoch": 17.837606837606838,
|
| 109634 |
+
"grad_norm": 0.2007407546043396,
|
| 109635 |
+
"learning_rate": 1.551007125484011e-06,
|
| 109636 |
+
"loss": 0.5662,
|
| 109637 |
+
"step": 15661
|
| 109638 |
+
},
|
| 109639 |
+
{
|
| 109640 |
+
"epoch": 17.83874643874644,
|
| 109641 |
+
"grad_norm": 0.20293985307216644,
|
| 109642 |
+
"learning_rate": 1.5493919884625118e-06,
|
| 109643 |
+
"loss": 0.4846,
|
| 109644 |
+
"step": 15662
|
| 109645 |
+
},
|
| 109646 |
+
{
|
| 109647 |
+
"epoch": 17.83988603988604,
|
| 109648 |
+
"grad_norm": 0.23599407076835632,
|
| 109649 |
+
"learning_rate": 1.5477776659451215e-06,
|
| 109650 |
+
"loss": 0.552,
|
| 109651 |
+
"step": 15663
|
| 109652 |
+
},
|
| 109653 |
+
{
|
| 109654 |
+
"epoch": 17.84102564102564,
|
| 109655 |
+
"grad_norm": 0.16593383252620697,
|
| 109656 |
+
"learning_rate": 1.5461641579879032e-06,
|
| 109657 |
+
"loss": 0.9204,
|
| 109658 |
+
"step": 15664
|
| 109659 |
+
},
|
| 109660 |
+
{
|
| 109661 |
+
"epoch": 17.842165242165244,
|
| 109662 |
+
"grad_norm": 0.2062879502773285,
|
| 109663 |
+
"learning_rate": 1.5445514646469012e-06,
|
| 109664 |
+
"loss": 0.6868,
|
| 109665 |
+
"step": 15665
|
| 109666 |
+
},
|
| 109667 |
+
{
|
| 109668 |
+
"epoch": 17.843304843304843,
|
| 109669 |
+
"grad_norm": 0.2270360291004181,
|
| 109670 |
+
"learning_rate": 1.5429395859781314e-06,
|
| 109671 |
+
"loss": 0.8417,
|
| 109672 |
+
"step": 15666
|
| 109673 |
+
},
|
| 109674 |
+
{
|
| 109675 |
+
"epoch": 17.844444444444445,
|
| 109676 |
+
"grad_norm": 0.1631319373846054,
|
| 109677 |
+
"learning_rate": 1.5413285220375745e-06,
|
| 109678 |
+
"loss": 0.5998,
|
| 109679 |
+
"step": 15667
|
| 109680 |
+
},
|
| 109681 |
+
{
|
| 109682 |
+
"epoch": 17.845584045584047,
|
| 109683 |
+
"grad_norm": 0.21692709624767303,
|
| 109684 |
+
"learning_rate": 1.5397182728811909e-06,
|
| 109685 |
+
"loss": 0.4068,
|
| 109686 |
+
"step": 15668
|
| 109687 |
+
},
|
| 109688 |
+
{
|
| 109689 |
+
"epoch": 17.846723646723646,
|
| 109690 |
+
"grad_norm": 0.23554235696792603,
|
| 109691 |
+
"learning_rate": 1.5381088385649083e-06,
|
| 109692 |
+
"loss": 0.6377,
|
| 109693 |
+
"step": 15669
|
| 109694 |
+
},
|
| 109695 |
+
{
|
| 109696 |
+
"epoch": 17.84786324786325,
|
| 109697 |
+
"grad_norm": 0.17600174248218536,
|
| 109698 |
+
"learning_rate": 1.5365002191446293e-06,
|
| 109699 |
+
"loss": 0.8951,
|
| 109700 |
+
"step": 15670
|
| 109701 |
+
},
|
| 109702 |
+
{
|
| 109703 |
+
"epoch": 17.84900284900285,
|
| 109704 |
+
"grad_norm": 0.22851988673210144,
|
| 109705 |
+
"learning_rate": 1.53489241467622e-06,
|
| 109706 |
+
"loss": 0.6134,
|
| 109707 |
+
"step": 15671
|
| 109708 |
+
},
|
| 109709 |
+
{
|
| 109710 |
+
"epoch": 17.85014245014245,
|
| 109711 |
+
"grad_norm": 0.18456017971038818,
|
| 109712 |
+
"learning_rate": 1.533285425215525e-06,
|
| 109713 |
+
"loss": 0.9537,
|
| 109714 |
+
"step": 15672
|
| 109715 |
+
},
|
| 109716 |
+
{
|
| 109717 |
+
"epoch": 17.851282051282052,
|
| 109718 |
+
"grad_norm": 0.20807267725467682,
|
| 109719 |
+
"learning_rate": 1.5316792508183602e-06,
|
| 109720 |
+
"loss": 0.6913,
|
| 109721 |
+
"step": 15673
|
| 109722 |
+
},
|
| 109723 |
+
{
|
| 109724 |
+
"epoch": 17.852421652421654,
|
| 109725 |
+
"grad_norm": 0.21124258637428284,
|
| 109726 |
+
"learning_rate": 1.5300738915405205e-06,
|
| 109727 |
+
"loss": 0.5291,
|
| 109728 |
+
"step": 15674
|
| 109729 |
+
},
|
| 109730 |
+
{
|
| 109731 |
+
"epoch": 17.853561253561253,
|
| 109732 |
+
"grad_norm": 0.18766073882579803,
|
| 109733 |
+
"learning_rate": 1.5284693474377497e-06,
|
| 109734 |
+
"loss": 0.6316,
|
| 109735 |
+
"step": 15675
|
| 109736 |
+
},
|
| 109737 |
+
{
|
| 109738 |
+
"epoch": 17.854700854700855,
|
| 109739 |
+
"grad_norm": 0.2111806571483612,
|
| 109740 |
+
"learning_rate": 1.526865618565787e-06,
|
| 109741 |
+
"loss": 0.5725,
|
| 109742 |
+
"step": 15676
|
| 109743 |
+
},
|
| 109744 |
+
{
|
| 109745 |
+
"epoch": 17.855840455840458,
|
| 109746 |
+
"grad_norm": 0.19177082180976868,
|
| 109747 |
+
"learning_rate": 1.525262704980332e-06,
|
| 109748 |
+
"loss": 0.6037,
|
| 109749 |
+
"step": 15677
|
| 109750 |
+
},
|
| 109751 |
+
{
|
| 109752 |
+
"epoch": 17.856980056980056,
|
| 109753 |
+
"grad_norm": 0.21159958839416504,
|
| 109754 |
+
"learning_rate": 1.5236606067370596e-06,
|
| 109755 |
+
"loss": 0.7083,
|
| 109756 |
+
"step": 15678
|
| 109757 |
+
},
|
| 109758 |
+
{
|
| 109759 |
+
"epoch": 17.85811965811966,
|
| 109760 |
+
"grad_norm": 0.21092787384986877,
|
| 109761 |
+
"learning_rate": 1.5220593238916141e-06,
|
| 109762 |
+
"loss": 0.677,
|
| 109763 |
+
"step": 15679
|
| 109764 |
+
},
|
| 109765 |
+
{
|
| 109766 |
+
"epoch": 17.85925925925926,
|
| 109767 |
+
"grad_norm": 0.23395919799804688,
|
| 109768 |
+
"learning_rate": 1.5204588564996126e-06,
|
| 109769 |
+
"loss": 0.6052,
|
| 109770 |
+
"step": 15680
|
| 109771 |
+
},
|
| 109772 |
+
{
|
| 109773 |
+
"epoch": 17.86039886039886,
|
| 109774 |
+
"grad_norm": 0.1797294318675995,
|
| 109775 |
+
"learning_rate": 1.5188592046166489e-06,
|
| 109776 |
+
"loss": 0.5012,
|
| 109777 |
+
"step": 15681
|
| 109778 |
+
},
|
| 109779 |
+
{
|
| 109780 |
+
"epoch": 17.861538461538462,
|
| 109781 |
+
"grad_norm": 0.19395455718040466,
|
| 109782 |
+
"learning_rate": 1.5172603682982734e-06,
|
| 109783 |
+
"loss": 0.9367,
|
| 109784 |
+
"step": 15682
|
| 109785 |
+
},
|
| 109786 |
+
{
|
| 109787 |
+
"epoch": 17.862678062678064,
|
| 109788 |
+
"grad_norm": 0.18395520746707916,
|
| 109789 |
+
"learning_rate": 1.515662347600022e-06,
|
| 109790 |
+
"loss": 0.8208,
|
| 109791 |
+
"step": 15683
|
| 109792 |
+
},
|
| 109793 |
+
{
|
| 109794 |
+
"epoch": 17.863817663817663,
|
| 109795 |
+
"grad_norm": 0.17953245341777802,
|
| 109796 |
+
"learning_rate": 1.5140651425774033e-06,
|
| 109797 |
+
"loss": 0.6289,
|
| 109798 |
+
"step": 15684
|
| 109799 |
+
},
|
| 109800 |
+
{
|
| 109801 |
+
"epoch": 17.864957264957265,
|
| 109802 |
+
"grad_norm": 0.2031773030757904,
|
| 109803 |
+
"learning_rate": 1.512468753285884e-06,
|
| 109804 |
+
"loss": 0.5615,
|
| 109805 |
+
"step": 15685
|
| 109806 |
+
},
|
| 109807 |
+
{
|
| 109808 |
+
"epoch": 17.866096866096868,
|
| 109809 |
+
"grad_norm": 0.17379479110240936,
|
| 109810 |
+
"learning_rate": 1.5108731797809223e-06,
|
| 109811 |
+
"loss": 0.8522,
|
| 109812 |
+
"step": 15686
|
| 109813 |
+
},
|
| 109814 |
+
{
|
| 109815 |
+
"epoch": 17.867236467236467,
|
| 109816 |
+
"grad_norm": 0.19298098981380463,
|
| 109817 |
+
"learning_rate": 1.5092784221179269e-06,
|
| 109818 |
+
"loss": 0.6752,
|
| 109819 |
+
"step": 15687
|
| 109820 |
+
},
|
| 109821 |
+
{
|
| 109822 |
+
"epoch": 17.86837606837607,
|
| 109823 |
+
"grad_norm": 0.18998976051807404,
|
| 109824 |
+
"learning_rate": 1.5076844803522922e-06,
|
| 109825 |
+
"loss": 0.77,
|
| 109826 |
+
"step": 15688
|
| 109827 |
+
},
|
| 109828 |
+
{
|
| 109829 |
+
"epoch": 17.86951566951567,
|
| 109830 |
+
"grad_norm": 0.17749148607254028,
|
| 109831 |
+
"learning_rate": 1.5060913545393796e-06,
|
| 109832 |
+
"loss": 0.4653,
|
| 109833 |
+
"step": 15689
|
| 109834 |
+
},
|
| 109835 |
+
{
|
| 109836 |
+
"epoch": 17.87065527065527,
|
| 109837 |
+
"grad_norm": 0.20059289038181305,
|
| 109838 |
+
"learning_rate": 1.5044990447345197e-06,
|
| 109839 |
+
"loss": 0.7338,
|
| 109840 |
+
"step": 15690
|
| 109841 |
+
},
|
| 109842 |
+
{
|
| 109843 |
+
"epoch": 17.871794871794872,
|
| 109844 |
+
"grad_norm": 0.1818057745695114,
|
| 109845 |
+
"learning_rate": 1.502907550993024e-06,
|
| 109846 |
+
"loss": 0.6244,
|
| 109847 |
+
"step": 15691
|
| 109848 |
+
},
|
| 109849 |
+
{
|
| 109850 |
+
"epoch": 17.872934472934475,
|
| 109851 |
+
"grad_norm": 0.18002621829509735,
|
| 109852 |
+
"learning_rate": 1.5013168733701649e-06,
|
| 109853 |
+
"loss": 0.7185,
|
| 109854 |
+
"step": 15692
|
| 109855 |
+
},
|
| 109856 |
+
{
|
| 109857 |
+
"epoch": 17.874074074074073,
|
| 109858 |
+
"grad_norm": 0.2134348303079605,
|
| 109859 |
+
"learning_rate": 1.4997270119211954e-06,
|
| 109860 |
+
"loss": 0.6021,
|
| 109861 |
+
"step": 15693
|
| 109862 |
+
},
|
| 109863 |
+
{
|
| 109864 |
+
"epoch": 17.875213675213676,
|
| 109865 |
+
"grad_norm": 0.23193307220935822,
|
| 109866 |
+
"learning_rate": 1.4981379667013322e-06,
|
| 109867 |
+
"loss": 0.5269,
|
| 109868 |
+
"step": 15694
|
| 109869 |
+
},
|
| 109870 |
+
{
|
| 109871 |
+
"epoch": 17.876353276353278,
|
| 109872 |
+
"grad_norm": 0.17786921560764313,
|
| 109873 |
+
"learning_rate": 1.496549737765765e-06,
|
| 109874 |
+
"loss": 0.8064,
|
| 109875 |
+
"step": 15695
|
| 109876 |
+
},
|
| 109877 |
+
{
|
| 109878 |
+
"epoch": 17.877492877492877,
|
| 109879 |
+
"grad_norm": 0.17929306626319885,
|
| 109880 |
+
"learning_rate": 1.4949623251696604e-06,
|
| 109881 |
+
"loss": 0.7414,
|
| 109882 |
+
"step": 15696
|
| 109883 |
+
},
|
| 109884 |
+
{
|
| 109885 |
+
"epoch": 17.87863247863248,
|
| 109886 |
+
"grad_norm": 0.17453618347644806,
|
| 109887 |
+
"learning_rate": 1.4933757289681576e-06,
|
| 109888 |
+
"loss": 0.7455,
|
| 109889 |
+
"step": 15697
|
| 109890 |
+
},
|
| 109891 |
+
{
|
| 109892 |
+
"epoch": 17.87977207977208,
|
| 109893 |
+
"grad_norm": 0.17964501678943634,
|
| 109894 |
+
"learning_rate": 1.4917899492163546e-06,
|
| 109895 |
+
"loss": 0.4788,
|
| 109896 |
+
"step": 15698
|
| 109897 |
+
},
|
| 109898 |
+
{
|
| 109899 |
+
"epoch": 17.88091168091168,
|
| 109900 |
+
"grad_norm": 0.18611229956150055,
|
| 109901 |
+
"learning_rate": 1.4902049859693374e-06,
|
| 109902 |
+
"loss": 0.807,
|
| 109903 |
+
"step": 15699
|
| 109904 |
+
},
|
| 109905 |
+
{
|
| 109906 |
+
"epoch": 17.882051282051282,
|
| 109907 |
+
"grad_norm": 0.18048332631587982,
|
| 109908 |
+
"learning_rate": 1.488620839282151e-06,
|
| 109909 |
+
"loss": 0.7006,
|
| 109910 |
+
"step": 15700
|
| 109911 |
+
},
|
| 109912 |
+
{
|
| 109913 |
+
"epoch": 17.883190883190885,
|
| 109914 |
+
"grad_norm": 0.23939697444438934,
|
| 109915 |
+
"learning_rate": 1.4870375092098182e-06,
|
| 109916 |
+
"loss": 0.5045,
|
| 109917 |
+
"step": 15701
|
| 109918 |
+
},
|
| 109919 |
+
{
|
| 109920 |
+
"epoch": 17.884330484330484,
|
| 109921 |
+
"grad_norm": 0.29259318113327026,
|
| 109922 |
+
"learning_rate": 1.485454995807334e-06,
|
| 109923 |
+
"loss": 0.33,
|
| 109924 |
+
"step": 15702
|
| 109925 |
+
},
|
| 109926 |
+
{
|
| 109927 |
+
"epoch": 17.885470085470086,
|
| 109928 |
+
"grad_norm": 0.22974389791488647,
|
| 109929 |
+
"learning_rate": 1.483873299129665e-06,
|
| 109930 |
+
"loss": 0.557,
|
| 109931 |
+
"step": 15703
|
| 109932 |
+
},
|
| 109933 |
+
{
|
| 109934 |
+
"epoch": 17.886609686609688,
|
| 109935 |
+
"grad_norm": 0.24285724759101868,
|
| 109936 |
+
"learning_rate": 1.4822924192317483e-06,
|
| 109937 |
+
"loss": 0.4113,
|
| 109938 |
+
"step": 15704
|
| 109939 |
+
},
|
| 109940 |
+
{
|
| 109941 |
+
"epoch": 17.887749287749287,
|
| 109942 |
+
"grad_norm": 0.16974323987960815,
|
| 109943 |
+
"learning_rate": 1.4807123561684871e-06,
|
| 109944 |
+
"loss": 0.7404,
|
| 109945 |
+
"step": 15705
|
| 109946 |
+
},
|
| 109947 |
+
{
|
| 109948 |
+
"epoch": 17.88888888888889,
|
| 109949 |
+
"grad_norm": 0.20796890556812286,
|
| 109950 |
+
"learning_rate": 1.4791331099947626e-06,
|
| 109951 |
+
"loss": 0.549,
|
| 109952 |
+
"step": 15706
|
| 109953 |
+
},
|
| 109954 |
+
{
|
| 109955 |
+
"epoch": 17.89002849002849,
|
| 109956 |
+
"grad_norm": 0.1859317421913147,
|
| 109957 |
+
"learning_rate": 1.4775546807654279e-06,
|
| 109958 |
+
"loss": 0.7613,
|
| 109959 |
+
"step": 15707
|
| 109960 |
+
},
|
| 109961 |
+
{
|
| 109962 |
+
"epoch": 17.89116809116809,
|
| 109963 |
+
"grad_norm": 0.22831900417804718,
|
| 109964 |
+
"learning_rate": 1.4759770685353114e-06,
|
| 109965 |
+
"loss": 0.3131,
|
| 109966 |
+
"step": 15708
|
| 109967 |
+
},
|
| 109968 |
+
{
|
| 109969 |
+
"epoch": 17.892307692307693,
|
| 109970 |
+
"grad_norm": 0.22934114933013916,
|
| 109971 |
+
"learning_rate": 1.474400273359197e-06,
|
| 109972 |
+
"loss": 0.6561,
|
| 109973 |
+
"step": 15709
|
| 109974 |
+
},
|
| 109975 |
+
{
|
| 109976 |
+
"epoch": 17.893447293447295,
|
| 109977 |
+
"grad_norm": 0.1744450479745865,
|
| 109978 |
+
"learning_rate": 1.4728242952918582e-06,
|
| 109979 |
+
"loss": 0.8713,
|
| 109980 |
+
"step": 15710
|
| 109981 |
+
},
|
| 109982 |
+
{
|
| 109983 |
+
"epoch": 17.894586894586894,
|
| 109984 |
+
"grad_norm": 0.18341250717639923,
|
| 109985 |
+
"learning_rate": 1.4712491343880308e-06,
|
| 109986 |
+
"loss": 0.8197,
|
| 109987 |
+
"step": 15711
|
| 109988 |
+
},
|
| 109989 |
+
{
|
| 109990 |
+
"epoch": 17.895726495726496,
|
| 109991 |
+
"grad_norm": 0.24606621265411377,
|
| 109992 |
+
"learning_rate": 1.4696747907024272e-06,
|
| 109993 |
+
"loss": 0.6444,
|
| 109994 |
+
"step": 15712
|
| 109995 |
+
},
|
| 109996 |
+
{
|
| 109997 |
+
"epoch": 17.8968660968661,
|
| 109998 |
+
"grad_norm": 0.20632360875606537,
|
| 109999 |
+
"learning_rate": 1.4681012642897258e-06,
|
| 110000 |
+
"loss": 0.5589,
|
| 110001 |
+
"step": 15713
|
| 110002 |
+
},
|
| 110003 |
+
{
|
| 110004 |
+
"epoch": 17.898005698005697,
|
| 110005 |
+
"grad_norm": 0.23047134280204773,
|
| 110006 |
+
"learning_rate": 1.4665285552045798e-06,
|
| 110007 |
+
"loss": 0.5622,
|
| 110008 |
+
"step": 15714
|
| 110009 |
+
},
|
| 110010 |
+
{
|
| 110011 |
+
"epoch": 17.8991452991453,
|
| 110012 |
+
"grad_norm": 0.1647486686706543,
|
| 110013 |
+
"learning_rate": 1.4649566635016182e-06,
|
| 110014 |
+
"loss": 0.9014,
|
| 110015 |
+
"step": 15715
|
| 110016 |
+
},
|
| 110017 |
+
{
|
| 110018 |
+
"epoch": 17.900284900284902,
|
| 110019 |
+
"grad_norm": 0.21783824265003204,
|
| 110020 |
+
"learning_rate": 1.4633855892354275e-06,
|
| 110021 |
+
"loss": 0.5439,
|
| 110022 |
+
"step": 15716
|
| 110023 |
+
},
|
| 110024 |
+
{
|
| 110025 |
+
"epoch": 17.9014245014245,
|
| 110026 |
+
"grad_norm": 0.1910092532634735,
|
| 110027 |
+
"learning_rate": 1.4618153324605838e-06,
|
| 110028 |
+
"loss": 0.5971,
|
| 110029 |
+
"step": 15717
|
| 110030 |
+
},
|
| 110031 |
+
{
|
| 110032 |
+
"epoch": 17.902564102564103,
|
| 110033 |
+
"grad_norm": 0.19612379372119904,
|
| 110034 |
+
"learning_rate": 1.4602458932316239e-06,
|
| 110035 |
+
"loss": 0.4906,
|
| 110036 |
+
"step": 15718
|
| 110037 |
+
},
|
| 110038 |
+
{
|
| 110039 |
+
"epoch": 17.903703703703705,
|
| 110040 |
+
"grad_norm": 0.23693189024925232,
|
| 110041 |
+
"learning_rate": 1.4586772716030622e-06,
|
| 110042 |
+
"loss": 0.6419,
|
| 110043 |
+
"step": 15719
|
| 110044 |
+
},
|
| 110045 |
+
{
|
| 110046 |
+
"epoch": 17.904843304843304,
|
| 110047 |
+
"grad_norm": 0.2047819048166275,
|
| 110048 |
+
"learning_rate": 1.457109467629375e-06,
|
| 110049 |
+
"loss": 0.6399,
|
| 110050 |
+
"step": 15720
|
| 110051 |
+
},
|
| 110052 |
+
{
|
| 110053 |
+
"epoch": 17.905982905982906,
|
| 110054 |
+
"grad_norm": 0.23813176155090332,
|
| 110055 |
+
"learning_rate": 1.4555424813650158e-06,
|
| 110056 |
+
"loss": 0.6639,
|
| 110057 |
+
"step": 15721
|
| 110058 |
+
},
|
| 110059 |
+
{
|
| 110060 |
+
"epoch": 17.90712250712251,
|
| 110061 |
+
"grad_norm": 0.18176306784152985,
|
| 110062 |
+
"learning_rate": 1.4539763128644134e-06,
|
| 110063 |
+
"loss": 0.7799,
|
| 110064 |
+
"step": 15722
|
| 110065 |
+
},
|
| 110066 |
+
{
|
| 110067 |
+
"epoch": 17.908262108262107,
|
| 110068 |
+
"grad_norm": 0.2089502066373825,
|
| 110069 |
+
"learning_rate": 1.4524109621819715e-06,
|
| 110070 |
+
"loss": 0.665,
|
| 110071 |
+
"step": 15723
|
| 110072 |
+
},
|
| 110073 |
+
{
|
| 110074 |
+
"epoch": 17.90940170940171,
|
| 110075 |
+
"grad_norm": 0.1719292402267456,
|
| 110076 |
+
"learning_rate": 1.4508464293720492e-06,
|
| 110077 |
+
"loss": 0.886,
|
| 110078 |
+
"step": 15724
|
| 110079 |
+
},
|
| 110080 |
+
{
|
| 110081 |
+
"epoch": 17.910541310541312,
|
| 110082 |
+
"grad_norm": 0.23162811994552612,
|
| 110083 |
+
"learning_rate": 1.449282714488992e-06,
|
| 110084 |
+
"loss": 0.5855,
|
| 110085 |
+
"step": 15725
|
| 110086 |
+
},
|
| 110087 |
+
{
|
| 110088 |
+
"epoch": 17.91168091168091,
|
| 110089 |
+
"grad_norm": 0.21349689364433289,
|
| 110090 |
+
"learning_rate": 1.4477198175871094e-06,
|
| 110091 |
+
"loss": 0.7581,
|
| 110092 |
+
"step": 15726
|
| 110093 |
+
},
|
| 110094 |
+
{
|
| 110095 |
+
"epoch": 17.912820512820513,
|
| 110096 |
+
"grad_norm": 0.1813352406024933,
|
| 110097 |
+
"learning_rate": 1.446157738720691e-06,
|
| 110098 |
+
"loss": 0.6351,
|
| 110099 |
+
"step": 15727
|
| 110100 |
+
},
|
| 110101 |
+
{
|
| 110102 |
+
"epoch": 17.913960113960115,
|
| 110103 |
+
"grad_norm": 0.15334013104438782,
|
| 110104 |
+
"learning_rate": 1.4445964779439824e-06,
|
| 110105 |
+
"loss": 0.7737,
|
| 110106 |
+
"step": 15728
|
| 110107 |
+
},
|
| 110108 |
+
{
|
| 110109 |
+
"epoch": 17.915099715099714,
|
| 110110 |
+
"grad_norm": 0.2020421177148819,
|
| 110111 |
+
"learning_rate": 1.4430360353112183e-06,
|
| 110112 |
+
"loss": 0.5556,
|
| 110113 |
+
"step": 15729
|
| 110114 |
+
},
|
| 110115 |
+
{
|
| 110116 |
+
"epoch": 17.916239316239317,
|
| 110117 |
+
"grad_norm": 0.18615838885307312,
|
| 110118 |
+
"learning_rate": 1.441476410876591e-06,
|
| 110119 |
+
"loss": 0.7083,
|
| 110120 |
+
"step": 15730
|
| 110121 |
+
},
|
| 110122 |
+
{
|
| 110123 |
+
"epoch": 17.91737891737892,
|
| 110124 |
+
"grad_norm": 0.188862144947052,
|
| 110125 |
+
"learning_rate": 1.4399176046942825e-06,
|
| 110126 |
+
"loss": 0.6021,
|
| 110127 |
+
"step": 15731
|
| 110128 |
+
},
|
| 110129 |
+
{
|
| 110130 |
+
"epoch": 17.918518518518518,
|
| 110131 |
+
"grad_norm": 0.1673618108034134,
|
| 110132 |
+
"learning_rate": 1.4383596168184188e-06,
|
| 110133 |
+
"loss": 0.5754,
|
| 110134 |
+
"step": 15732
|
| 110135 |
+
},
|
| 110136 |
+
{
|
| 110137 |
+
"epoch": 17.91965811965812,
|
| 110138 |
+
"grad_norm": 0.20038911700248718,
|
| 110139 |
+
"learning_rate": 1.4368024473031178e-06,
|
| 110140 |
+
"loss": 0.7084,
|
| 110141 |
+
"step": 15733
|
| 110142 |
+
},
|
| 110143 |
+
{
|
| 110144 |
+
"epoch": 17.920797720797722,
|
| 110145 |
+
"grad_norm": 0.17062163352966309,
|
| 110146 |
+
"learning_rate": 1.435246096202475e-06,
|
| 110147 |
+
"loss": 0.609,
|
| 110148 |
+
"step": 15734
|
| 110149 |
+
},
|
| 110150 |
+
{
|
| 110151 |
+
"epoch": 17.92193732193732,
|
| 110152 |
+
"grad_norm": 0.22755929827690125,
|
| 110153 |
+
"learning_rate": 1.4336905635705333e-06,
|
| 110154 |
+
"loss": 0.6411,
|
| 110155 |
+
"step": 15735
|
| 110156 |
+
},
|
| 110157 |
+
{
|
| 110158 |
+
"epoch": 17.923076923076923,
|
| 110159 |
+
"grad_norm": 0.16928339004516602,
|
| 110160 |
+
"learning_rate": 1.4321358494613273e-06,
|
| 110161 |
+
"loss": 0.7167,
|
| 110162 |
+
"step": 15736
|
| 110163 |
+
},
|
| 110164 |
+
{
|
| 110165 |
+
"epoch": 17.924216524216526,
|
| 110166 |
+
"grad_norm": 0.20951497554779053,
|
| 110167 |
+
"learning_rate": 1.4305819539288557e-06,
|
| 110168 |
+
"loss": 0.7137,
|
| 110169 |
+
"step": 15737
|
| 110170 |
+
},
|
| 110171 |
+
{
|
| 110172 |
+
"epoch": 17.925356125356124,
|
| 110173 |
+
"grad_norm": 0.19941192865371704,
|
| 110174 |
+
"learning_rate": 1.4290288770270915e-06,
|
| 110175 |
+
"loss": 0.8405,
|
| 110176 |
+
"step": 15738
|
| 110177 |
+
},
|
| 110178 |
+
{
|
| 110179 |
+
"epoch": 17.926495726495727,
|
| 110180 |
+
"grad_norm": 0.21432682871818542,
|
| 110181 |
+
"learning_rate": 1.4274766188099697e-06,
|
| 110182 |
+
"loss": 0.6568,
|
| 110183 |
+
"step": 15739
|
| 110184 |
+
},
|
| 110185 |
+
{
|
| 110186 |
+
"epoch": 17.92763532763533,
|
| 110187 |
+
"grad_norm": 0.16878537833690643,
|
| 110188 |
+
"learning_rate": 1.4259251793314111e-06,
|
| 110189 |
+
"loss": 0.7838,
|
| 110190 |
+
"step": 15740
|
| 110191 |
+
},
|
| 110192 |
+
{
|
| 110193 |
+
"epoch": 17.928774928774928,
|
| 110194 |
+
"grad_norm": 0.18712733685970306,
|
| 110195 |
+
"learning_rate": 1.4243745586453e-06,
|
| 110196 |
+
"loss": 0.5487,
|
| 110197 |
+
"step": 15741
|
| 110198 |
+
},
|
| 110199 |
+
{
|
| 110200 |
+
"epoch": 17.92991452991453,
|
| 110201 |
+
"grad_norm": 0.19850511848926544,
|
| 110202 |
+
"learning_rate": 1.4228247568054993e-06,
|
| 110203 |
+
"loss": 0.8193,
|
| 110204 |
+
"step": 15742
|
| 110205 |
+
},
|
| 110206 |
+
{
|
| 110207 |
+
"epoch": 17.931054131054132,
|
| 110208 |
+
"grad_norm": 0.2295110672712326,
|
| 110209 |
+
"learning_rate": 1.4212757738658266e-06,
|
| 110210 |
+
"loss": 0.7032,
|
| 110211 |
+
"step": 15743
|
| 110212 |
+
},
|
| 110213 |
+
{
|
| 110214 |
+
"epoch": 17.93219373219373,
|
| 110215 |
+
"grad_norm": 0.22638626396656036,
|
| 110216 |
+
"learning_rate": 1.4197276098800838e-06,
|
| 110217 |
+
"loss": 0.6077,
|
| 110218 |
+
"step": 15744
|
| 110219 |
+
},
|
| 110220 |
+
{
|
| 110221 |
+
"epoch": 17.933333333333334,
|
| 110222 |
+
"grad_norm": 0.2030586153268814,
|
| 110223 |
+
"learning_rate": 1.4181802649020554e-06,
|
| 110224 |
+
"loss": 0.7718,
|
| 110225 |
+
"step": 15745
|
| 110226 |
+
},
|
| 110227 |
+
{
|
| 110228 |
+
"epoch": 17.934472934472936,
|
| 110229 |
+
"grad_norm": 0.22802267968654633,
|
| 110230 |
+
"learning_rate": 1.4166337389854734e-06,
|
| 110231 |
+
"loss": 0.425,
|
| 110232 |
+
"step": 15746
|
| 110233 |
+
},
|
| 110234 |
+
{
|
| 110235 |
+
"epoch": 17.935612535612535,
|
| 110236 |
+
"grad_norm": 0.17927877604961395,
|
| 110237 |
+
"learning_rate": 1.415088032184056e-06,
|
| 110238 |
+
"loss": 0.6523,
|
| 110239 |
+
"step": 15747
|
| 110240 |
+
},
|
| 110241 |
+
{
|
| 110242 |
+
"epoch": 17.936752136752137,
|
| 110243 |
+
"grad_norm": 0.19029876589775085,
|
| 110244 |
+
"learning_rate": 1.413543144551488e-06,
|
| 110245 |
+
"loss": 0.6057,
|
| 110246 |
+
"step": 15748
|
| 110247 |
+
},
|
| 110248 |
+
{
|
| 110249 |
+
"epoch": 17.93789173789174,
|
| 110250 |
+
"grad_norm": 0.1827959418296814,
|
| 110251 |
+
"learning_rate": 1.4119990761414348e-06,
|
| 110252 |
+
"loss": 0.771,
|
| 110253 |
+
"step": 15749
|
| 110254 |
+
},
|
| 110255 |
+
{
|
| 110256 |
+
"epoch": 17.939031339031338,
|
| 110257 |
+
"grad_norm": 0.20706240832805634,
|
| 110258 |
+
"learning_rate": 1.4104558270075175e-06,
|
| 110259 |
+
"loss": 0.6524,
|
| 110260 |
+
"step": 15750
|
| 110261 |
+
},
|
| 110262 |
+
{
|
| 110263 |
+
"epoch": 17.94017094017094,
|
| 110264 |
+
"grad_norm": 0.183136984705925,
|
| 110265 |
+
"learning_rate": 1.4089133972033402e-06,
|
| 110266 |
+
"loss": 0.578,
|
| 110267 |
+
"step": 15751
|
| 110268 |
+
},
|
| 110269 |
+
{
|
| 110270 |
+
"epoch": 17.941310541310543,
|
| 110271 |
+
"grad_norm": 0.18875323235988617,
|
| 110272 |
+
"learning_rate": 1.407371786782477e-06,
|
| 110273 |
+
"loss": 0.547,
|
| 110274 |
+
"step": 15752
|
| 110275 |
+
},
|
| 110276 |
+
{
|
| 110277 |
+
"epoch": 17.94245014245014,
|
| 110278 |
+
"grad_norm": 0.2019789069890976,
|
| 110279 |
+
"learning_rate": 1.4058309957984739e-06,
|
| 110280 |
+
"loss": 0.5378,
|
| 110281 |
+
"step": 15753
|
| 110282 |
+
},
|
| 110283 |
+
{
|
| 110284 |
+
"epoch": 17.943589743589744,
|
| 110285 |
+
"grad_norm": 0.176071897149086,
|
| 110286 |
+
"learning_rate": 1.4042910243048434e-06,
|
| 110287 |
+
"loss": 0.7113,
|
| 110288 |
+
"step": 15754
|
| 110289 |
+
},
|
| 110290 |
+
{
|
| 110291 |
+
"epoch": 17.944729344729346,
|
| 110292 |
+
"grad_norm": 0.25191131234169006,
|
| 110293 |
+
"learning_rate": 1.402751872355068e-06,
|
| 110294 |
+
"loss": 0.4129,
|
| 110295 |
+
"step": 15755
|
| 110296 |
+
},
|
| 110297 |
+
{
|
| 110298 |
+
"epoch": 17.945868945868945,
|
| 110299 |
+
"grad_norm": 0.1877506822347641,
|
| 110300 |
+
"learning_rate": 1.4012135400026216e-06,
|
| 110301 |
+
"loss": 0.6665,
|
| 110302 |
+
"step": 15756
|
| 110303 |
+
},
|
| 110304 |
+
{
|
| 110305 |
+
"epoch": 17.947008547008547,
|
| 110306 |
+
"grad_norm": 0.17223647236824036,
|
| 110307 |
+
"learning_rate": 1.3996760273009223e-06,
|
| 110308 |
+
"loss": 0.6929,
|
| 110309 |
+
"step": 15757
|
| 110310 |
+
},
|
| 110311 |
+
{
|
| 110312 |
+
"epoch": 17.94814814814815,
|
| 110313 |
+
"grad_norm": 0.1804594099521637,
|
| 110314 |
+
"learning_rate": 1.398139334303375e-06,
|
| 110315 |
+
"loss": 0.541,
|
| 110316 |
+
"step": 15758
|
| 110317 |
+
},
|
| 110318 |
+
{
|
| 110319 |
+
"epoch": 17.94928774928775,
|
| 110320 |
+
"grad_norm": 0.2069043070077896,
|
| 110321 |
+
"learning_rate": 1.3966034610633533e-06,
|
| 110322 |
+
"loss": 0.7153,
|
| 110323 |
+
"step": 15759
|
| 110324 |
+
},
|
| 110325 |
+
{
|
| 110326 |
+
"epoch": 17.95042735042735,
|
| 110327 |
+
"grad_norm": 0.1842239648103714,
|
| 110328 |
+
"learning_rate": 1.3950684076342092e-06,
|
| 110329 |
+
"loss": 0.749,
|
| 110330 |
+
"step": 15760
|
| 110331 |
+
},
|
| 110332 |
+
{
|
| 110333 |
+
"epoch": 17.951566951566953,
|
| 110334 |
+
"grad_norm": 0.23782020807266235,
|
| 110335 |
+
"learning_rate": 1.393534174069247e-06,
|
| 110336 |
+
"loss": 0.614,
|
| 110337 |
+
"step": 15761
|
| 110338 |
+
},
|
| 110339 |
+
{
|
| 110340 |
+
"epoch": 17.95270655270655,
|
| 110341 |
+
"grad_norm": 0.2035999298095703,
|
| 110342 |
+
"learning_rate": 1.3920007604217605e-06,
|
| 110343 |
+
"loss": 0.5895,
|
| 110344 |
+
"step": 15762
|
| 110345 |
+
},
|
| 110346 |
+
{
|
| 110347 |
+
"epoch": 17.953846153846154,
|
| 110348 |
+
"grad_norm": 0.20039676129817963,
|
| 110349 |
+
"learning_rate": 1.3904681667450125e-06,
|
| 110350 |
+
"loss": 0.7869,
|
| 110351 |
+
"step": 15763
|
| 110352 |
+
},
|
| 110353 |
+
{
|
| 110354 |
+
"epoch": 17.954985754985756,
|
| 110355 |
+
"grad_norm": 0.21923471987247467,
|
| 110356 |
+
"learning_rate": 1.38893639309223e-06,
|
| 110357 |
+
"loss": 0.7398,
|
| 110358 |
+
"step": 15764
|
| 110359 |
+
},
|
| 110360 |
+
{
|
| 110361 |
+
"epoch": 17.956125356125355,
|
| 110362 |
+
"grad_norm": 0.23911401629447937,
|
| 110363 |
+
"learning_rate": 1.3874054395166202e-06,
|
| 110364 |
+
"loss": 0.4584,
|
| 110365 |
+
"step": 15765
|
| 110366 |
+
},
|
| 110367 |
+
{
|
| 110368 |
+
"epoch": 17.957264957264957,
|
| 110369 |
+
"grad_norm": 0.2090776115655899,
|
| 110370 |
+
"learning_rate": 1.3858753060713464e-06,
|
| 110371 |
+
"loss": 0.5422,
|
| 110372 |
+
"step": 15766
|
| 110373 |
+
},
|
| 110374 |
+
{
|
| 110375 |
+
"epoch": 17.95840455840456,
|
| 110376 |
+
"grad_norm": 0.14810439944267273,
|
| 110377 |
+
"learning_rate": 1.3843459928095687e-06,
|
| 110378 |
+
"loss": 0.5826,
|
| 110379 |
+
"step": 15767
|
| 110380 |
+
},
|
| 110381 |
+
{
|
| 110382 |
+
"epoch": 17.95954415954416,
|
| 110383 |
+
"grad_norm": 0.1938624531030655,
|
| 110384 |
+
"learning_rate": 1.3828174997844001e-06,
|
| 110385 |
+
"loss": 0.6167,
|
| 110386 |
+
"step": 15768
|
| 110387 |
+
},
|
| 110388 |
+
{
|
| 110389 |
+
"epoch": 17.96068376068376,
|
| 110390 |
+
"grad_norm": 0.24723225831985474,
|
| 110391 |
+
"learning_rate": 1.3812898270489232e-06,
|
| 110392 |
+
"loss": 0.7233,
|
| 110393 |
+
"step": 15769
|
| 110394 |
+
},
|
| 110395 |
+
{
|
| 110396 |
+
"epoch": 17.961823361823363,
|
| 110397 |
+
"grad_norm": 0.21326233446598053,
|
| 110398 |
+
"learning_rate": 1.379762974656204e-06,
|
| 110399 |
+
"loss": 0.6627,
|
| 110400 |
+
"step": 15770
|
| 110401 |
+
},
|
| 110402 |
+
{
|
| 110403 |
+
"epoch": 17.962962962962962,
|
| 110404 |
+
"grad_norm": 0.1856732815504074,
|
| 110405 |
+
"learning_rate": 1.3782369426592694e-06,
|
| 110406 |
+
"loss": 0.7113,
|
| 110407 |
+
"step": 15771
|
| 110408 |
+
},
|
| 110409 |
+
{
|
| 110410 |
+
"epoch": 17.964102564102564,
|
| 110411 |
+
"grad_norm": 0.19795556366443634,
|
| 110412 |
+
"learning_rate": 1.3767117311111328e-06,
|
| 110413 |
+
"loss": 0.6747,
|
| 110414 |
+
"step": 15772
|
| 110415 |
+
},
|
| 110416 |
+
{
|
| 110417 |
+
"epoch": 17.965242165242167,
|
| 110418 |
+
"grad_norm": 0.21814678609371185,
|
| 110419 |
+
"learning_rate": 1.375187340064757e-06,
|
| 110420 |
+
"loss": 0.7673,
|
| 110421 |
+
"step": 15773
|
| 110422 |
+
},
|
| 110423 |
+
{
|
| 110424 |
+
"epoch": 17.966381766381765,
|
| 110425 |
+
"grad_norm": 0.2197718769311905,
|
| 110426 |
+
"learning_rate": 1.373663769573094e-06,
|
| 110427 |
+
"loss": 0.7267,
|
| 110428 |
+
"step": 15774
|
| 110429 |
+
},
|
| 110430 |
+
{
|
| 110431 |
+
"epoch": 17.967521367521368,
|
| 110432 |
+
"grad_norm": 0.19596217572689056,
|
| 110433 |
+
"learning_rate": 1.3721410196890604e-06,
|
| 110434 |
+
"loss": 0.903,
|
| 110435 |
+
"step": 15775
|
| 110436 |
+
},
|
| 110437 |
+
{
|
| 110438 |
+
"epoch": 17.96866096866097,
|
| 110439 |
+
"grad_norm": 0.17794634401798248,
|
| 110440 |
+
"learning_rate": 1.3706190904655497e-06,
|
| 110441 |
+
"loss": 0.9005,
|
| 110442 |
+
"step": 15776
|
| 110443 |
+
},
|
| 110444 |
+
{
|
| 110445 |
+
"epoch": 17.96980056980057,
|
| 110446 |
+
"grad_norm": 0.17554956674575806,
|
| 110447 |
+
"learning_rate": 1.3690979819554112e-06,
|
| 110448 |
+
"loss": 0.7723,
|
| 110449 |
+
"step": 15777
|
| 110450 |
+
},
|
| 110451 |
+
{
|
| 110452 |
+
"epoch": 17.97094017094017,
|
| 110453 |
+
"grad_norm": 0.18461477756500244,
|
| 110454 |
+
"learning_rate": 1.3675776942114914e-06,
|
| 110455 |
+
"loss": 0.7401,
|
| 110456 |
+
"step": 15778
|
| 110457 |
+
},
|
| 110458 |
+
{
|
| 110459 |
+
"epoch": 17.972079772079773,
|
| 110460 |
+
"grad_norm": 0.16596505045890808,
|
| 110461 |
+
"learning_rate": 1.3660582272865874e-06,
|
| 110462 |
+
"loss": 0.6513,
|
| 110463 |
+
"step": 15779
|
| 110464 |
+
},
|
| 110465 |
+
{
|
| 110466 |
+
"epoch": 17.973219373219372,
|
| 110467 |
+
"grad_norm": 0.17433393001556396,
|
| 110468 |
+
"learning_rate": 1.3645395812334733e-06,
|
| 110469 |
+
"loss": 0.5522,
|
| 110470 |
+
"step": 15780
|
| 110471 |
+
},
|
| 110472 |
+
{
|
| 110473 |
+
"epoch": 17.974358974358974,
|
| 110474 |
+
"grad_norm": 0.21385356783866882,
|
| 110475 |
+
"learning_rate": 1.3630217561048985e-06,
|
| 110476 |
+
"loss": 0.5244,
|
| 110477 |
+
"step": 15781
|
| 110478 |
+
},
|
| 110479 |
+
{
|
| 110480 |
+
"epoch": 17.975498575498577,
|
| 110481 |
+
"grad_norm": 0.1666141003370285,
|
| 110482 |
+
"learning_rate": 1.3615047519535768e-06,
|
| 110483 |
+
"loss": 0.7708,
|
| 110484 |
+
"step": 15782
|
| 110485 |
+
},
|
| 110486 |
+
{
|
| 110487 |
+
"epoch": 17.976638176638176,
|
| 110488 |
+
"grad_norm": 0.2162417769432068,
|
| 110489 |
+
"learning_rate": 1.3599885688322073e-06,
|
| 110490 |
+
"loss": 0.5591,
|
| 110491 |
+
"step": 15783
|
| 110492 |
+
},
|
| 110493 |
+
{
|
| 110494 |
+
"epoch": 17.977777777777778,
|
| 110495 |
+
"grad_norm": 0.18287643790245056,
|
| 110496 |
+
"learning_rate": 1.3584732067934397e-06,
|
| 110497 |
+
"loss": 0.8681,
|
| 110498 |
+
"step": 15784
|
| 110499 |
+
},
|
| 110500 |
+
{
|
| 110501 |
+
"epoch": 17.97891737891738,
|
| 110502 |
+
"grad_norm": 0.20362326502799988,
|
| 110503 |
+
"learning_rate": 1.3569586658899152e-06,
|
| 110504 |
+
"loss": 0.6579,
|
| 110505 |
+
"step": 15785
|
| 110506 |
+
},
|
| 110507 |
+
{
|
| 110508 |
+
"epoch": 17.98005698005698,
|
| 110509 |
+
"grad_norm": 0.20295435190200806,
|
| 110510 |
+
"learning_rate": 1.3554449461742308e-06,
|
| 110511 |
+
"loss": 0.8604,
|
| 110512 |
+
"step": 15786
|
| 110513 |
+
},
|
| 110514 |
+
{
|
| 110515 |
+
"epoch": 17.98119658119658,
|
| 110516 |
+
"grad_norm": 0.2299310564994812,
|
| 110517 |
+
"learning_rate": 1.353932047698972e-06,
|
| 110518 |
+
"loss": 0.7428,
|
| 110519 |
+
"step": 15787
|
| 110520 |
+
},
|
| 110521 |
+
{
|
| 110522 |
+
"epoch": 17.982336182336184,
|
| 110523 |
+
"grad_norm": 0.20330417156219482,
|
| 110524 |
+
"learning_rate": 1.3524199705166774e-06,
|
| 110525 |
+
"loss": 0.7127,
|
| 110526 |
+
"step": 15788
|
| 110527 |
+
},
|
| 110528 |
+
{
|
| 110529 |
+
"epoch": 17.983475783475782,
|
| 110530 |
+
"grad_norm": 0.23929935693740845,
|
| 110531 |
+
"learning_rate": 1.3509087146798633e-06,
|
| 110532 |
+
"loss": 0.7436,
|
| 110533 |
+
"step": 15789
|
| 110534 |
+
},
|
| 110535 |
+
{
|
| 110536 |
+
"epoch": 17.984615384615385,
|
| 110537 |
+
"grad_norm": 0.17229998111724854,
|
| 110538 |
+
"learning_rate": 1.3493982802410322e-06,
|
| 110539 |
+
"loss": 0.5942,
|
| 110540 |
+
"step": 15790
|
| 110541 |
+
},
|
| 110542 |
+
{
|
| 110543 |
+
"epoch": 17.985754985754987,
|
| 110544 |
+
"grad_norm": 0.20117415487766266,
|
| 110545 |
+
"learning_rate": 1.3478886672526336e-06,
|
| 110546 |
+
"loss": 0.6428,
|
| 110547 |
+
"step": 15791
|
| 110548 |
+
},
|
| 110549 |
+
{
|
| 110550 |
+
"epoch": 17.986894586894586,
|
| 110551 |
+
"grad_norm": 0.15806029736995697,
|
| 110552 |
+
"learning_rate": 1.3463798757671064e-06,
|
| 110553 |
+
"loss": 0.6213,
|
| 110554 |
+
"step": 15792
|
| 110555 |
+
},
|
| 110556 |
+
{
|
| 110557 |
+
"epoch": 17.988034188034188,
|
| 110558 |
+
"grad_norm": 0.2126135379076004,
|
| 110559 |
+
"learning_rate": 1.3448719058368532e-06,
|
| 110560 |
+
"loss": 0.6405,
|
| 110561 |
+
"step": 15793
|
| 110562 |
+
},
|
| 110563 |
+
{
|
| 110564 |
+
"epoch": 17.98917378917379,
|
| 110565 |
+
"grad_norm": 0.16452065110206604,
|
| 110566 |
+
"learning_rate": 1.3433647575142567e-06,
|
| 110567 |
+
"loss": 0.7101,
|
| 110568 |
+
"step": 15794
|
| 110569 |
+
},
|
| 110570 |
+
{
|
| 110571 |
+
"epoch": 17.99031339031339,
|
| 110572 |
+
"grad_norm": 0.21072053909301758,
|
| 110573 |
+
"learning_rate": 1.3418584308516529e-06,
|
| 110574 |
+
"loss": 0.7111,
|
| 110575 |
+
"step": 15795
|
| 110576 |
+
},
|
| 110577 |
+
{
|
| 110578 |
+
"epoch": 17.99145299145299,
|
| 110579 |
+
"grad_norm": 0.16452158987522125,
|
| 110580 |
+
"learning_rate": 1.3403529259013641e-06,
|
| 110581 |
+
"loss": 0.5449,
|
| 110582 |
+
"step": 15796
|
| 110583 |
+
},
|
| 110584 |
+
{
|
| 110585 |
+
"epoch": 17.992592592592594,
|
| 110586 |
+
"grad_norm": 0.2283734679222107,
|
| 110587 |
+
"learning_rate": 1.3388482427156845e-06,
|
| 110588 |
+
"loss": 0.5565,
|
| 110589 |
+
"step": 15797
|
| 110590 |
+
},
|
| 110591 |
+
{
|
| 110592 |
+
"epoch": 17.993732193732193,
|
| 110593 |
+
"grad_norm": 0.1818462759256363,
|
| 110594 |
+
"learning_rate": 1.3373443813468778e-06,
|
| 110595 |
+
"loss": 0.899,
|
| 110596 |
+
"step": 15798
|
| 110597 |
+
},
|
| 110598 |
+
{
|
| 110599 |
+
"epoch": 17.994871794871795,
|
| 110600 |
+
"grad_norm": 0.21215508878231049,
|
| 110601 |
+
"learning_rate": 1.335841341847166e-06,
|
| 110602 |
+
"loss": 0.6596,
|
| 110603 |
+
"step": 15799
|
| 110604 |
+
},
|
| 110605 |
+
{
|
| 110606 |
+
"epoch": 17.996011396011397,
|
| 110607 |
+
"grad_norm": 0.18539436161518097,
|
| 110608 |
+
"learning_rate": 1.3343391242687603e-06,
|
| 110609 |
+
"loss": 0.5004,
|
| 110610 |
+
"step": 15800
|
| 110611 |
+
},
|
| 110612 |
+
{
|
| 110613 |
+
"epoch": 17.997150997150996,
|
| 110614 |
+
"grad_norm": 0.17485128343105316,
|
| 110615 |
+
"learning_rate": 1.3328377286638439e-06,
|
| 110616 |
+
"loss": 0.6803,
|
| 110617 |
+
"step": 15801
|
| 110618 |
+
},
|
| 110619 |
+
{
|
| 110620 |
+
"epoch": 17.9982905982906,
|
| 110621 |
+
"grad_norm": 0.16342106461524963,
|
| 110622 |
+
"learning_rate": 1.3313371550845583e-06,
|
| 110623 |
+
"loss": 0.7169,
|
| 110624 |
+
"step": 15802
|
| 110625 |
+
},
|
| 110626 |
+
{
|
| 110627 |
+
"epoch": 17.9994301994302,
|
| 110628 |
+
"grad_norm": 0.19986993074417114,
|
| 110629 |
+
"learning_rate": 1.3298374035830174e-06,
|
| 110630 |
+
"loss": 0.5385,
|
| 110631 |
+
"step": 15803
|
| 110632 |
+
},
|
| 110633 |
+
{
|
| 110634 |
+
"epoch": 18.0,
|
| 110635 |
+
"grad_norm": 0.3066229224205017,
|
| 110636 |
+
"learning_rate": 1.3283384742113215e-06,
|
| 110637 |
+
"loss": 0.5134,
|
| 110638 |
+
"step": 15804
|
| 110639 |
+
},
|
| 110640 |
+
{
|
| 110641 |
+
"epoch": 18.001139601139602,
|
| 110642 |
+
"grad_norm": 0.18824172019958496,
|
| 110643 |
+
"learning_rate": 1.3268403670215228e-06,
|
| 110644 |
+
"loss": 0.566,
|
| 110645 |
+
"step": 15805
|
| 110646 |
+
},
|
| 110647 |
+
{
|
| 110648 |
+
"epoch": 18.0022792022792,
|
| 110649 |
+
"grad_norm": 0.2161741405725479,
|
| 110650 |
+
"learning_rate": 1.3253430820656665e-06,
|
| 110651 |
+
"loss": 0.5104,
|
| 110652 |
+
"step": 15806
|
| 110653 |
+
},
|
| 110654 |
+
{
|
| 110655 |
+
"epoch": 18.003418803418803,
|
| 110656 |
+
"grad_norm": 0.20008744299411774,
|
| 110657 |
+
"learning_rate": 1.3238466193957467e-06,
|
| 110658 |
+
"loss": 0.8661,
|
| 110659 |
+
"step": 15807
|
| 110660 |
+
},
|
| 110661 |
+
{
|
| 110662 |
+
"epoch": 18.004558404558406,
|
| 110663 |
+
"grad_norm": 0.18964844942092896,
|
| 110664 |
+
"learning_rate": 1.3223509790637411e-06,
|
| 110665 |
+
"loss": 0.6734,
|
| 110666 |
+
"step": 15808
|
| 110667 |
+
},
|
| 110668 |
+
{
|
| 110669 |
+
"epoch": 18.005698005698004,
|
| 110670 |
+
"grad_norm": 0.23607204854488373,
|
| 110671 |
+
"learning_rate": 1.3208561611216003e-06,
|
| 110672 |
+
"loss": 0.6106,
|
| 110673 |
+
"step": 15809
|
| 110674 |
+
},
|
| 110675 |
+
{
|
| 110676 |
+
"epoch": 18.006837606837607,
|
| 110677 |
+
"grad_norm": 0.20779858529567719,
|
| 110678 |
+
"learning_rate": 1.319362165621249e-06,
|
| 110679 |
+
"loss": 0.6712,
|
| 110680 |
+
"step": 15810
|
| 110681 |
+
},
|
| 110682 |
+
{
|
| 110683 |
+
"epoch": 18.00797720797721,
|
| 110684 |
+
"grad_norm": 0.20699867606163025,
|
| 110685 |
+
"learning_rate": 1.3178689926145627e-06,
|
| 110686 |
+
"loss": 0.5852,
|
| 110687 |
+
"step": 15811
|
| 110688 |
+
},
|
| 110689 |
+
{
|
| 110690 |
+
"epoch": 18.009116809116808,
|
| 110691 |
+
"grad_norm": 0.1768336296081543,
|
| 110692 |
+
"learning_rate": 1.3163766421534163e-06,
|
| 110693 |
+
"loss": 0.5923,
|
| 110694 |
+
"step": 15812
|
| 110695 |
+
},
|
| 110696 |
+
{
|
| 110697 |
+
"epoch": 18.01025641025641,
|
| 110698 |
+
"grad_norm": 0.18291160464286804,
|
| 110699 |
+
"learning_rate": 1.3148851142896434e-06,
|
| 110700 |
+
"loss": 0.8139,
|
| 110701 |
+
"step": 15813
|
| 110702 |
+
},
|
| 110703 |
+
{
|
| 110704 |
+
"epoch": 18.011396011396013,
|
| 110705 |
+
"grad_norm": 0.16323508322238922,
|
| 110706 |
+
"learning_rate": 1.3133944090750388e-06,
|
| 110707 |
+
"loss": 0.6649,
|
| 110708 |
+
"step": 15814
|
| 110709 |
+
},
|
| 110710 |
+
{
|
| 110711 |
+
"epoch": 18.01253561253561,
|
| 110712 |
+
"grad_norm": 0.2666858434677124,
|
| 110713 |
+
"learning_rate": 1.3119045265613889e-06,
|
| 110714 |
+
"loss": 0.5979,
|
| 110715 |
+
"step": 15815
|
| 110716 |
+
},
|
| 110717 |
+
{
|
| 110718 |
+
"epoch": 18.013675213675214,
|
| 110719 |
+
"grad_norm": 0.23866496980190277,
|
| 110720 |
+
"learning_rate": 1.3104154668004353e-06,
|
| 110721 |
+
"loss": 0.4313,
|
| 110722 |
+
"step": 15816
|
| 110723 |
+
},
|
| 110724 |
+
{
|
| 110725 |
+
"epoch": 18.014814814814816,
|
| 110726 |
+
"grad_norm": 0.26230302453041077,
|
| 110727 |
+
"learning_rate": 1.308927229843901e-06,
|
| 110728 |
+
"loss": 0.283,
|
| 110729 |
+
"step": 15817
|
| 110730 |
+
},
|
| 110731 |
+
{
|
| 110732 |
+
"epoch": 18.015954415954415,
|
| 110733 |
+
"grad_norm": 0.19195322692394257,
|
| 110734 |
+
"learning_rate": 1.307439815743472e-06,
|
| 110735 |
+
"loss": 0.6843,
|
| 110736 |
+
"step": 15818
|
| 110737 |
+
},
|
| 110738 |
+
{
|
| 110739 |
+
"epoch": 18.017094017094017,
|
| 110740 |
+
"grad_norm": 0.2123197317123413,
|
| 110741 |
+
"learning_rate": 1.3059532245508154e-06,
|
| 110742 |
+
"loss": 0.6105,
|
| 110743 |
+
"step": 15819
|
| 110744 |
+
},
|
| 110745 |
+
{
|
| 110746 |
+
"epoch": 18.01823361823362,
|
| 110747 |
+
"grad_norm": 0.17628802359104156,
|
| 110748 |
+
"learning_rate": 1.3044674563175597e-06,
|
| 110749 |
+
"loss": 0.8107,
|
| 110750 |
+
"step": 15820
|
| 110751 |
+
},
|
| 110752 |
+
{
|
| 110753 |
+
"epoch": 18.019373219373218,
|
| 110754 |
+
"grad_norm": 0.20673112571239471,
|
| 110755 |
+
"learning_rate": 1.3029825110953158e-06,
|
| 110756 |
+
"loss": 0.6474,
|
| 110757 |
+
"step": 15821
|
| 110758 |
+
},
|
| 110759 |
+
{
|
| 110760 |
+
"epoch": 18.02051282051282,
|
| 110761 |
+
"grad_norm": 0.304705411195755,
|
| 110762 |
+
"learning_rate": 1.3014983889356513e-06,
|
| 110763 |
+
"loss": 0.7788,
|
| 110764 |
+
"step": 15822
|
| 110765 |
+
},
|
| 110766 |
+
{
|
| 110767 |
+
"epoch": 18.021652421652423,
|
| 110768 |
+
"grad_norm": 0.2333732545375824,
|
| 110769 |
+
"learning_rate": 1.3000150898901192e-06,
|
| 110770 |
+
"loss": 0.5934,
|
| 110771 |
+
"step": 15823
|
| 110772 |
+
},
|
| 110773 |
+
{
|
| 110774 |
+
"epoch": 18.02279202279202,
|
| 110775 |
+
"grad_norm": 0.21590343117713928,
|
| 110776 |
+
"learning_rate": 1.298532614010245e-06,
|
| 110777 |
+
"loss": 0.6026,
|
| 110778 |
+
"step": 15824
|
| 110779 |
+
},
|
| 110780 |
+
{
|
| 110781 |
+
"epoch": 18.023931623931624,
|
| 110782 |
+
"grad_norm": 0.18734954297542572,
|
| 110783 |
+
"learning_rate": 1.2970509613475068e-06,
|
| 110784 |
+
"loss": 0.7734,
|
| 110785 |
+
"step": 15825
|
| 110786 |
+
},
|
| 110787 |
+
{
|
| 110788 |
+
"epoch": 18.025071225071226,
|
| 110789 |
+
"grad_norm": 0.20928458869457245,
|
| 110790 |
+
"learning_rate": 1.295570131953372e-06,
|
| 110791 |
+
"loss": 0.3744,
|
| 110792 |
+
"step": 15826
|
| 110793 |
+
},
|
| 110794 |
+
{
|
| 110795 |
+
"epoch": 18.026210826210825,
|
| 110796 |
+
"grad_norm": 0.20024080574512482,
|
| 110797 |
+
"learning_rate": 1.294090125879277e-06,
|
| 110798 |
+
"loss": 0.592,
|
| 110799 |
+
"step": 15827
|
| 110800 |
+
},
|
| 110801 |
+
{
|
| 110802 |
+
"epoch": 18.027350427350427,
|
| 110803 |
+
"grad_norm": 0.17945152521133423,
|
| 110804 |
+
"learning_rate": 1.2926109431766226e-06,
|
| 110805 |
+
"loss": 0.6757,
|
| 110806 |
+
"step": 15828
|
| 110807 |
+
},
|
| 110808 |
+
{
|
| 110809 |
+
"epoch": 18.02849002849003,
|
| 110810 |
+
"grad_norm": 0.17156749963760376,
|
| 110811 |
+
"learning_rate": 1.2911325838967842e-06,
|
| 110812 |
+
"loss": 0.7507,
|
| 110813 |
+
"step": 15829
|
| 110814 |
+
},
|
| 110815 |
+
{
|
| 110816 |
+
"epoch": 18.02962962962963,
|
| 110817 |
+
"grad_norm": 0.1979169100522995,
|
| 110818 |
+
"learning_rate": 1.2896550480911123e-06,
|
| 110819 |
+
"loss": 0.6886,
|
| 110820 |
+
"step": 15830
|
| 110821 |
+
},
|
| 110822 |
+
{
|
| 110823 |
+
"epoch": 18.03076923076923,
|
| 110824 |
+
"grad_norm": 0.19085948169231415,
|
| 110825 |
+
"learning_rate": 1.2881783358109217e-06,
|
| 110826 |
+
"loss": 0.7854,
|
| 110827 |
+
"step": 15831
|
| 110828 |
+
},
|
| 110829 |
+
{
|
| 110830 |
+
"epoch": 18.031908831908833,
|
| 110831 |
+
"grad_norm": 0.23247428238391876,
|
| 110832 |
+
"learning_rate": 1.28670244710751e-06,
|
| 110833 |
+
"loss": 0.6149,
|
| 110834 |
+
"step": 15832
|
| 110835 |
+
},
|
| 110836 |
+
{
|
| 110837 |
+
"epoch": 18.03304843304843,
|
| 110838 |
+
"grad_norm": 0.19736453890800476,
|
| 110839 |
+
"learning_rate": 1.2852273820321282e-06,
|
| 110840 |
+
"loss": 0.6389,
|
| 110841 |
+
"step": 15833
|
| 110842 |
+
},
|
| 110843 |
+
{
|
| 110844 |
+
"epoch": 18.034188034188034,
|
| 110845 |
+
"grad_norm": 0.21781659126281738,
|
| 110846 |
+
"learning_rate": 1.2837531406360181e-06,
|
| 110847 |
+
"loss": 0.475,
|
| 110848 |
+
"step": 15834
|
| 110849 |
+
},
|
| 110850 |
+
{
|
| 110851 |
+
"epoch": 18.035327635327636,
|
| 110852 |
+
"grad_norm": 0.2099636048078537,
|
| 110853 |
+
"learning_rate": 1.2822797229703837e-06,
|
| 110854 |
+
"loss": 0.8111,
|
| 110855 |
+
"step": 15835
|
| 110856 |
+
},
|
| 110857 |
+
{
|
| 110858 |
+
"epoch": 18.036467236467235,
|
| 110859 |
+
"grad_norm": 0.19832612574100494,
|
| 110860 |
+
"learning_rate": 1.2808071290863948e-06,
|
| 110861 |
+
"loss": 0.5439,
|
| 110862 |
+
"step": 15836
|
| 110863 |
+
},
|
| 110864 |
+
{
|
| 110865 |
+
"epoch": 18.037606837606837,
|
| 110866 |
+
"grad_norm": 0.19173333048820496,
|
| 110867 |
+
"learning_rate": 1.2793353590352052e-06,
|
| 110868 |
+
"loss": 0.7532,
|
| 110869 |
+
"step": 15837
|
| 110870 |
+
},
|
| 110871 |
+
{
|
| 110872 |
+
"epoch": 18.03874643874644,
|
| 110873 |
+
"grad_norm": 0.21488866209983826,
|
| 110874 |
+
"learning_rate": 1.2778644128679269e-06,
|
| 110875 |
+
"loss": 0.6298,
|
| 110876 |
+
"step": 15838
|
| 110877 |
+
},
|
| 110878 |
+
{
|
| 110879 |
+
"epoch": 18.03988603988604,
|
| 110880 |
+
"grad_norm": 0.1884155124425888,
|
| 110881 |
+
"learning_rate": 1.2763942906356601e-06,
|
| 110882 |
+
"loss": 0.5331,
|
| 110883 |
+
"step": 15839
|
| 110884 |
+
},
|
| 110885 |
+
{
|
| 110886 |
+
"epoch": 18.04102564102564,
|
| 110887 |
+
"grad_norm": 0.1631074994802475,
|
| 110888 |
+
"learning_rate": 1.2749249923894536e-06,
|
| 110889 |
+
"loss": 0.8695,
|
| 110890 |
+
"step": 15840
|
| 110891 |
+
},
|
| 110892 |
+
{
|
| 110893 |
+
"epoch": 18.042165242165243,
|
| 110894 |
+
"grad_norm": 0.19923914968967438,
|
| 110895 |
+
"learning_rate": 1.2734565181803492e-06,
|
| 110896 |
+
"loss": 0.6837,
|
| 110897 |
+
"step": 15841
|
| 110898 |
+
},
|
| 110899 |
+
{
|
| 110900 |
+
"epoch": 18.043304843304842,
|
| 110901 |
+
"grad_norm": 0.1920376569032669,
|
| 110902 |
+
"learning_rate": 1.2719888680593456e-06,
|
| 110903 |
+
"loss": 0.408,
|
| 110904 |
+
"step": 15842
|
| 110905 |
+
},
|
| 110906 |
+
{
|
| 110907 |
+
"epoch": 18.044444444444444,
|
| 110908 |
+
"grad_norm": 0.20901142060756683,
|
| 110909 |
+
"learning_rate": 1.270522042077424e-06,
|
| 110910 |
+
"loss": 0.456,
|
| 110911 |
+
"step": 15843
|
| 110912 |
+
},
|
| 110913 |
+
{
|
| 110914 |
+
"epoch": 18.045584045584047,
|
| 110915 |
+
"grad_norm": 0.1856703907251358,
|
| 110916 |
+
"learning_rate": 1.2690560402855238e-06,
|
| 110917 |
+
"loss": 0.7221,
|
| 110918 |
+
"step": 15844
|
| 110919 |
+
},
|
| 110920 |
+
{
|
| 110921 |
+
"epoch": 18.046723646723645,
|
| 110922 |
+
"grad_norm": 0.15371152758598328,
|
| 110923 |
+
"learning_rate": 1.2675908627345718e-06,
|
| 110924 |
+
"loss": 0.6246,
|
| 110925 |
+
"step": 15845
|
| 110926 |
+
},
|
| 110927 |
+
{
|
| 110928 |
+
"epoch": 18.047863247863248,
|
| 110929 |
+
"grad_norm": 0.16257111728191376,
|
| 110930 |
+
"learning_rate": 1.2661265094754516e-06,
|
| 110931 |
+
"loss": 0.7655,
|
| 110932 |
+
"step": 15846
|
| 110933 |
+
},
|
| 110934 |
+
{
|
| 110935 |
+
"epoch": 18.04900284900285,
|
| 110936 |
+
"grad_norm": 0.22813346982002258,
|
| 110937 |
+
"learning_rate": 1.264662980559031e-06,
|
| 110938 |
+
"loss": 0.4758,
|
| 110939 |
+
"step": 15847
|
| 110940 |
+
},
|
| 110941 |
+
{
|
| 110942 |
+
"epoch": 18.05014245014245,
|
| 110943 |
+
"grad_norm": 0.17259332537651062,
|
| 110944 |
+
"learning_rate": 1.2632002760361333e-06,
|
| 110945 |
+
"loss": 0.709,
|
| 110946 |
+
"step": 15848
|
| 110947 |
+
},
|
| 110948 |
+
{
|
| 110949 |
+
"epoch": 18.05128205128205,
|
| 110950 |
+
"grad_norm": 0.19387666881084442,
|
| 110951 |
+
"learning_rate": 1.2617383959575652e-06,
|
| 110952 |
+
"loss": 0.631,
|
| 110953 |
+
"step": 15849
|
| 110954 |
+
},
|
| 110955 |
+
{
|
| 110956 |
+
"epoch": 18.052421652421653,
|
| 110957 |
+
"grad_norm": 0.19855740666389465,
|
| 110958 |
+
"learning_rate": 1.2602773403741025e-06,
|
| 110959 |
+
"loss": 0.7316,
|
| 110960 |
+
"step": 15850
|
| 110961 |
+
},
|
| 110962 |
+
{
|
| 110963 |
+
"epoch": 18.053561253561252,
|
| 110964 |
+
"grad_norm": 0.1869831383228302,
|
| 110965 |
+
"learning_rate": 1.2588171093364991e-06,
|
| 110966 |
+
"loss": 0.6693,
|
| 110967 |
+
"step": 15851
|
| 110968 |
+
},
|
| 110969 |
+
{
|
| 110970 |
+
"epoch": 18.054700854700855,
|
| 110971 |
+
"grad_norm": 0.18067550659179688,
|
| 110972 |
+
"learning_rate": 1.2573577028954592e-06,
|
| 110973 |
+
"loss": 0.7039,
|
| 110974 |
+
"step": 15852
|
| 110975 |
+
},
|
| 110976 |
+
{
|
| 110977 |
+
"epoch": 18.055840455840457,
|
| 110978 |
+
"grad_norm": 0.1634341925382614,
|
| 110979 |
+
"learning_rate": 1.2558991211016808e-06,
|
| 110980 |
+
"loss": 0.5873,
|
| 110981 |
+
"step": 15853
|
| 110982 |
+
},
|
| 110983 |
+
{
|
| 110984 |
+
"epoch": 18.056980056980056,
|
| 110985 |
+
"grad_norm": 0.1706697791814804,
|
| 110986 |
+
"learning_rate": 1.2544413640058233e-06,
|
| 110987 |
+
"loss": 0.6032,
|
| 110988 |
+
"step": 15854
|
| 110989 |
+
},
|
| 110990 |
+
{
|
| 110991 |
+
"epoch": 18.058119658119658,
|
| 110992 |
+
"grad_norm": 0.20070824027061462,
|
| 110993 |
+
"learning_rate": 1.2529844316585159e-06,
|
| 110994 |
+
"loss": 0.5887,
|
| 110995 |
+
"step": 15855
|
| 110996 |
+
},
|
| 110997 |
+
{
|
| 110998 |
+
"epoch": 18.05925925925926,
|
| 110999 |
+
"grad_norm": 0.18240754306316376,
|
| 111000 |
+
"learning_rate": 1.2515283241103625e-06,
|
| 111001 |
+
"loss": 0.6586,
|
| 111002 |
+
"step": 15856
|
| 111003 |
+
},
|
| 111004 |
+
{
|
| 111005 |
+
"epoch": 18.06039886039886,
|
| 111006 |
+
"grad_norm": 0.2466755360364914,
|
| 111007 |
+
"learning_rate": 1.2500730414119422e-06,
|
| 111008 |
+
"loss": 0.6891,
|
| 111009 |
+
"step": 15857
|
| 111010 |
+
},
|
| 111011 |
+
{
|
| 111012 |
+
"epoch": 18.06153846153846,
|
| 111013 |
+
"grad_norm": 0.18024280667304993,
|
| 111014 |
+
"learning_rate": 1.2486185836138003e-06,
|
| 111015 |
+
"loss": 0.6849,
|
| 111016 |
+
"step": 15858
|
| 111017 |
+
},
|
| 111018 |
+
{
|
| 111019 |
+
"epoch": 18.062678062678064,
|
| 111020 |
+
"grad_norm": 0.19057750701904297,
|
| 111021 |
+
"learning_rate": 1.2471649507664469e-06,
|
| 111022 |
+
"loss": 0.6037,
|
| 111023 |
+
"step": 15859
|
| 111024 |
+
},
|
| 111025 |
+
{
|
| 111026 |
+
"epoch": 18.063817663817662,
|
| 111027 |
+
"grad_norm": 0.18585965037345886,
|
| 111028 |
+
"learning_rate": 1.2457121429203743e-06,
|
| 111029 |
+
"loss": 0.6832,
|
| 111030 |
+
"step": 15860
|
| 111031 |
+
},
|
| 111032 |
+
{
|
| 111033 |
+
"epoch": 18.064957264957265,
|
| 111034 |
+
"grad_norm": 0.18791721761226654,
|
| 111035 |
+
"learning_rate": 1.2442601601260457e-06,
|
| 111036 |
+
"loss": 0.8144,
|
| 111037 |
+
"step": 15861
|
| 111038 |
+
},
|
| 111039 |
+
{
|
| 111040 |
+
"epoch": 18.066096866096867,
|
| 111041 |
+
"grad_norm": 0.16386424005031586,
|
| 111042 |
+
"learning_rate": 1.2428090024338922e-06,
|
| 111043 |
+
"loss": 0.7695,
|
| 111044 |
+
"step": 15862
|
| 111045 |
+
},
|
| 111046 |
+
{
|
| 111047 |
+
"epoch": 18.067236467236466,
|
| 111048 |
+
"grad_norm": 0.22035211324691772,
|
| 111049 |
+
"learning_rate": 1.241358669894313e-06,
|
| 111050 |
+
"loss": 0.5671,
|
| 111051 |
+
"step": 15863
|
| 111052 |
+
},
|
| 111053 |
+
{
|
| 111054 |
+
"epoch": 18.068376068376068,
|
| 111055 |
+
"grad_norm": 0.18681329488754272,
|
| 111056 |
+
"learning_rate": 1.2399091625576814e-06,
|
| 111057 |
+
"loss": 0.5536,
|
| 111058 |
+
"step": 15864
|
| 111059 |
+
},
|
| 111060 |
+
{
|
| 111061 |
+
"epoch": 18.06951566951567,
|
| 111062 |
+
"grad_norm": 0.17148402333259583,
|
| 111063 |
+
"learning_rate": 1.238460480474349e-06,
|
| 111064 |
+
"loss": 0.5449,
|
| 111065 |
+
"step": 15865
|
| 111066 |
+
},
|
| 111067 |
+
{
|
| 111068 |
+
"epoch": 18.07065527065527,
|
| 111069 |
+
"grad_norm": 0.1965188980102539,
|
| 111070 |
+
"learning_rate": 1.237012623694625e-06,
|
| 111071 |
+
"loss": 0.654,
|
| 111072 |
+
"step": 15866
|
| 111073 |
+
},
|
| 111074 |
+
{
|
| 111075 |
+
"epoch": 18.07179487179487,
|
| 111076 |
+
"grad_norm": 0.19194090366363525,
|
| 111077 |
+
"learning_rate": 1.2355655922688004e-06,
|
| 111078 |
+
"loss": 0.5753,
|
| 111079 |
+
"step": 15867
|
| 111080 |
+
},
|
| 111081 |
+
{
|
| 111082 |
+
"epoch": 18.072934472934474,
|
| 111083 |
+
"grad_norm": 0.2225450724363327,
|
| 111084 |
+
"learning_rate": 1.2341193862471373e-06,
|
| 111085 |
+
"loss": 0.6352,
|
| 111086 |
+
"step": 15868
|
| 111087 |
+
},
|
| 111088 |
+
{
|
| 111089 |
+
"epoch": 18.074074074074073,
|
| 111090 |
+
"grad_norm": 0.17703381180763245,
|
| 111091 |
+
"learning_rate": 1.232674005679868e-06,
|
| 111092 |
+
"loss": 0.5726,
|
| 111093 |
+
"step": 15869
|
| 111094 |
+
},
|
| 111095 |
+
{
|
| 111096 |
+
"epoch": 18.075213675213675,
|
| 111097 |
+
"grad_norm": 0.18432699143886566,
|
| 111098 |
+
"learning_rate": 1.2312294506171855e-06,
|
| 111099 |
+
"loss": 0.7207,
|
| 111100 |
+
"step": 15870
|
| 111101 |
+
},
|
| 111102 |
+
{
|
| 111103 |
+
"epoch": 18.076353276353277,
|
| 111104 |
+
"grad_norm": 0.21086126565933228,
|
| 111105 |
+
"learning_rate": 1.2297857211092722e-06,
|
| 111106 |
+
"loss": 0.5844,
|
| 111107 |
+
"step": 15871
|
| 111108 |
+
},
|
| 111109 |
+
{
|
| 111110 |
+
"epoch": 18.077492877492876,
|
| 111111 |
+
"grad_norm": 0.21550235152244568,
|
| 111112 |
+
"learning_rate": 1.2283428172062656e-06,
|
| 111113 |
+
"loss": 0.6006,
|
| 111114 |
+
"step": 15872
|
| 111115 |
+
},
|
| 111116 |
+
{
|
| 111117 |
+
"epoch": 18.07863247863248,
|
| 111118 |
+
"grad_norm": 0.3013046085834503,
|
| 111119 |
+
"learning_rate": 1.2269007389582892e-06,
|
| 111120 |
+
"loss": 0.3329,
|
| 111121 |
+
"step": 15873
|
| 111122 |
+
},
|
| 111123 |
+
{
|
| 111124 |
+
"epoch": 18.07977207977208,
|
| 111125 |
+
"grad_norm": 0.1899062544107437,
|
| 111126 |
+
"learning_rate": 1.2254594864154256e-06,
|
| 111127 |
+
"loss": 0.701,
|
| 111128 |
+
"step": 15874
|
| 111129 |
+
},
|
| 111130 |
+
{
|
| 111131 |
+
"epoch": 18.08091168091168,
|
| 111132 |
+
"grad_norm": 0.20286870002746582,
|
| 111133 |
+
"learning_rate": 1.2240190596277317e-06,
|
| 111134 |
+
"loss": 0.4609,
|
| 111135 |
+
"step": 15875
|
| 111136 |
+
},
|
| 111137 |
+
{
|
| 111138 |
+
"epoch": 18.08205128205128,
|
| 111139 |
+
"grad_norm": 0.2039937525987625,
|
| 111140 |
+
"learning_rate": 1.22257945864524e-06,
|
| 111141 |
+
"loss": 0.469,
|
| 111142 |
+
"step": 15876
|
| 111143 |
+
},
|
| 111144 |
+
{
|
| 111145 |
+
"epoch": 18.083190883190884,
|
| 111146 |
+
"grad_norm": 0.20729224383831024,
|
| 111147 |
+
"learning_rate": 1.2211406835179517e-06,
|
| 111148 |
+
"loss": 0.8769,
|
| 111149 |
+
"step": 15877
|
| 111150 |
+
},
|
| 111151 |
+
{
|
| 111152 |
+
"epoch": 18.084330484330483,
|
| 111153 |
+
"grad_norm": 0.1667632758617401,
|
| 111154 |
+
"learning_rate": 1.2197027342958412e-06,
|
| 111155 |
+
"loss": 0.6936,
|
| 111156 |
+
"step": 15878
|
| 111157 |
+
},
|
| 111158 |
+
{
|
| 111159 |
+
"epoch": 18.085470085470085,
|
| 111160 |
+
"grad_norm": 0.23237772285938263,
|
| 111161 |
+
"learning_rate": 1.2182656110288516e-06,
|
| 111162 |
+
"loss": 0.4519,
|
| 111163 |
+
"step": 15879
|
| 111164 |
+
},
|
| 111165 |
+
{
|
| 111166 |
+
"epoch": 18.086609686609687,
|
| 111167 |
+
"grad_norm": 0.20046734809875488,
|
| 111168 |
+
"learning_rate": 1.2168293137668957e-06,
|
| 111169 |
+
"loss": 0.726,
|
| 111170 |
+
"step": 15880
|
| 111171 |
+
},
|
| 111172 |
+
{
|
| 111173 |
+
"epoch": 18.087749287749286,
|
| 111174 |
+
"grad_norm": 0.2263612598180771,
|
| 111175 |
+
"learning_rate": 1.2153938425598644e-06,
|
| 111176 |
+
"loss": 0.5326,
|
| 111177 |
+
"step": 15881
|
| 111178 |
+
},
|
| 111179 |
+
{
|
| 111180 |
+
"epoch": 18.08888888888889,
|
| 111181 |
+
"grad_norm": 0.1870729625225067,
|
| 111182 |
+
"learning_rate": 1.2139591974576092e-06,
|
| 111183 |
+
"loss": 0.6429,
|
| 111184 |
+
"step": 15882
|
| 111185 |
+
},
|
| 111186 |
+
{
|
| 111187 |
+
"epoch": 18.09002849002849,
|
| 111188 |
+
"grad_norm": 0.1688375025987625,
|
| 111189 |
+
"learning_rate": 1.2125253785099655e-06,
|
| 111190 |
+
"loss": 0.7038,
|
| 111191 |
+
"step": 15883
|
| 111192 |
+
},
|
| 111193 |
+
{
|
| 111194 |
+
"epoch": 18.09116809116809,
|
| 111195 |
+
"grad_norm": 0.15771566331386566,
|
| 111196 |
+
"learning_rate": 1.2110923857667295e-06,
|
| 111197 |
+
"loss": 0.5643,
|
| 111198 |
+
"step": 15884
|
| 111199 |
+
},
|
| 111200 |
+
{
|
| 111201 |
+
"epoch": 18.092307692307692,
|
| 111202 |
+
"grad_norm": 0.1544315665960312,
|
| 111203 |
+
"learning_rate": 1.209660219277678e-06,
|
| 111204 |
+
"loss": 0.7892,
|
| 111205 |
+
"step": 15885
|
| 111206 |
+
},
|
| 111207 |
+
{
|
| 111208 |
+
"epoch": 18.093447293447294,
|
| 111209 |
+
"grad_norm": 0.16746006906032562,
|
| 111210 |
+
"learning_rate": 1.208228879092549e-06,
|
| 111211 |
+
"loss": 0.8208,
|
| 111212 |
+
"step": 15886
|
| 111213 |
+
},
|
| 111214 |
+
{
|
| 111215 |
+
"epoch": 18.094586894586893,
|
| 111216 |
+
"grad_norm": 0.21814511716365814,
|
| 111217 |
+
"learning_rate": 1.2067983652610555e-06,
|
| 111218 |
+
"loss": 0.8497,
|
| 111219 |
+
"step": 15887
|
| 111220 |
+
},
|
| 111221 |
+
{
|
| 111222 |
+
"epoch": 18.095726495726495,
|
| 111223 |
+
"grad_norm": 0.20921318233013153,
|
| 111224 |
+
"learning_rate": 1.2053686778328883e-06,
|
| 111225 |
+
"loss": 0.4532,
|
| 111226 |
+
"step": 15888
|
| 111227 |
+
},
|
| 111228 |
+
{
|
| 111229 |
+
"epoch": 18.096866096866098,
|
| 111230 |
+
"grad_norm": 0.2083912193775177,
|
| 111231 |
+
"learning_rate": 1.203939816857702e-06,
|
| 111232 |
+
"loss": 0.6343,
|
| 111233 |
+
"step": 15889
|
| 111234 |
+
},
|
| 111235 |
+
{
|
| 111236 |
+
"epoch": 18.098005698005696,
|
| 111237 |
+
"grad_norm": 0.22173093259334564,
|
| 111238 |
+
"learning_rate": 1.2025117823851267e-06,
|
| 111239 |
+
"loss": 0.6096,
|
| 111240 |
+
"step": 15890
|
| 111241 |
+
},
|
| 111242 |
+
{
|
| 111243 |
+
"epoch": 18.0991452991453,
|
| 111244 |
+
"grad_norm": 0.24359305202960968,
|
| 111245 |
+
"learning_rate": 1.2010845744647587e-06,
|
| 111246 |
+
"loss": 0.4292,
|
| 111247 |
+
"step": 15891
|
| 111248 |
+
},
|
| 111249 |
+
{
|
| 111250 |
+
"epoch": 18.1002849002849,
|
| 111251 |
+
"grad_norm": 0.16970635950565338,
|
| 111252 |
+
"learning_rate": 1.1996581931461748e-06,
|
| 111253 |
+
"loss": 0.7707,
|
| 111254 |
+
"step": 15892
|
| 111255 |
+
},
|
| 111256 |
+
{
|
| 111257 |
+
"epoch": 18.1014245014245,
|
| 111258 |
+
"grad_norm": 0.16749995946884155,
|
| 111259 |
+
"learning_rate": 1.1982326384789106e-06,
|
| 111260 |
+
"loss": 0.8025,
|
| 111261 |
+
"step": 15893
|
| 111262 |
+
},
|
| 111263 |
+
{
|
| 111264 |
+
"epoch": 18.102564102564102,
|
| 111265 |
+
"grad_norm": 0.1994680017232895,
|
| 111266 |
+
"learning_rate": 1.196807910512479e-06,
|
| 111267 |
+
"loss": 0.6891,
|
| 111268 |
+
"step": 15894
|
| 111269 |
+
},
|
| 111270 |
+
{
|
| 111271 |
+
"epoch": 18.103703703703705,
|
| 111272 |
+
"grad_norm": 0.18482127785682678,
|
| 111273 |
+
"learning_rate": 1.1953840092963715e-06,
|
| 111274 |
+
"loss": 0.7436,
|
| 111275 |
+
"step": 15895
|
| 111276 |
+
},
|
| 111277 |
+
{
|
| 111278 |
+
"epoch": 18.104843304843303,
|
| 111279 |
+
"grad_norm": 0.1713782399892807,
|
| 111280 |
+
"learning_rate": 1.1939609348800423e-06,
|
| 111281 |
+
"loss": 0.603,
|
| 111282 |
+
"step": 15896
|
| 111283 |
+
},
|
| 111284 |
+
{
|
| 111285 |
+
"epoch": 18.105982905982906,
|
| 111286 |
+
"grad_norm": 0.1746223419904709,
|
| 111287 |
+
"learning_rate": 1.1925386873129135e-06,
|
| 111288 |
+
"loss": 0.5961,
|
| 111289 |
+
"step": 15897
|
| 111290 |
+
},
|
| 111291 |
+
{
|
| 111292 |
+
"epoch": 18.107122507122508,
|
| 111293 |
+
"grad_norm": 0.17016907036304474,
|
| 111294 |
+
"learning_rate": 1.1911172666443842e-06,
|
| 111295 |
+
"loss": 0.8368,
|
| 111296 |
+
"step": 15898
|
| 111297 |
+
},
|
| 111298 |
+
{
|
| 111299 |
+
"epoch": 18.108262108262107,
|
| 111300 |
+
"grad_norm": 0.20107758045196533,
|
| 111301 |
+
"learning_rate": 1.189696672923829e-06,
|
| 111302 |
+
"loss": 0.4812,
|
| 111303 |
+
"step": 15899
|
| 111304 |
+
},
|
| 111305 |
+
{
|
| 111306 |
+
"epoch": 18.10940170940171,
|
| 111307 |
+
"grad_norm": 0.19996224343776703,
|
| 111308 |
+
"learning_rate": 1.1882769062005888e-06,
|
| 111309 |
+
"loss": 0.7433,
|
| 111310 |
+
"step": 15900
|
| 111311 |
}
|
| 111312 |
],
|
| 111313 |
"logging_steps": 1,
|
|
|
|
| 111327 |
"attributes": {}
|
| 111328 |
}
|
| 111329 |
},
|
| 111330 |
+
"total_flos": 8.889886036305543e+19,
|
| 111331 |
"train_batch_size": 8,
|
| 111332 |
"trial_name": null,
|
| 111333 |
"trial_params": null
|