Training in progress, step 14700, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:999737be1250345e07cbc8c49c6f615c61a6540b24d938e0eae8422c8e5784c0
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc6b178c4e55ce1a30225b782329bf5f137e62b6e3baa626c043110fe745f977
|
| 3 |
size 173247691
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e51a42ec5ffac256f3cd0ee0ccd7a6d3befe58914ad400c0cc707bc4a5ee283
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28b29328685228ac4012747bbe4f4cb19926cd3fac7cdda1f451c65123dae04b
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -100808,6 +100808,2106 @@
|
|
| 100808 |
"learning_rate": 1.7987807008613854e-06,
|
| 100809 |
"loss": 0.682104229927063,
|
| 100810 |
"step": 14400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100811 |
}
|
| 100812 |
],
|
| 100813 |
"logging_steps": 1,
|
|
@@ -100827,7 +102927,7 @@
|
|
| 100827 |
"attributes": {}
|
| 100828 |
}
|
| 100829 |
},
|
| 100830 |
-
"total_flos": 4.
|
| 100831 |
"train_batch_size": 8,
|
| 100832 |
"trial_name": null,
|
| 100833 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 18.03680981595092,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 14700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 100808 |
"learning_rate": 1.7987807008613854e-06,
|
| 100809 |
"loss": 0.682104229927063,
|
| 100810 |
"step": 14400
|
| 100811 |
+
},
|
| 100812 |
+
{
|
| 100813 |
+
"epoch": 17.669938650306747,
|
| 100814 |
+
"grad_norm": 0.26147767901420593,
|
| 100815 |
+
"learning_rate": 1.7969117386798379e-06,
|
| 100816 |
+
"loss": 0.47439491748809814,
|
| 100817 |
+
"step": 14401
|
| 100818 |
+
},
|
| 100819 |
+
{
|
| 100820 |
+
"epoch": 17.67116564417178,
|
| 100821 |
+
"grad_norm": 0.27708715200424194,
|
| 100822 |
+
"learning_rate": 1.7950437117486035e-06,
|
| 100823 |
+
"loss": 0.5937535762786865,
|
| 100824 |
+
"step": 14402
|
| 100825 |
+
},
|
| 100826 |
+
{
|
| 100827 |
+
"epoch": 17.67239263803681,
|
| 100828 |
+
"grad_norm": 0.2620975971221924,
|
| 100829 |
+
"learning_rate": 1.7931766201429723e-06,
|
| 100830 |
+
"loss": 0.4101699888706207,
|
| 100831 |
+
"step": 14403
|
| 100832 |
+
},
|
| 100833 |
+
{
|
| 100834 |
+
"epoch": 17.67361963190184,
|
| 100835 |
+
"grad_norm": 0.27248403429985046,
|
| 100836 |
+
"learning_rate": 1.7913104639382034e-06,
|
| 100837 |
+
"loss": 0.6348403096199036,
|
| 100838 |
+
"step": 14404
|
| 100839 |
+
},
|
| 100840 |
+
{
|
| 100841 |
+
"epoch": 17.67484662576687,
|
| 100842 |
+
"grad_norm": 0.2789841294288635,
|
| 100843 |
+
"learning_rate": 1.789445243209517e-06,
|
| 100844 |
+
"loss": 0.5949240922927856,
|
| 100845 |
+
"step": 14405
|
| 100846 |
+
},
|
| 100847 |
+
{
|
| 100848 |
+
"epoch": 17.6760736196319,
|
| 100849 |
+
"grad_norm": 0.2628888189792633,
|
| 100850 |
+
"learning_rate": 1.787580958032098e-06,
|
| 100851 |
+
"loss": 0.45168930292129517,
|
| 100852 |
+
"step": 14406
|
| 100853 |
+
},
|
| 100854 |
+
{
|
| 100855 |
+
"epoch": 17.677300613496932,
|
| 100856 |
+
"grad_norm": 0.2553688585758209,
|
| 100857 |
+
"learning_rate": 1.7857176084810828e-06,
|
| 100858 |
+
"loss": 0.6703063249588013,
|
| 100859 |
+
"step": 14407
|
| 100860 |
+
},
|
| 100861 |
+
{
|
| 100862 |
+
"epoch": 17.678527607361964,
|
| 100863 |
+
"grad_norm": 0.2887146770954132,
|
| 100864 |
+
"learning_rate": 1.783855194631584e-06,
|
| 100865 |
+
"loss": 0.5317137241363525,
|
| 100866 |
+
"step": 14408
|
| 100867 |
+
},
|
| 100868 |
+
{
|
| 100869 |
+
"epoch": 17.679754601226993,
|
| 100870 |
+
"grad_norm": 0.22660981118679047,
|
| 100871 |
+
"learning_rate": 1.7819937165586637e-06,
|
| 100872 |
+
"loss": 0.5763599276542664,
|
| 100873 |
+
"step": 14409
|
| 100874 |
+
},
|
| 100875 |
+
{
|
| 100876 |
+
"epoch": 17.680981595092025,
|
| 100877 |
+
"grad_norm": 0.26851749420166016,
|
| 100878 |
+
"learning_rate": 1.780133174337359e-06,
|
| 100879 |
+
"loss": 0.4394838213920593,
|
| 100880 |
+
"step": 14410
|
| 100881 |
+
},
|
| 100882 |
+
{
|
| 100883 |
+
"epoch": 17.682208588957057,
|
| 100884 |
+
"grad_norm": 0.27212029695510864,
|
| 100885 |
+
"learning_rate": 1.7782735680426655e-06,
|
| 100886 |
+
"loss": 0.5942716002464294,
|
| 100887 |
+
"step": 14411
|
| 100888 |
+
},
|
| 100889 |
+
{
|
| 100890 |
+
"epoch": 17.683435582822085,
|
| 100891 |
+
"grad_norm": 0.29270434379577637,
|
| 100892 |
+
"learning_rate": 1.7764148977495286e-06,
|
| 100893 |
+
"loss": 0.6551831960678101,
|
| 100894 |
+
"step": 14412
|
| 100895 |
+
},
|
| 100896 |
+
{
|
| 100897 |
+
"epoch": 17.684662576687117,
|
| 100898 |
+
"grad_norm": 0.23340660333633423,
|
| 100899 |
+
"learning_rate": 1.7745571635328723e-06,
|
| 100900 |
+
"loss": 0.5789006352424622,
|
| 100901 |
+
"step": 14413
|
| 100902 |
+
},
|
| 100903 |
+
{
|
| 100904 |
+
"epoch": 17.68588957055215,
|
| 100905 |
+
"grad_norm": 0.2515926957130432,
|
| 100906 |
+
"learning_rate": 1.7727003654675778e-06,
|
| 100907 |
+
"loss": 0.597957968711853,
|
| 100908 |
+
"step": 14414
|
| 100909 |
+
},
|
| 100910 |
+
{
|
| 100911 |
+
"epoch": 17.687116564417177,
|
| 100912 |
+
"grad_norm": 0.2698799967765808,
|
| 100913 |
+
"learning_rate": 1.7708445036284826e-06,
|
| 100914 |
+
"loss": 0.7288224697113037,
|
| 100915 |
+
"step": 14415
|
| 100916 |
+
},
|
| 100917 |
+
{
|
| 100918 |
+
"epoch": 17.68834355828221,
|
| 100919 |
+
"grad_norm": 0.263457328081131,
|
| 100920 |
+
"learning_rate": 1.7689895780903964e-06,
|
| 100921 |
+
"loss": 0.6287193298339844,
|
| 100922 |
+
"step": 14416
|
| 100923 |
+
},
|
| 100924 |
+
{
|
| 100925 |
+
"epoch": 17.689570552147238,
|
| 100926 |
+
"grad_norm": 0.28241053223609924,
|
| 100927 |
+
"learning_rate": 1.7671355889280816e-06,
|
| 100928 |
+
"loss": 0.618333101272583,
|
| 100929 |
+
"step": 14417
|
| 100930 |
+
},
|
| 100931 |
+
{
|
| 100932 |
+
"epoch": 17.69079754601227,
|
| 100933 |
+
"grad_norm": 0.23713575303554535,
|
| 100934 |
+
"learning_rate": 1.76528253621627e-06,
|
| 100935 |
+
"loss": 0.3304596245288849,
|
| 100936 |
+
"step": 14418
|
| 100937 |
+
},
|
| 100938 |
+
{
|
| 100939 |
+
"epoch": 17.692024539877302,
|
| 100940 |
+
"grad_norm": 0.27843257784843445,
|
| 100941 |
+
"learning_rate": 1.7634304200296493e-06,
|
| 100942 |
+
"loss": 0.5487096309661865,
|
| 100943 |
+
"step": 14419
|
| 100944 |
+
},
|
| 100945 |
+
{
|
| 100946 |
+
"epoch": 17.69325153374233,
|
| 100947 |
+
"grad_norm": 0.2896970212459564,
|
| 100948 |
+
"learning_rate": 1.7615792404428789e-06,
|
| 100949 |
+
"loss": 0.7352400422096252,
|
| 100950 |
+
"step": 14420
|
| 100951 |
+
},
|
| 100952 |
+
{
|
| 100953 |
+
"epoch": 17.694478527607362,
|
| 100954 |
+
"grad_norm": 0.2644232511520386,
|
| 100955 |
+
"learning_rate": 1.7597289975305714e-06,
|
| 100956 |
+
"loss": 0.666672945022583,
|
| 100957 |
+
"step": 14421
|
| 100958 |
+
},
|
| 100959 |
+
{
|
| 100960 |
+
"epoch": 17.69570552147239,
|
| 100961 |
+
"grad_norm": 0.22160853445529938,
|
| 100962 |
+
"learning_rate": 1.7578796913673034e-06,
|
| 100963 |
+
"loss": 0.30926448106765747,
|
| 100964 |
+
"step": 14422
|
| 100965 |
+
},
|
| 100966 |
+
{
|
| 100967 |
+
"epoch": 17.696932515337423,
|
| 100968 |
+
"grad_norm": 0.24156180024147034,
|
| 100969 |
+
"learning_rate": 1.7560313220276098e-06,
|
| 100970 |
+
"loss": 0.6096391677856445,
|
| 100971 |
+
"step": 14423
|
| 100972 |
+
},
|
| 100973 |
+
{
|
| 100974 |
+
"epoch": 17.698159509202455,
|
| 100975 |
+
"grad_norm": 0.27816241979599,
|
| 100976 |
+
"learning_rate": 1.7541838895860057e-06,
|
| 100977 |
+
"loss": 0.5655328631401062,
|
| 100978 |
+
"step": 14424
|
| 100979 |
+
},
|
| 100980 |
+
{
|
| 100981 |
+
"epoch": 17.699386503067483,
|
| 100982 |
+
"grad_norm": 0.2434120625257492,
|
| 100983 |
+
"learning_rate": 1.7523373941169541e-06,
|
| 100984 |
+
"loss": 0.5196170210838318,
|
| 100985 |
+
"step": 14425
|
| 100986 |
+
},
|
| 100987 |
+
{
|
| 100988 |
+
"epoch": 17.700613496932515,
|
| 100989 |
+
"grad_norm": 0.2635331451892853,
|
| 100990 |
+
"learning_rate": 1.750491835694873e-06,
|
| 100991 |
+
"loss": 0.4783024191856384,
|
| 100992 |
+
"step": 14426
|
| 100993 |
+
},
|
| 100994 |
+
{
|
| 100995 |
+
"epoch": 17.701840490797547,
|
| 100996 |
+
"grad_norm": 0.2635659873485565,
|
| 100997 |
+
"learning_rate": 1.7486472143941586e-06,
|
| 100998 |
+
"loss": 0.5331522822380066,
|
| 100999 |
+
"step": 14427
|
| 101000 |
+
},
|
| 101001 |
+
{
|
| 101002 |
+
"epoch": 17.703067484662576,
|
| 101003 |
+
"grad_norm": 0.24245093762874603,
|
| 101004 |
+
"learning_rate": 1.7468035302891568e-06,
|
| 101005 |
+
"loss": 0.6252617835998535,
|
| 101006 |
+
"step": 14428
|
| 101007 |
+
},
|
| 101008 |
+
{
|
| 101009 |
+
"epoch": 17.704294478527608,
|
| 101010 |
+
"grad_norm": 0.23542805016040802,
|
| 101011 |
+
"learning_rate": 1.7449607834541858e-06,
|
| 101012 |
+
"loss": 0.5733821392059326,
|
| 101013 |
+
"step": 14429
|
| 101014 |
+
},
|
| 101015 |
+
{
|
| 101016 |
+
"epoch": 17.70552147239264,
|
| 101017 |
+
"grad_norm": 0.25554269552230835,
|
| 101018 |
+
"learning_rate": 1.7431189739635228e-06,
|
| 101019 |
+
"loss": 0.6193656921386719,
|
| 101020 |
+
"step": 14430
|
| 101021 |
+
},
|
| 101022 |
+
{
|
| 101023 |
+
"epoch": 17.706748466257668,
|
| 101024 |
+
"grad_norm": 0.29763785004615784,
|
| 101025 |
+
"learning_rate": 1.7412781018914027e-06,
|
| 101026 |
+
"loss": 0.6570577621459961,
|
| 101027 |
+
"step": 14431
|
| 101028 |
+
},
|
| 101029 |
+
{
|
| 101030 |
+
"epoch": 17.7079754601227,
|
| 101031 |
+
"grad_norm": 0.2672852575778961,
|
| 101032 |
+
"learning_rate": 1.7394381673120242e-06,
|
| 101033 |
+
"loss": 0.5586692094802856,
|
| 101034 |
+
"step": 14432
|
| 101035 |
+
},
|
| 101036 |
+
{
|
| 101037 |
+
"epoch": 17.70920245398773,
|
| 101038 |
+
"grad_norm": 0.2807861566543579,
|
| 101039 |
+
"learning_rate": 1.7375991702995503e-06,
|
| 101040 |
+
"loss": 0.772005558013916,
|
| 101041 |
+
"step": 14433
|
| 101042 |
+
},
|
| 101043 |
+
{
|
| 101044 |
+
"epoch": 17.71042944785276,
|
| 101045 |
+
"grad_norm": 0.2668750286102295,
|
| 101046 |
+
"learning_rate": 1.7357611109281109e-06,
|
| 101047 |
+
"loss": 0.7570143938064575,
|
| 101048 |
+
"step": 14434
|
| 101049 |
+
},
|
| 101050 |
+
{
|
| 101051 |
+
"epoch": 17.711656441717793,
|
| 101052 |
+
"grad_norm": 0.2762105464935303,
|
| 101053 |
+
"learning_rate": 1.7339239892717908e-06,
|
| 101054 |
+
"loss": 0.632401168346405,
|
| 101055 |
+
"step": 14435
|
| 101056 |
+
},
|
| 101057 |
+
{
|
| 101058 |
+
"epoch": 17.71288343558282,
|
| 101059 |
+
"grad_norm": 0.2651897370815277,
|
| 101060 |
+
"learning_rate": 1.7320878054046336e-06,
|
| 101061 |
+
"loss": 0.6254444718360901,
|
| 101062 |
+
"step": 14436
|
| 101063 |
+
},
|
| 101064 |
+
{
|
| 101065 |
+
"epoch": 17.714110429447853,
|
| 101066 |
+
"grad_norm": 0.30766695737838745,
|
| 101067 |
+
"learning_rate": 1.730252559400658e-06,
|
| 101068 |
+
"loss": 0.6680042743682861,
|
| 101069 |
+
"step": 14437
|
| 101070 |
+
},
|
| 101071 |
+
{
|
| 101072 |
+
"epoch": 17.715337423312885,
|
| 101073 |
+
"grad_norm": 0.6888008117675781,
|
| 101074 |
+
"learning_rate": 1.7284182513338297e-06,
|
| 101075 |
+
"loss": 0.4033116400241852,
|
| 101076 |
+
"step": 14438
|
| 101077 |
+
},
|
| 101078 |
+
{
|
| 101079 |
+
"epoch": 17.716564417177914,
|
| 101080 |
+
"grad_norm": 0.27741289138793945,
|
| 101081 |
+
"learning_rate": 1.7265848812780928e-06,
|
| 101082 |
+
"loss": 0.5820201635360718,
|
| 101083 |
+
"step": 14439
|
| 101084 |
+
},
|
| 101085 |
+
{
|
| 101086 |
+
"epoch": 17.717791411042946,
|
| 101087 |
+
"grad_norm": 0.263315349817276,
|
| 101088 |
+
"learning_rate": 1.7247524493073431e-06,
|
| 101089 |
+
"loss": 0.5300512909889221,
|
| 101090 |
+
"step": 14440
|
| 101091 |
+
},
|
| 101092 |
+
{
|
| 101093 |
+
"epoch": 17.719018404907974,
|
| 101094 |
+
"grad_norm": 0.25898921489715576,
|
| 101095 |
+
"learning_rate": 1.7229209554954384e-06,
|
| 101096 |
+
"loss": 0.5288949012756348,
|
| 101097 |
+
"step": 14441
|
| 101098 |
+
},
|
| 101099 |
+
{
|
| 101100 |
+
"epoch": 17.720245398773006,
|
| 101101 |
+
"grad_norm": 0.24144957959651947,
|
| 101102 |
+
"learning_rate": 1.7210903999161976e-06,
|
| 101103 |
+
"loss": 0.5988331437110901,
|
| 101104 |
+
"step": 14442
|
| 101105 |
+
},
|
| 101106 |
+
{
|
| 101107 |
+
"epoch": 17.721472392638038,
|
| 101108 |
+
"grad_norm": 0.2512979507446289,
|
| 101109 |
+
"learning_rate": 1.7192607826434116e-06,
|
| 101110 |
+
"loss": 0.7105739116668701,
|
| 101111 |
+
"step": 14443
|
| 101112 |
+
},
|
| 101113 |
+
{
|
| 101114 |
+
"epoch": 17.722699386503066,
|
| 101115 |
+
"grad_norm": 0.27440232038497925,
|
| 101116 |
+
"learning_rate": 1.717432103750824e-06,
|
| 101117 |
+
"loss": 0.4024357795715332,
|
| 101118 |
+
"step": 14444
|
| 101119 |
+
},
|
| 101120 |
+
{
|
| 101121 |
+
"epoch": 17.7239263803681,
|
| 101122 |
+
"grad_norm": 0.2921546399593353,
|
| 101123 |
+
"learning_rate": 1.7156043633121481e-06,
|
| 101124 |
+
"loss": 0.7575478553771973,
|
| 101125 |
+
"step": 14445
|
| 101126 |
+
},
|
| 101127 |
+
{
|
| 101128 |
+
"epoch": 17.72515337423313,
|
| 101129 |
+
"grad_norm": 0.24694538116455078,
|
| 101130 |
+
"learning_rate": 1.713777561401045e-06,
|
| 101131 |
+
"loss": 0.5967926979064941,
|
| 101132 |
+
"step": 14446
|
| 101133 |
+
},
|
| 101134 |
+
{
|
| 101135 |
+
"epoch": 17.72638036809816,
|
| 101136 |
+
"grad_norm": 0.2890719473361969,
|
| 101137 |
+
"learning_rate": 1.711951698091155e-06,
|
| 101138 |
+
"loss": 0.5971429347991943,
|
| 101139 |
+
"step": 14447
|
| 101140 |
+
},
|
| 101141 |
+
{
|
| 101142 |
+
"epoch": 17.72760736196319,
|
| 101143 |
+
"grad_norm": 0.3042852282524109,
|
| 101144 |
+
"learning_rate": 1.710126773456075e-06,
|
| 101145 |
+
"loss": 0.643683135509491,
|
| 101146 |
+
"step": 14448
|
| 101147 |
+
},
|
| 101148 |
+
{
|
| 101149 |
+
"epoch": 17.72883435582822,
|
| 101150 |
+
"grad_norm": 0.2705894410610199,
|
| 101151 |
+
"learning_rate": 1.7083027875693631e-06,
|
| 101152 |
+
"loss": 0.6428991556167603,
|
| 101153 |
+
"step": 14449
|
| 101154 |
+
},
|
| 101155 |
+
{
|
| 101156 |
+
"epoch": 17.73006134969325,
|
| 101157 |
+
"grad_norm": 0.2516762614250183,
|
| 101158 |
+
"learning_rate": 1.7064797405045325e-06,
|
| 101159 |
+
"loss": 0.5717575550079346,
|
| 101160 |
+
"step": 14450
|
| 101161 |
+
},
|
| 101162 |
+
{
|
| 101163 |
+
"epoch": 17.731288343558283,
|
| 101164 |
+
"grad_norm": 0.2620484530925751,
|
| 101165 |
+
"learning_rate": 1.7046576323350661e-06,
|
| 101166 |
+
"loss": 0.4684022068977356,
|
| 101167 |
+
"step": 14451
|
| 101168 |
+
},
|
| 101169 |
+
{
|
| 101170 |
+
"epoch": 17.73251533742331,
|
| 101171 |
+
"grad_norm": 0.30806615948677063,
|
| 101172 |
+
"learning_rate": 1.7028364631344134e-06,
|
| 101173 |
+
"loss": 0.7536346912384033,
|
| 101174 |
+
"step": 14452
|
| 101175 |
+
},
|
| 101176 |
+
{
|
| 101177 |
+
"epoch": 17.733742331288344,
|
| 101178 |
+
"grad_norm": 0.2730661928653717,
|
| 101179 |
+
"learning_rate": 1.7010162329759743e-06,
|
| 101180 |
+
"loss": 0.49218887090682983,
|
| 101181 |
+
"step": 14453
|
| 101182 |
+
},
|
| 101183 |
+
{
|
| 101184 |
+
"epoch": 17.734969325153376,
|
| 101185 |
+
"grad_norm": 0.26885226368904114,
|
| 101186 |
+
"learning_rate": 1.699196941933126e-06,
|
| 101187 |
+
"loss": 0.49474549293518066,
|
| 101188 |
+
"step": 14454
|
| 101189 |
+
},
|
| 101190 |
+
{
|
| 101191 |
+
"epoch": 17.736196319018404,
|
| 101192 |
+
"grad_norm": 0.2716234028339386,
|
| 101193 |
+
"learning_rate": 1.6973785900791877e-06,
|
| 101194 |
+
"loss": 0.5910289287567139,
|
| 101195 |
+
"step": 14455
|
| 101196 |
+
},
|
| 101197 |
+
{
|
| 101198 |
+
"epoch": 17.737423312883436,
|
| 101199 |
+
"grad_norm": 0.2848872244358063,
|
| 101200 |
+
"learning_rate": 1.6955611774874592e-06,
|
| 101201 |
+
"loss": 0.5455286502838135,
|
| 101202 |
+
"step": 14456
|
| 101203 |
+
},
|
| 101204 |
+
{
|
| 101205 |
+
"epoch": 17.738650306748465,
|
| 101206 |
+
"grad_norm": 0.2808411717414856,
|
| 101207 |
+
"learning_rate": 1.69374470423119e-06,
|
| 101208 |
+
"loss": 0.7998709678649902,
|
| 101209 |
+
"step": 14457
|
| 101210 |
+
},
|
| 101211 |
+
{
|
| 101212 |
+
"epoch": 17.739877300613497,
|
| 101213 |
+
"grad_norm": 0.2624289095401764,
|
| 101214 |
+
"learning_rate": 1.6919291703836022e-06,
|
| 101215 |
+
"loss": 0.6524244546890259,
|
| 101216 |
+
"step": 14458
|
| 101217 |
+
},
|
| 101218 |
+
{
|
| 101219 |
+
"epoch": 17.74110429447853,
|
| 101220 |
+
"grad_norm": 0.30455687642097473,
|
| 101221 |
+
"learning_rate": 1.6901145760178788e-06,
|
| 101222 |
+
"loss": 0.7363672256469727,
|
| 101223 |
+
"step": 14459
|
| 101224 |
+
},
|
| 101225 |
+
{
|
| 101226 |
+
"epoch": 17.742331288343557,
|
| 101227 |
+
"grad_norm": 0.28727486729621887,
|
| 101228 |
+
"learning_rate": 1.6883009212071477e-06,
|
| 101229 |
+
"loss": 0.5129668116569519,
|
| 101230 |
+
"step": 14460
|
| 101231 |
+
},
|
| 101232 |
+
{
|
| 101233 |
+
"epoch": 17.74355828220859,
|
| 101234 |
+
"grad_norm": 0.29007336497306824,
|
| 101235 |
+
"learning_rate": 1.6864882060245223e-06,
|
| 101236 |
+
"loss": 0.3749275207519531,
|
| 101237 |
+
"step": 14461
|
| 101238 |
+
},
|
| 101239 |
+
{
|
| 101240 |
+
"epoch": 17.74478527607362,
|
| 101241 |
+
"grad_norm": 0.26407772302627563,
|
| 101242 |
+
"learning_rate": 1.684676430543064e-06,
|
| 101243 |
+
"loss": 0.46111637353897095,
|
| 101244 |
+
"step": 14462
|
| 101245 |
+
},
|
| 101246 |
+
{
|
| 101247 |
+
"epoch": 17.74601226993865,
|
| 101248 |
+
"grad_norm": 0.24980175495147705,
|
| 101249 |
+
"learning_rate": 1.6828655948358002e-06,
|
| 101250 |
+
"loss": 0.5749626159667969,
|
| 101251 |
+
"step": 14463
|
| 101252 |
+
},
|
| 101253 |
+
{
|
| 101254 |
+
"epoch": 17.74723926380368,
|
| 101255 |
+
"grad_norm": 0.2503575086593628,
|
| 101256 |
+
"learning_rate": 1.6810556989757253e-06,
|
| 101257 |
+
"loss": 0.6050692796707153,
|
| 101258 |
+
"step": 14464
|
| 101259 |
+
},
|
| 101260 |
+
{
|
| 101261 |
+
"epoch": 17.74846625766871,
|
| 101262 |
+
"grad_norm": 0.2976168990135193,
|
| 101263 |
+
"learning_rate": 1.679246743035784e-06,
|
| 101264 |
+
"loss": 0.6255719065666199,
|
| 101265 |
+
"step": 14465
|
| 101266 |
+
},
|
| 101267 |
+
{
|
| 101268 |
+
"epoch": 17.749693251533742,
|
| 101269 |
+
"grad_norm": 0.2517981231212616,
|
| 101270 |
+
"learning_rate": 1.6774387270888953e-06,
|
| 101271 |
+
"loss": 0.4583664536476135,
|
| 101272 |
+
"step": 14466
|
| 101273 |
+
},
|
| 101274 |
+
{
|
| 101275 |
+
"epoch": 17.750920245398774,
|
| 101276 |
+
"grad_norm": 0.2787110507488251,
|
| 101277 |
+
"learning_rate": 1.6756316512079318e-06,
|
| 101278 |
+
"loss": 0.6985753774642944,
|
| 101279 |
+
"step": 14467
|
| 101280 |
+
},
|
| 101281 |
+
{
|
| 101282 |
+
"epoch": 17.752147239263802,
|
| 101283 |
+
"grad_norm": 0.2829684019088745,
|
| 101284 |
+
"learning_rate": 1.673825515465735e-06,
|
| 101285 |
+
"loss": 0.5918815732002258,
|
| 101286 |
+
"step": 14468
|
| 101287 |
+
},
|
| 101288 |
+
{
|
| 101289 |
+
"epoch": 17.753374233128834,
|
| 101290 |
+
"grad_norm": 0.2527298033237457,
|
| 101291 |
+
"learning_rate": 1.6720203199351025e-06,
|
| 101292 |
+
"loss": 0.6950498819351196,
|
| 101293 |
+
"step": 14469
|
| 101294 |
+
},
|
| 101295 |
+
{
|
| 101296 |
+
"epoch": 17.754601226993866,
|
| 101297 |
+
"grad_norm": 0.28229647874832153,
|
| 101298 |
+
"learning_rate": 1.6702160646887955e-06,
|
| 101299 |
+
"loss": 0.4725314974784851,
|
| 101300 |
+
"step": 14470
|
| 101301 |
+
},
|
| 101302 |
+
{
|
| 101303 |
+
"epoch": 17.755828220858895,
|
| 101304 |
+
"grad_norm": 0.23890115320682526,
|
| 101305 |
+
"learning_rate": 1.6684127497995444e-06,
|
| 101306 |
+
"loss": 0.47889724373817444,
|
| 101307 |
+
"step": 14471
|
| 101308 |
+
},
|
| 101309 |
+
{
|
| 101310 |
+
"epoch": 17.757055214723927,
|
| 101311 |
+
"grad_norm": 0.27976393699645996,
|
| 101312 |
+
"learning_rate": 1.6666103753400275e-06,
|
| 101313 |
+
"loss": 0.779056191444397,
|
| 101314 |
+
"step": 14472
|
| 101315 |
+
},
|
| 101316 |
+
{
|
| 101317 |
+
"epoch": 17.758282208588955,
|
| 101318 |
+
"grad_norm": 0.2894750237464905,
|
| 101319 |
+
"learning_rate": 1.6648089413829032e-06,
|
| 101320 |
+
"loss": 0.8105239272117615,
|
| 101321 |
+
"step": 14473
|
| 101322 |
+
},
|
| 101323 |
+
{
|
| 101324 |
+
"epoch": 17.759509202453987,
|
| 101325 |
+
"grad_norm": 0.2877979278564453,
|
| 101326 |
+
"learning_rate": 1.6630084480007718e-06,
|
| 101327 |
+
"loss": 0.8243641257286072,
|
| 101328 |
+
"step": 14474
|
| 101329 |
+
},
|
| 101330 |
+
{
|
| 101331 |
+
"epoch": 17.76073619631902,
|
| 101332 |
+
"grad_norm": 0.2657652199268341,
|
| 101333 |
+
"learning_rate": 1.6612088952662113e-06,
|
| 101334 |
+
"loss": 0.65790194272995,
|
| 101335 |
+
"step": 14475
|
| 101336 |
+
},
|
| 101337 |
+
{
|
| 101338 |
+
"epoch": 17.761963190184048,
|
| 101339 |
+
"grad_norm": 0.2523839473724365,
|
| 101340 |
+
"learning_rate": 1.6594102832517554e-06,
|
| 101341 |
+
"loss": 0.582813024520874,
|
| 101342 |
+
"step": 14476
|
| 101343 |
+
},
|
| 101344 |
+
{
|
| 101345 |
+
"epoch": 17.76319018404908,
|
| 101346 |
+
"grad_norm": 0.24560768902301788,
|
| 101347 |
+
"learning_rate": 1.6576126120299045e-06,
|
| 101348 |
+
"loss": 0.5974304676055908,
|
| 101349 |
+
"step": 14477
|
| 101350 |
+
},
|
| 101351 |
+
{
|
| 101352 |
+
"epoch": 17.764417177914112,
|
| 101353 |
+
"grad_norm": 0.27486369013786316,
|
| 101354 |
+
"learning_rate": 1.6558158816731144e-06,
|
| 101355 |
+
"loss": 0.6215538382530212,
|
| 101356 |
+
"step": 14478
|
| 101357 |
+
},
|
| 101358 |
+
{
|
| 101359 |
+
"epoch": 17.76564417177914,
|
| 101360 |
+
"grad_norm": 0.2607943117618561,
|
| 101361 |
+
"learning_rate": 1.6540200922538052e-06,
|
| 101362 |
+
"loss": 0.4282832145690918,
|
| 101363 |
+
"step": 14479
|
| 101364 |
+
},
|
| 101365 |
+
{
|
| 101366 |
+
"epoch": 17.766871165644172,
|
| 101367 |
+
"grad_norm": 0.28973671793937683,
|
| 101368 |
+
"learning_rate": 1.6522252438443604e-06,
|
| 101369 |
+
"loss": 0.5674418210983276,
|
| 101370 |
+
"step": 14480
|
| 101371 |
+
},
|
| 101372 |
+
{
|
| 101373 |
+
"epoch": 17.7680981595092,
|
| 101374 |
+
"grad_norm": 0.25997284054756165,
|
| 101375 |
+
"learning_rate": 1.6504313365171247e-06,
|
| 101376 |
+
"loss": 0.4637855887413025,
|
| 101377 |
+
"step": 14481
|
| 101378 |
+
},
|
| 101379 |
+
{
|
| 101380 |
+
"epoch": 17.769325153374233,
|
| 101381 |
+
"grad_norm": 0.28390493988990784,
|
| 101382 |
+
"learning_rate": 1.6486383703444074e-06,
|
| 101383 |
+
"loss": 0.5710793733596802,
|
| 101384 |
+
"step": 14482
|
| 101385 |
+
},
|
| 101386 |
+
{
|
| 101387 |
+
"epoch": 17.770552147239265,
|
| 101388 |
+
"grad_norm": 0.2710730731487274,
|
| 101389 |
+
"learning_rate": 1.6468463453984838e-06,
|
| 101390 |
+
"loss": 0.47080177068710327,
|
| 101391 |
+
"step": 14483
|
| 101392 |
+
},
|
| 101393 |
+
{
|
| 101394 |
+
"epoch": 17.771779141104293,
|
| 101395 |
+
"grad_norm": 0.2949327230453491,
|
| 101396 |
+
"learning_rate": 1.6450552617515712e-06,
|
| 101397 |
+
"loss": 0.7702762484550476,
|
| 101398 |
+
"step": 14484
|
| 101399 |
+
},
|
| 101400 |
+
{
|
| 101401 |
+
"epoch": 17.773006134969325,
|
| 101402 |
+
"grad_norm": 0.28098398447036743,
|
| 101403 |
+
"learning_rate": 1.643265119475873e-06,
|
| 101404 |
+
"loss": 0.5846347212791443,
|
| 101405 |
+
"step": 14485
|
| 101406 |
+
},
|
| 101407 |
+
{
|
| 101408 |
+
"epoch": 17.774233128834357,
|
| 101409 |
+
"grad_norm": 0.27663174271583557,
|
| 101410 |
+
"learning_rate": 1.6414759186435424e-06,
|
| 101411 |
+
"loss": 0.6966352462768555,
|
| 101412 |
+
"step": 14486
|
| 101413 |
+
},
|
| 101414 |
+
{
|
| 101415 |
+
"epoch": 17.775460122699386,
|
| 101416 |
+
"grad_norm": 0.24636676907539368,
|
| 101417 |
+
"learning_rate": 1.639687659326694e-06,
|
| 101418 |
+
"loss": 0.589970588684082,
|
| 101419 |
+
"step": 14487
|
| 101420 |
+
},
|
| 101421 |
+
{
|
| 101422 |
+
"epoch": 17.776687116564418,
|
| 101423 |
+
"grad_norm": 0.2559802234172821,
|
| 101424 |
+
"learning_rate": 1.6379003415974175e-06,
|
| 101425 |
+
"loss": 0.5076272487640381,
|
| 101426 |
+
"step": 14488
|
| 101427 |
+
},
|
| 101428 |
+
{
|
| 101429 |
+
"epoch": 17.77791411042945,
|
| 101430 |
+
"grad_norm": 0.25797852873802185,
|
| 101431 |
+
"learning_rate": 1.6361139655277414e-06,
|
| 101432 |
+
"loss": 0.5841696262359619,
|
| 101433 |
+
"step": 14489
|
| 101434 |
+
},
|
| 101435 |
+
{
|
| 101436 |
+
"epoch": 17.779141104294478,
|
| 101437 |
+
"grad_norm": 0.29553401470184326,
|
| 101438 |
+
"learning_rate": 1.6343285311896717e-06,
|
| 101439 |
+
"loss": 0.5014752149581909,
|
| 101440 |
+
"step": 14490
|
| 101441 |
+
},
|
| 101442 |
+
{
|
| 101443 |
+
"epoch": 17.78036809815951,
|
| 101444 |
+
"grad_norm": 0.2689141631126404,
|
| 101445 |
+
"learning_rate": 1.6325440386551843e-06,
|
| 101446 |
+
"loss": 0.5869241952896118,
|
| 101447 |
+
"step": 14491
|
| 101448 |
+
},
|
| 101449 |
+
{
|
| 101450 |
+
"epoch": 17.78159509202454,
|
| 101451 |
+
"grad_norm": 0.2589794099330902,
|
| 101452 |
+
"learning_rate": 1.6307604879962025e-06,
|
| 101453 |
+
"loss": 0.7023707628250122,
|
| 101454 |
+
"step": 14492
|
| 101455 |
+
},
|
| 101456 |
+
{
|
| 101457 |
+
"epoch": 17.78282208588957,
|
| 101458 |
+
"grad_norm": 0.3093288540840149,
|
| 101459 |
+
"learning_rate": 1.62897787928461e-06,
|
| 101460 |
+
"loss": 0.687326192855835,
|
| 101461 |
+
"step": 14493
|
| 101462 |
+
},
|
| 101463 |
+
{
|
| 101464 |
+
"epoch": 17.784049079754602,
|
| 101465 |
+
"grad_norm": 0.28463014960289,
|
| 101466 |
+
"learning_rate": 1.6271962125922635e-06,
|
| 101467 |
+
"loss": 0.5737600326538086,
|
| 101468 |
+
"step": 14494
|
| 101469 |
+
},
|
| 101470 |
+
{
|
| 101471 |
+
"epoch": 17.78527607361963,
|
| 101472 |
+
"grad_norm": 0.2662215530872345,
|
| 101473 |
+
"learning_rate": 1.6254154879909778e-06,
|
| 101474 |
+
"loss": 0.5251301527023315,
|
| 101475 |
+
"step": 14495
|
| 101476 |
+
},
|
| 101477 |
+
{
|
| 101478 |
+
"epoch": 17.786503067484663,
|
| 101479 |
+
"grad_norm": 0.2652130424976349,
|
| 101480 |
+
"learning_rate": 1.6236357055525258e-06,
|
| 101481 |
+
"loss": 0.6468573212623596,
|
| 101482 |
+
"step": 14496
|
| 101483 |
+
},
|
| 101484 |
+
{
|
| 101485 |
+
"epoch": 17.787730061349695,
|
| 101486 |
+
"grad_norm": 0.29214316606521606,
|
| 101487 |
+
"learning_rate": 1.6218568653486504e-06,
|
| 101488 |
+
"loss": 0.6550905704498291,
|
| 101489 |
+
"step": 14497
|
| 101490 |
+
},
|
| 101491 |
+
{
|
| 101492 |
+
"epoch": 17.788957055214723,
|
| 101493 |
+
"grad_norm": 0.2671346664428711,
|
| 101494 |
+
"learning_rate": 1.620078967451047e-06,
|
| 101495 |
+
"loss": 0.5882951617240906,
|
| 101496 |
+
"step": 14498
|
| 101497 |
+
},
|
| 101498 |
+
{
|
| 101499 |
+
"epoch": 17.790184049079755,
|
| 101500 |
+
"grad_norm": 0.26868271827697754,
|
| 101501 |
+
"learning_rate": 1.6183020119313746e-06,
|
| 101502 |
+
"loss": 0.6404262185096741,
|
| 101503 |
+
"step": 14499
|
| 101504 |
+
},
|
| 101505 |
+
{
|
| 101506 |
+
"epoch": 17.791411042944784,
|
| 101507 |
+
"grad_norm": 0.28620389103889465,
|
| 101508 |
+
"learning_rate": 1.6165259988612652e-06,
|
| 101509 |
+
"loss": 0.5554099678993225,
|
| 101510 |
+
"step": 14500
|
| 101511 |
+
},
|
| 101512 |
+
{
|
| 101513 |
+
"epoch": 17.792638036809816,
|
| 101514 |
+
"grad_norm": 0.3018434941768646,
|
| 101515 |
+
"learning_rate": 1.6147509283123003e-06,
|
| 101516 |
+
"loss": 0.692064642906189,
|
| 101517 |
+
"step": 14501
|
| 101518 |
+
},
|
| 101519 |
+
{
|
| 101520 |
+
"epoch": 17.793865030674848,
|
| 101521 |
+
"grad_norm": 0.26555582880973816,
|
| 101522 |
+
"learning_rate": 1.6129768003560253e-06,
|
| 101523 |
+
"loss": 0.5606411695480347,
|
| 101524 |
+
"step": 14502
|
| 101525 |
+
},
|
| 101526 |
+
{
|
| 101527 |
+
"epoch": 17.795092024539876,
|
| 101528 |
+
"grad_norm": 0.26493602991104126,
|
| 101529 |
+
"learning_rate": 1.611203615063961e-06,
|
| 101530 |
+
"loss": 0.5916208624839783,
|
| 101531 |
+
"step": 14503
|
| 101532 |
+
},
|
| 101533 |
+
{
|
| 101534 |
+
"epoch": 17.79631901840491,
|
| 101535 |
+
"grad_norm": 0.2802395522594452,
|
| 101536 |
+
"learning_rate": 1.6094313725075666e-06,
|
| 101537 |
+
"loss": 0.43546223640441895,
|
| 101538 |
+
"step": 14504
|
| 101539 |
+
},
|
| 101540 |
+
{
|
| 101541 |
+
"epoch": 17.79754601226994,
|
| 101542 |
+
"grad_norm": 0.25258538126945496,
|
| 101543 |
+
"learning_rate": 1.6076600727582824e-06,
|
| 101544 |
+
"loss": 0.539810836315155,
|
| 101545 |
+
"step": 14505
|
| 101546 |
+
},
|
| 101547 |
+
{
|
| 101548 |
+
"epoch": 17.79877300613497,
|
| 101549 |
+
"grad_norm": 0.25099050998687744,
|
| 101550 |
+
"learning_rate": 1.6058897158875042e-06,
|
| 101551 |
+
"loss": 0.43675804138183594,
|
| 101552 |
+
"step": 14506
|
| 101553 |
+
},
|
| 101554 |
+
{
|
| 101555 |
+
"epoch": 17.8,
|
| 101556 |
+
"grad_norm": 0.36962538957595825,
|
| 101557 |
+
"learning_rate": 1.6041203019665886e-06,
|
| 101558 |
+
"loss": 0.5866479873657227,
|
| 101559 |
+
"step": 14507
|
| 101560 |
+
},
|
| 101561 |
+
{
|
| 101562 |
+
"epoch": 17.80122699386503,
|
| 101563 |
+
"grad_norm": 0.2473585307598114,
|
| 101564 |
+
"learning_rate": 1.6023518310668618e-06,
|
| 101565 |
+
"loss": 0.4072653651237488,
|
| 101566 |
+
"step": 14508
|
| 101567 |
+
},
|
| 101568 |
+
{
|
| 101569 |
+
"epoch": 17.80245398773006,
|
| 101570 |
+
"grad_norm": 0.2429029494524002,
|
| 101571 |
+
"learning_rate": 1.6005843032595947e-06,
|
| 101572 |
+
"loss": 0.42757776379585266,
|
| 101573 |
+
"step": 14509
|
| 101574 |
+
},
|
| 101575 |
+
{
|
| 101576 |
+
"epoch": 17.803680981595093,
|
| 101577 |
+
"grad_norm": 0.2404707968235016,
|
| 101578 |
+
"learning_rate": 1.5988177186160385e-06,
|
| 101579 |
+
"loss": 0.487602561712265,
|
| 101580 |
+
"step": 14510
|
| 101581 |
+
},
|
| 101582 |
+
{
|
| 101583 |
+
"epoch": 17.80490797546012,
|
| 101584 |
+
"grad_norm": 0.2901292145252228,
|
| 101585 |
+
"learning_rate": 1.597052077207395e-06,
|
| 101586 |
+
"loss": 0.6432065963745117,
|
| 101587 |
+
"step": 14511
|
| 101588 |
+
},
|
| 101589 |
+
{
|
| 101590 |
+
"epoch": 17.806134969325154,
|
| 101591 |
+
"grad_norm": 0.25490713119506836,
|
| 101592 |
+
"learning_rate": 1.5952873791048373e-06,
|
| 101593 |
+
"loss": 0.5464239120483398,
|
| 101594 |
+
"step": 14512
|
| 101595 |
+
},
|
| 101596 |
+
{
|
| 101597 |
+
"epoch": 17.807361963190186,
|
| 101598 |
+
"grad_norm": 0.29172468185424805,
|
| 101599 |
+
"learning_rate": 1.5935236243794922e-06,
|
| 101600 |
+
"loss": 0.535041093826294,
|
| 101601 |
+
"step": 14513
|
| 101602 |
+
},
|
| 101603 |
+
{
|
| 101604 |
+
"epoch": 17.808588957055214,
|
| 101605 |
+
"grad_norm": 0.32893475890159607,
|
| 101606 |
+
"learning_rate": 1.5917608131024552e-06,
|
| 101607 |
+
"loss": 0.6691635251045227,
|
| 101608 |
+
"step": 14514
|
| 101609 |
+
},
|
| 101610 |
+
{
|
| 101611 |
+
"epoch": 17.809815950920246,
|
| 101612 |
+
"grad_norm": 0.25528624653816223,
|
| 101613 |
+
"learning_rate": 1.5899989453447728e-06,
|
| 101614 |
+
"loss": 0.40936315059661865,
|
| 101615 |
+
"step": 14515
|
| 101616 |
+
},
|
| 101617 |
+
{
|
| 101618 |
+
"epoch": 17.811042944785274,
|
| 101619 |
+
"grad_norm": 0.2819651663303375,
|
| 101620 |
+
"learning_rate": 1.5882380211774683e-06,
|
| 101621 |
+
"loss": 0.7845460772514343,
|
| 101622 |
+
"step": 14516
|
| 101623 |
+
},
|
| 101624 |
+
{
|
| 101625 |
+
"epoch": 17.812269938650306,
|
| 101626 |
+
"grad_norm": 0.2538367211818695,
|
| 101627 |
+
"learning_rate": 1.5864780406715213e-06,
|
| 101628 |
+
"loss": 0.5226367712020874,
|
| 101629 |
+
"step": 14517
|
| 101630 |
+
},
|
| 101631 |
+
{
|
| 101632 |
+
"epoch": 17.81349693251534,
|
| 101633 |
+
"grad_norm": 0.26220065355300903,
|
| 101634 |
+
"learning_rate": 1.584719003897861e-06,
|
| 101635 |
+
"loss": 0.47917234897613525,
|
| 101636 |
+
"step": 14518
|
| 101637 |
+
},
|
| 101638 |
+
{
|
| 101639 |
+
"epoch": 17.814723926380367,
|
| 101640 |
+
"grad_norm": 0.2652464807033539,
|
| 101641 |
+
"learning_rate": 1.5829609109273973e-06,
|
| 101642 |
+
"loss": 0.5523488521575928,
|
| 101643 |
+
"step": 14519
|
| 101644 |
+
},
|
| 101645 |
+
{
|
| 101646 |
+
"epoch": 17.8159509202454,
|
| 101647 |
+
"grad_norm": 0.2618599534034729,
|
| 101648 |
+
"learning_rate": 1.5812037618309905e-06,
|
| 101649 |
+
"loss": 0.48545190691947937,
|
| 101650 |
+
"step": 14520
|
| 101651 |
+
},
|
| 101652 |
+
{
|
| 101653 |
+
"epoch": 17.81717791411043,
|
| 101654 |
+
"grad_norm": 0.2567989230155945,
|
| 101655 |
+
"learning_rate": 1.579447556679467e-06,
|
| 101656 |
+
"loss": 0.5344886779785156,
|
| 101657 |
+
"step": 14521
|
| 101658 |
+
},
|
| 101659 |
+
{
|
| 101660 |
+
"epoch": 17.81840490797546,
|
| 101661 |
+
"grad_norm": 0.2714916467666626,
|
| 101662 |
+
"learning_rate": 1.5776922955436203e-06,
|
| 101663 |
+
"loss": 0.7741023302078247,
|
| 101664 |
+
"step": 14522
|
| 101665 |
+
},
|
| 101666 |
+
{
|
| 101667 |
+
"epoch": 17.81963190184049,
|
| 101668 |
+
"grad_norm": 0.2299203872680664,
|
| 101669 |
+
"learning_rate": 1.5759379784941907e-06,
|
| 101670 |
+
"loss": 0.5713906288146973,
|
| 101671 |
+
"step": 14523
|
| 101672 |
+
},
|
| 101673 |
+
{
|
| 101674 |
+
"epoch": 17.82085889570552,
|
| 101675 |
+
"grad_norm": 0.3314440846443176,
|
| 101676 |
+
"learning_rate": 1.5741846056018917e-06,
|
| 101677 |
+
"loss": 0.6644710302352905,
|
| 101678 |
+
"step": 14524
|
| 101679 |
+
},
|
| 101680 |
+
{
|
| 101681 |
+
"epoch": 17.822085889570552,
|
| 101682 |
+
"grad_norm": 0.24096408486366272,
|
| 101683 |
+
"learning_rate": 1.5724321769374023e-06,
|
| 101684 |
+
"loss": 0.3701532483100891,
|
| 101685 |
+
"step": 14525
|
| 101686 |
+
},
|
| 101687 |
+
{
|
| 101688 |
+
"epoch": 17.823312883435584,
|
| 101689 |
+
"grad_norm": 0.26719456911087036,
|
| 101690 |
+
"learning_rate": 1.5706806925713524e-06,
|
| 101691 |
+
"loss": 0.5712520480155945,
|
| 101692 |
+
"step": 14526
|
| 101693 |
+
},
|
| 101694 |
+
{
|
| 101695 |
+
"epoch": 17.824539877300612,
|
| 101696 |
+
"grad_norm": 0.2500896453857422,
|
| 101697 |
+
"learning_rate": 1.5689301525743438e-06,
|
| 101698 |
+
"loss": 0.3234938979148865,
|
| 101699 |
+
"step": 14527
|
| 101700 |
+
},
|
| 101701 |
+
{
|
| 101702 |
+
"epoch": 17.825766871165644,
|
| 101703 |
+
"grad_norm": 0.311578631401062,
|
| 101704 |
+
"learning_rate": 1.5671805570169312e-06,
|
| 101705 |
+
"loss": 0.7870638966560364,
|
| 101706 |
+
"step": 14528
|
| 101707 |
+
},
|
| 101708 |
+
{
|
| 101709 |
+
"epoch": 17.826993865030676,
|
| 101710 |
+
"grad_norm": 0.27789121866226196,
|
| 101711 |
+
"learning_rate": 1.565431905969636e-06,
|
| 101712 |
+
"loss": 0.5378883481025696,
|
| 101713 |
+
"step": 14529
|
| 101714 |
+
},
|
| 101715 |
+
{
|
| 101716 |
+
"epoch": 17.828220858895705,
|
| 101717 |
+
"grad_norm": 0.2626987099647522,
|
| 101718 |
+
"learning_rate": 1.563684199502946e-06,
|
| 101719 |
+
"loss": 0.6227214336395264,
|
| 101720 |
+
"step": 14530
|
| 101721 |
+
},
|
| 101722 |
+
{
|
| 101723 |
+
"epoch": 17.829447852760737,
|
| 101724 |
+
"grad_norm": 0.24920257925987244,
|
| 101725 |
+
"learning_rate": 1.5619374376872997e-06,
|
| 101726 |
+
"loss": 0.2821338474750519,
|
| 101727 |
+
"step": 14531
|
| 101728 |
+
},
|
| 101729 |
+
{
|
| 101730 |
+
"epoch": 17.830674846625765,
|
| 101731 |
+
"grad_norm": 0.2617615759372711,
|
| 101732 |
+
"learning_rate": 1.5601916205931155e-06,
|
| 101733 |
+
"loss": 0.5795203447341919,
|
| 101734 |
+
"step": 14532
|
| 101735 |
+
},
|
| 101736 |
+
{
|
| 101737 |
+
"epoch": 17.831901840490797,
|
| 101738 |
+
"grad_norm": 0.27603378891944885,
|
| 101739 |
+
"learning_rate": 1.5584467482907482e-06,
|
| 101740 |
+
"loss": 0.6474769711494446,
|
| 101741 |
+
"step": 14533
|
| 101742 |
+
},
|
| 101743 |
+
{
|
| 101744 |
+
"epoch": 17.83312883435583,
|
| 101745 |
+
"grad_norm": 0.2555461823940277,
|
| 101746 |
+
"learning_rate": 1.5567028208505334e-06,
|
| 101747 |
+
"loss": 0.5072534680366516,
|
| 101748 |
+
"step": 14534
|
| 101749 |
+
},
|
| 101750 |
+
{
|
| 101751 |
+
"epoch": 17.834355828220858,
|
| 101752 |
+
"grad_norm": 0.275061696767807,
|
| 101753 |
+
"learning_rate": 1.5549598383427677e-06,
|
| 101754 |
+
"loss": 0.7106829881668091,
|
| 101755 |
+
"step": 14535
|
| 101756 |
+
},
|
| 101757 |
+
{
|
| 101758 |
+
"epoch": 17.83558282208589,
|
| 101759 |
+
"grad_norm": 0.26190558075904846,
|
| 101760 |
+
"learning_rate": 1.5532178008377058e-06,
|
| 101761 |
+
"loss": 0.5646058320999146,
|
| 101762 |
+
"step": 14536
|
| 101763 |
+
},
|
| 101764 |
+
{
|
| 101765 |
+
"epoch": 17.83680981595092,
|
| 101766 |
+
"grad_norm": 0.3063841760158539,
|
| 101767 |
+
"learning_rate": 1.551476708405561e-06,
|
| 101768 |
+
"loss": 0.6638450622558594,
|
| 101769 |
+
"step": 14537
|
| 101770 |
+
},
|
| 101771 |
+
{
|
| 101772 |
+
"epoch": 17.83803680981595,
|
| 101773 |
+
"grad_norm": 0.25269055366516113,
|
| 101774 |
+
"learning_rate": 1.5497365611165133e-06,
|
| 101775 |
+
"loss": 0.35852086544036865,
|
| 101776 |
+
"step": 14538
|
| 101777 |
+
},
|
| 101778 |
+
{
|
| 101779 |
+
"epoch": 17.839263803680982,
|
| 101780 |
+
"grad_norm": 0.2659214437007904,
|
| 101781 |
+
"learning_rate": 1.5479973590407009e-06,
|
| 101782 |
+
"loss": 0.6507385969161987,
|
| 101783 |
+
"step": 14539
|
| 101784 |
+
},
|
| 101785 |
+
{
|
| 101786 |
+
"epoch": 17.84049079754601,
|
| 101787 |
+
"grad_norm": 0.28474104404449463,
|
| 101788 |
+
"learning_rate": 1.5462591022482264e-06,
|
| 101789 |
+
"loss": 0.5481491684913635,
|
| 101790 |
+
"step": 14540
|
| 101791 |
+
},
|
| 101792 |
+
{
|
| 101793 |
+
"epoch": 17.841717791411043,
|
| 101794 |
+
"grad_norm": 0.2880687713623047,
|
| 101795 |
+
"learning_rate": 1.5445217908091613e-06,
|
| 101796 |
+
"loss": 0.5938142538070679,
|
| 101797 |
+
"step": 14541
|
| 101798 |
+
},
|
| 101799 |
+
{
|
| 101800 |
+
"epoch": 17.842944785276075,
|
| 101801 |
+
"grad_norm": 0.2858433127403259,
|
| 101802 |
+
"learning_rate": 1.542785424793522e-06,
|
| 101803 |
+
"loss": 0.4925382137298584,
|
| 101804 |
+
"step": 14542
|
| 101805 |
+
},
|
| 101806 |
+
{
|
| 101807 |
+
"epoch": 17.844171779141103,
|
| 101808 |
+
"grad_norm": 0.3114153742790222,
|
| 101809 |
+
"learning_rate": 1.5410500042712994e-06,
|
| 101810 |
+
"loss": 0.6614712476730347,
|
| 101811 |
+
"step": 14543
|
| 101812 |
+
},
|
| 101813 |
+
{
|
| 101814 |
+
"epoch": 17.845398773006135,
|
| 101815 |
+
"grad_norm": 0.2497471421957016,
|
| 101816 |
+
"learning_rate": 1.5393155293124435e-06,
|
| 101817 |
+
"loss": 0.6087071895599365,
|
| 101818 |
+
"step": 14544
|
| 101819 |
+
},
|
| 101820 |
+
{
|
| 101821 |
+
"epoch": 17.846625766871167,
|
| 101822 |
+
"grad_norm": 0.24536079168319702,
|
| 101823 |
+
"learning_rate": 1.5375819999868674e-06,
|
| 101824 |
+
"loss": 0.590156078338623,
|
| 101825 |
+
"step": 14545
|
| 101826 |
+
},
|
| 101827 |
+
{
|
| 101828 |
+
"epoch": 17.847852760736195,
|
| 101829 |
+
"grad_norm": 0.2518615126609802,
|
| 101830 |
+
"learning_rate": 1.5358494163644487e-06,
|
| 101831 |
+
"loss": 0.46484866738319397,
|
| 101832 |
+
"step": 14546
|
| 101833 |
+
},
|
| 101834 |
+
{
|
| 101835 |
+
"epoch": 17.849079754601227,
|
| 101836 |
+
"grad_norm": 0.28139856457710266,
|
| 101837 |
+
"learning_rate": 1.534117778515015e-06,
|
| 101838 |
+
"loss": 0.580226480960846,
|
| 101839 |
+
"step": 14547
|
| 101840 |
+
},
|
| 101841 |
+
{
|
| 101842 |
+
"epoch": 17.85030674846626,
|
| 101843 |
+
"grad_norm": 0.2756141722202301,
|
| 101844 |
+
"learning_rate": 1.5323870865083634e-06,
|
| 101845 |
+
"loss": 0.5730589032173157,
|
| 101846 |
+
"step": 14548
|
| 101847 |
+
},
|
| 101848 |
+
{
|
| 101849 |
+
"epoch": 17.851533742331288,
|
| 101850 |
+
"grad_norm": 0.26249977946281433,
|
| 101851 |
+
"learning_rate": 1.53065734041426e-06,
|
| 101852 |
+
"loss": 0.5200498104095459,
|
| 101853 |
+
"step": 14549
|
| 101854 |
+
},
|
| 101855 |
+
{
|
| 101856 |
+
"epoch": 17.85276073619632,
|
| 101857 |
+
"grad_norm": 0.29090723395347595,
|
| 101858 |
+
"learning_rate": 1.5289285403024211e-06,
|
| 101859 |
+
"loss": 0.5983284115791321,
|
| 101860 |
+
"step": 14550
|
| 101861 |
+
},
|
| 101862 |
+
{
|
| 101863 |
+
"epoch": 17.85398773006135,
|
| 101864 |
+
"grad_norm": 0.25745999813079834,
|
| 101865 |
+
"learning_rate": 1.5272006862425332e-06,
|
| 101866 |
+
"loss": 0.5472363233566284,
|
| 101867 |
+
"step": 14551
|
| 101868 |
+
},
|
| 101869 |
+
{
|
| 101870 |
+
"epoch": 17.85521472392638,
|
| 101871 |
+
"grad_norm": 0.23941321671009064,
|
| 101872 |
+
"learning_rate": 1.5254737783042373e-06,
|
| 101873 |
+
"loss": 0.5717787742614746,
|
| 101874 |
+
"step": 14552
|
| 101875 |
+
},
|
| 101876 |
+
{
|
| 101877 |
+
"epoch": 17.856441717791412,
|
| 101878 |
+
"grad_norm": 0.29118725657463074,
|
| 101879 |
+
"learning_rate": 1.5237478165571418e-06,
|
| 101880 |
+
"loss": 0.6209118366241455,
|
| 101881 |
+
"step": 14553
|
| 101882 |
+
},
|
| 101883 |
+
{
|
| 101884 |
+
"epoch": 17.85766871165644,
|
| 101885 |
+
"grad_norm": 0.2942374050617218,
|
| 101886 |
+
"learning_rate": 1.522022801070816e-06,
|
| 101887 |
+
"loss": 0.6844048500061035,
|
| 101888 |
+
"step": 14554
|
| 101889 |
+
},
|
| 101890 |
+
{
|
| 101891 |
+
"epoch": 17.858895705521473,
|
| 101892 |
+
"grad_norm": 0.2654740512371063,
|
| 101893 |
+
"learning_rate": 1.5202987319147905e-06,
|
| 101894 |
+
"loss": 0.4786403775215149,
|
| 101895 |
+
"step": 14555
|
| 101896 |
+
},
|
| 101897 |
+
{
|
| 101898 |
+
"epoch": 17.860122699386505,
|
| 101899 |
+
"grad_norm": 0.28547996282577515,
|
| 101900 |
+
"learning_rate": 1.5185756091585596e-06,
|
| 101901 |
+
"loss": 0.654380738735199,
|
| 101902 |
+
"step": 14556
|
| 101903 |
+
},
|
| 101904 |
+
{
|
| 101905 |
+
"epoch": 17.861349693251533,
|
| 101906 |
+
"grad_norm": 0.24950368702411652,
|
| 101907 |
+
"learning_rate": 1.516853432871565e-06,
|
| 101908 |
+
"loss": 0.6516945362091064,
|
| 101909 |
+
"step": 14557
|
| 101910 |
+
},
|
| 101911 |
+
{
|
| 101912 |
+
"epoch": 17.862576687116565,
|
| 101913 |
+
"grad_norm": 0.3078065812587738,
|
| 101914 |
+
"learning_rate": 1.51513220312324e-06,
|
| 101915 |
+
"loss": 0.8418989181518555,
|
| 101916 |
+
"step": 14558
|
| 101917 |
+
},
|
| 101918 |
+
{
|
| 101919 |
+
"epoch": 17.863803680981594,
|
| 101920 |
+
"grad_norm": 0.30534765124320984,
|
| 101921 |
+
"learning_rate": 1.5134119199829516e-06,
|
| 101922 |
+
"loss": 0.6099573969841003,
|
| 101923 |
+
"step": 14559
|
| 101924 |
+
},
|
| 101925 |
+
{
|
| 101926 |
+
"epoch": 17.865030674846626,
|
| 101927 |
+
"grad_norm": 0.2472127079963684,
|
| 101928 |
+
"learning_rate": 1.5116925835200496e-06,
|
| 101929 |
+
"loss": 0.5963634848594666,
|
| 101930 |
+
"step": 14560
|
| 101931 |
+
},
|
| 101932 |
+
{
|
| 101933 |
+
"epoch": 17.866257668711658,
|
| 101934 |
+
"grad_norm": 0.2644025683403015,
|
| 101935 |
+
"learning_rate": 1.509974193803823e-06,
|
| 101936 |
+
"loss": 0.596381664276123,
|
| 101937 |
+
"step": 14561
|
| 101938 |
+
},
|
| 101939 |
+
{
|
| 101940 |
+
"epoch": 17.867484662576686,
|
| 101941 |
+
"grad_norm": 0.244345560669899,
|
| 101942 |
+
"learning_rate": 1.5082567509035417e-06,
|
| 101943 |
+
"loss": 0.6145912408828735,
|
| 101944 |
+
"step": 14562
|
| 101945 |
+
},
|
| 101946 |
+
{
|
| 101947 |
+
"epoch": 17.868711656441718,
|
| 101948 |
+
"grad_norm": 0.293698251247406,
|
| 101949 |
+
"learning_rate": 1.5065402548884306e-06,
|
| 101950 |
+
"loss": 0.6953533887863159,
|
| 101951 |
+
"step": 14563
|
| 101952 |
+
},
|
| 101953 |
+
{
|
| 101954 |
+
"epoch": 17.86993865030675,
|
| 101955 |
+
"grad_norm": 0.3028424084186554,
|
| 101956 |
+
"learning_rate": 1.504824705827676e-06,
|
| 101957 |
+
"loss": 0.6375309824943542,
|
| 101958 |
+
"step": 14564
|
| 101959 |
+
},
|
| 101960 |
+
{
|
| 101961 |
+
"epoch": 17.87116564417178,
|
| 101962 |
+
"grad_norm": 0.28665441274642944,
|
| 101963 |
+
"learning_rate": 1.503110103790431e-06,
|
| 101964 |
+
"loss": 0.7814404368400574,
|
| 101965 |
+
"step": 14565
|
| 101966 |
+
},
|
| 101967 |
+
{
|
| 101968 |
+
"epoch": 17.87239263803681,
|
| 101969 |
+
"grad_norm": 0.25935009121894836,
|
| 101970 |
+
"learning_rate": 1.5013964488457988e-06,
|
| 101971 |
+
"loss": 0.4439695477485657,
|
| 101972 |
+
"step": 14566
|
| 101973 |
+
},
|
| 101974 |
+
{
|
| 101975 |
+
"epoch": 17.87361963190184,
|
| 101976 |
+
"grad_norm": 0.2631010413169861,
|
| 101977 |
+
"learning_rate": 1.4996837410628545e-06,
|
| 101978 |
+
"loss": 0.6866264343261719,
|
| 101979 |
+
"step": 14567
|
| 101980 |
+
},
|
| 101981 |
+
{
|
| 101982 |
+
"epoch": 17.87484662576687,
|
| 101983 |
+
"grad_norm": 0.24359236657619476,
|
| 101984 |
+
"learning_rate": 1.4979719805106345e-06,
|
| 101985 |
+
"loss": 0.5527574419975281,
|
| 101986 |
+
"step": 14568
|
| 101987 |
+
},
|
| 101988 |
+
{
|
| 101989 |
+
"epoch": 17.876073619631903,
|
| 101990 |
+
"grad_norm": 0.24687421321868896,
|
| 101991 |
+
"learning_rate": 1.496261167258134e-06,
|
| 101992 |
+
"loss": 0.5957076549530029,
|
| 101993 |
+
"step": 14569
|
| 101994 |
+
},
|
| 101995 |
+
{
|
| 101996 |
+
"epoch": 17.87730061349693,
|
| 101997 |
+
"grad_norm": 0.23686599731445312,
|
| 101998 |
+
"learning_rate": 1.4945513013743167e-06,
|
| 101999 |
+
"loss": 0.5661089420318604,
|
| 102000 |
+
"step": 14570
|
| 102001 |
+
},
|
| 102002 |
+
{
|
| 102003 |
+
"epoch": 17.878527607361963,
|
| 102004 |
+
"grad_norm": 0.26750171184539795,
|
| 102005 |
+
"learning_rate": 1.4928423829280918e-06,
|
| 102006 |
+
"loss": 0.6328824758529663,
|
| 102007 |
+
"step": 14571
|
| 102008 |
+
},
|
| 102009 |
+
{
|
| 102010 |
+
"epoch": 17.879754601226995,
|
| 102011 |
+
"grad_norm": 0.29372668266296387,
|
| 102012 |
+
"learning_rate": 1.491134411988343e-06,
|
| 102013 |
+
"loss": 0.6985042691230774,
|
| 102014 |
+
"step": 14572
|
| 102015 |
+
},
|
| 102016 |
+
{
|
| 102017 |
+
"epoch": 17.880981595092024,
|
| 102018 |
+
"grad_norm": 0.27370312809944153,
|
| 102019 |
+
"learning_rate": 1.4894273886239208e-06,
|
| 102020 |
+
"loss": 0.5453335046768188,
|
| 102021 |
+
"step": 14573
|
| 102022 |
+
},
|
| 102023 |
+
{
|
| 102024 |
+
"epoch": 17.882208588957056,
|
| 102025 |
+
"grad_norm": 0.25438982248306274,
|
| 102026 |
+
"learning_rate": 1.4877213129036255e-06,
|
| 102027 |
+
"loss": 0.6645020246505737,
|
| 102028 |
+
"step": 14574
|
| 102029 |
+
},
|
| 102030 |
+
{
|
| 102031 |
+
"epoch": 17.883435582822084,
|
| 102032 |
+
"grad_norm": 0.26911574602127075,
|
| 102033 |
+
"learning_rate": 1.4860161848962272e-06,
|
| 102034 |
+
"loss": 0.6708583831787109,
|
| 102035 |
+
"step": 14575
|
| 102036 |
+
},
|
| 102037 |
+
{
|
| 102038 |
+
"epoch": 17.884662576687116,
|
| 102039 |
+
"grad_norm": 0.3031774163246155,
|
| 102040 |
+
"learning_rate": 1.4843120046704489e-06,
|
| 102041 |
+
"loss": 0.6086454391479492,
|
| 102042 |
+
"step": 14576
|
| 102043 |
+
},
|
| 102044 |
+
{
|
| 102045 |
+
"epoch": 17.88588957055215,
|
| 102046 |
+
"grad_norm": 0.26895734667778015,
|
| 102047 |
+
"learning_rate": 1.4826087722949854e-06,
|
| 102048 |
+
"loss": 0.6470022201538086,
|
| 102049 |
+
"step": 14577
|
| 102050 |
+
},
|
| 102051 |
+
{
|
| 102052 |
+
"epoch": 17.887116564417177,
|
| 102053 |
+
"grad_norm": 0.2590526342391968,
|
| 102054 |
+
"learning_rate": 1.4809064878384877e-06,
|
| 102055 |
+
"loss": 0.6471710205078125,
|
| 102056 |
+
"step": 14578
|
| 102057 |
+
},
|
| 102058 |
+
{
|
| 102059 |
+
"epoch": 17.88834355828221,
|
| 102060 |
+
"grad_norm": 0.23606297373771667,
|
| 102061 |
+
"learning_rate": 1.4792051513695727e-06,
|
| 102062 |
+
"loss": 0.47087231278419495,
|
| 102063 |
+
"step": 14579
|
| 102064 |
+
},
|
| 102065 |
+
{
|
| 102066 |
+
"epoch": 17.88957055214724,
|
| 102067 |
+
"grad_norm": 0.30575573444366455,
|
| 102068 |
+
"learning_rate": 1.4775047629568135e-06,
|
| 102069 |
+
"loss": 0.3846961557865143,
|
| 102070 |
+
"step": 14580
|
| 102071 |
+
},
|
| 102072 |
+
{
|
| 102073 |
+
"epoch": 17.89079754601227,
|
| 102074 |
+
"grad_norm": 0.2800881564617157,
|
| 102075 |
+
"learning_rate": 1.4758053226687496e-06,
|
| 102076 |
+
"loss": 0.5879099369049072,
|
| 102077 |
+
"step": 14581
|
| 102078 |
+
},
|
| 102079 |
+
{
|
| 102080 |
+
"epoch": 17.8920245398773,
|
| 102081 |
+
"grad_norm": 0.2551717162132263,
|
| 102082 |
+
"learning_rate": 1.4741068305738792e-06,
|
| 102083 |
+
"loss": 0.632975161075592,
|
| 102084 |
+
"step": 14582
|
| 102085 |
+
},
|
| 102086 |
+
{
|
| 102087 |
+
"epoch": 17.89325153374233,
|
| 102088 |
+
"grad_norm": 0.2893374264240265,
|
| 102089 |
+
"learning_rate": 1.4724092867406669e-06,
|
| 102090 |
+
"loss": 0.6257842779159546,
|
| 102091 |
+
"step": 14583
|
| 102092 |
+
},
|
| 102093 |
+
{
|
| 102094 |
+
"epoch": 17.89447852760736,
|
| 102095 |
+
"grad_norm": 0.2591066360473633,
|
| 102096 |
+
"learning_rate": 1.470712691237533e-06,
|
| 102097 |
+
"loss": 0.6154621243476868,
|
| 102098 |
+
"step": 14584
|
| 102099 |
+
},
|
| 102100 |
+
{
|
| 102101 |
+
"epoch": 17.895705521472394,
|
| 102102 |
+
"grad_norm": 0.2389405518770218,
|
| 102103 |
+
"learning_rate": 1.4690170441328616e-06,
|
| 102104 |
+
"loss": 0.5803853273391724,
|
| 102105 |
+
"step": 14585
|
| 102106 |
+
},
|
| 102107 |
+
{
|
| 102108 |
+
"epoch": 17.896932515337422,
|
| 102109 |
+
"grad_norm": 0.26036345958709717,
|
| 102110 |
+
"learning_rate": 1.4673223454950007e-06,
|
| 102111 |
+
"loss": 0.6107214689254761,
|
| 102112 |
+
"step": 14586
|
| 102113 |
+
},
|
| 102114 |
+
{
|
| 102115 |
+
"epoch": 17.898159509202454,
|
| 102116 |
+
"grad_norm": 0.27903497219085693,
|
| 102117 |
+
"learning_rate": 1.46562859539226e-06,
|
| 102118 |
+
"loss": 0.6098342537879944,
|
| 102119 |
+
"step": 14587
|
| 102120 |
+
},
|
| 102121 |
+
{
|
| 102122 |
+
"epoch": 17.899386503067486,
|
| 102123 |
+
"grad_norm": 0.2447638064622879,
|
| 102124 |
+
"learning_rate": 1.4639357938929066e-06,
|
| 102125 |
+
"loss": 0.5947643518447876,
|
| 102126 |
+
"step": 14588
|
| 102127 |
+
},
|
| 102128 |
+
{
|
| 102129 |
+
"epoch": 17.900613496932515,
|
| 102130 |
+
"grad_norm": 0.26714345812797546,
|
| 102131 |
+
"learning_rate": 1.462243941065178e-06,
|
| 102132 |
+
"loss": 0.6195263862609863,
|
| 102133 |
+
"step": 14589
|
| 102134 |
+
},
|
| 102135 |
+
{
|
| 102136 |
+
"epoch": 17.901840490797547,
|
| 102137 |
+
"grad_norm": 0.24291105568408966,
|
| 102138 |
+
"learning_rate": 1.4605530369772608e-06,
|
| 102139 |
+
"loss": 0.512403130531311,
|
| 102140 |
+
"step": 14590
|
| 102141 |
+
},
|
| 102142 |
+
{
|
| 102143 |
+
"epoch": 17.903067484662575,
|
| 102144 |
+
"grad_norm": 0.2616630494594574,
|
| 102145 |
+
"learning_rate": 1.4588630816973147e-06,
|
| 102146 |
+
"loss": 0.6388986110687256,
|
| 102147 |
+
"step": 14591
|
| 102148 |
+
},
|
| 102149 |
+
{
|
| 102150 |
+
"epoch": 17.904294478527607,
|
| 102151 |
+
"grad_norm": 0.28447410464286804,
|
| 102152 |
+
"learning_rate": 1.4571740752934576e-06,
|
| 102153 |
+
"loss": 0.6741594076156616,
|
| 102154 |
+
"step": 14592
|
| 102155 |
+
},
|
| 102156 |
+
{
|
| 102157 |
+
"epoch": 17.90552147239264,
|
| 102158 |
+
"grad_norm": 0.3001502752304077,
|
| 102159 |
+
"learning_rate": 1.4554860178337653e-06,
|
| 102160 |
+
"loss": 0.8494046330451965,
|
| 102161 |
+
"step": 14593
|
| 102162 |
+
},
|
| 102163 |
+
{
|
| 102164 |
+
"epoch": 17.906748466257667,
|
| 102165 |
+
"grad_norm": 0.2859189808368683,
|
| 102166 |
+
"learning_rate": 1.4537989093862863e-06,
|
| 102167 |
+
"loss": 0.5723183155059814,
|
| 102168 |
+
"step": 14594
|
| 102169 |
+
},
|
| 102170 |
+
{
|
| 102171 |
+
"epoch": 17.9079754601227,
|
| 102172 |
+
"grad_norm": 0.25693708658218384,
|
| 102173 |
+
"learning_rate": 1.4521127500190107e-06,
|
| 102174 |
+
"loss": 0.42384105920791626,
|
| 102175 |
+
"step": 14595
|
| 102176 |
+
},
|
| 102177 |
+
{
|
| 102178 |
+
"epoch": 17.90920245398773,
|
| 102179 |
+
"grad_norm": 0.2814970910549164,
|
| 102180 |
+
"learning_rate": 1.4504275397999118e-06,
|
| 102181 |
+
"loss": 0.5663703083992004,
|
| 102182 |
+
"step": 14596
|
| 102183 |
+
},
|
| 102184 |
+
{
|
| 102185 |
+
"epoch": 17.91042944785276,
|
| 102186 |
+
"grad_norm": 0.2637481093406677,
|
| 102187 |
+
"learning_rate": 1.448743278796913e-06,
|
| 102188 |
+
"loss": 0.6658276319503784,
|
| 102189 |
+
"step": 14597
|
| 102190 |
+
},
|
| 102191 |
+
{
|
| 102192 |
+
"epoch": 17.911656441717792,
|
| 102193 |
+
"grad_norm": 0.2559933662414551,
|
| 102194 |
+
"learning_rate": 1.4470599670779016e-06,
|
| 102195 |
+
"loss": 0.5418700575828552,
|
| 102196 |
+
"step": 14598
|
| 102197 |
+
},
|
| 102198 |
+
{
|
| 102199 |
+
"epoch": 17.91288343558282,
|
| 102200 |
+
"grad_norm": 0.2136484682559967,
|
| 102201 |
+
"learning_rate": 1.445377604710732e-06,
|
| 102202 |
+
"loss": 0.37508541345596313,
|
| 102203 |
+
"step": 14599
|
| 102204 |
+
},
|
| 102205 |
+
{
|
| 102206 |
+
"epoch": 17.914110429447852,
|
| 102207 |
+
"grad_norm": 0.25122955441474915,
|
| 102208 |
+
"learning_rate": 1.4436961917632081e-06,
|
| 102209 |
+
"loss": 0.5960754156112671,
|
| 102210 |
+
"step": 14600
|
| 102211 |
+
},
|
| 102212 |
+
{
|
| 102213 |
+
"epoch": 17.915337423312884,
|
| 102214 |
+
"grad_norm": 0.2816019654273987,
|
| 102215 |
+
"learning_rate": 1.4420157283031006e-06,
|
| 102216 |
+
"loss": 0.6493737697601318,
|
| 102217 |
+
"step": 14601
|
| 102218 |
+
},
|
| 102219 |
+
{
|
| 102220 |
+
"epoch": 17.916564417177913,
|
| 102221 |
+
"grad_norm": 0.2641134262084961,
|
| 102222 |
+
"learning_rate": 1.4403362143981553e-06,
|
| 102223 |
+
"loss": 0.7002406716346741,
|
| 102224 |
+
"step": 14602
|
| 102225 |
+
},
|
| 102226 |
+
{
|
| 102227 |
+
"epoch": 17.917791411042945,
|
| 102228 |
+
"grad_norm": 0.24316608905792236,
|
| 102229 |
+
"learning_rate": 1.4386576501160653e-06,
|
| 102230 |
+
"loss": 0.6797574758529663,
|
| 102231 |
+
"step": 14603
|
| 102232 |
+
},
|
| 102233 |
+
{
|
| 102234 |
+
"epoch": 17.919018404907977,
|
| 102235 |
+
"grad_norm": 0.27621063590049744,
|
| 102236 |
+
"learning_rate": 1.4369800355244822e-06,
|
| 102237 |
+
"loss": 0.6116088032722473,
|
| 102238 |
+
"step": 14604
|
| 102239 |
+
},
|
| 102240 |
+
{
|
| 102241 |
+
"epoch": 17.920245398773005,
|
| 102242 |
+
"grad_norm": 0.30401498079299927,
|
| 102243 |
+
"learning_rate": 1.4353033706910296e-06,
|
| 102244 |
+
"loss": 0.5569919347763062,
|
| 102245 |
+
"step": 14605
|
| 102246 |
+
},
|
| 102247 |
+
{
|
| 102248 |
+
"epoch": 17.921472392638037,
|
| 102249 |
+
"grad_norm": 0.2975943982601166,
|
| 102250 |
+
"learning_rate": 1.4336276556832923e-06,
|
| 102251 |
+
"loss": 0.5165454745292664,
|
| 102252 |
+
"step": 14606
|
| 102253 |
+
},
|
| 102254 |
+
{
|
| 102255 |
+
"epoch": 17.92269938650307,
|
| 102256 |
+
"grad_norm": 0.2946213483810425,
|
| 102257 |
+
"learning_rate": 1.4319528905688078e-06,
|
| 102258 |
+
"loss": 0.5460560917854309,
|
| 102259 |
+
"step": 14607
|
| 102260 |
+
},
|
| 102261 |
+
{
|
| 102262 |
+
"epoch": 17.923926380368098,
|
| 102263 |
+
"grad_norm": 0.2377009093761444,
|
| 102264 |
+
"learning_rate": 1.4302790754150863e-06,
|
| 102265 |
+
"loss": 0.47542837262153625,
|
| 102266 |
+
"step": 14608
|
| 102267 |
+
},
|
| 102268 |
+
{
|
| 102269 |
+
"epoch": 17.92515337423313,
|
| 102270 |
+
"grad_norm": 0.23359425365924835,
|
| 102271 |
+
"learning_rate": 1.4286062102895957e-06,
|
| 102272 |
+
"loss": 0.44849640130996704,
|
| 102273 |
+
"step": 14609
|
| 102274 |
+
},
|
| 102275 |
+
{
|
| 102276 |
+
"epoch": 17.926380368098158,
|
| 102277 |
+
"grad_norm": 0.262854665517807,
|
| 102278 |
+
"learning_rate": 1.4269342952597575e-06,
|
| 102279 |
+
"loss": 0.7347406148910522,
|
| 102280 |
+
"step": 14610
|
| 102281 |
+
},
|
| 102282 |
+
{
|
| 102283 |
+
"epoch": 17.92760736196319,
|
| 102284 |
+
"grad_norm": 0.2657775580883026,
|
| 102285 |
+
"learning_rate": 1.4252633303929647e-06,
|
| 102286 |
+
"loss": 0.5693353414535522,
|
| 102287 |
+
"step": 14611
|
| 102288 |
+
},
|
| 102289 |
+
{
|
| 102290 |
+
"epoch": 17.928834355828222,
|
| 102291 |
+
"grad_norm": 0.31792959570884705,
|
| 102292 |
+
"learning_rate": 1.423593315756569e-06,
|
| 102293 |
+
"loss": 0.5038333535194397,
|
| 102294 |
+
"step": 14612
|
| 102295 |
+
},
|
| 102296 |
+
{
|
| 102297 |
+
"epoch": 17.93006134969325,
|
| 102298 |
+
"grad_norm": 0.24880990386009216,
|
| 102299 |
+
"learning_rate": 1.421924251417886e-06,
|
| 102300 |
+
"loss": 0.4884534478187561,
|
| 102301 |
+
"step": 14613
|
| 102302 |
+
},
|
| 102303 |
+
{
|
| 102304 |
+
"epoch": 17.931288343558283,
|
| 102305 |
+
"grad_norm": 0.24555426836013794,
|
| 102306 |
+
"learning_rate": 1.4202561374441926e-06,
|
| 102307 |
+
"loss": 0.6830617189407349,
|
| 102308 |
+
"step": 14614
|
| 102309 |
+
},
|
| 102310 |
+
{
|
| 102311 |
+
"epoch": 17.93251533742331,
|
| 102312 |
+
"grad_norm": 0.2723620533943176,
|
| 102313 |
+
"learning_rate": 1.418588973902721e-06,
|
| 102314 |
+
"loss": 0.6093084216117859,
|
| 102315 |
+
"step": 14615
|
| 102316 |
+
},
|
| 102317 |
+
{
|
| 102318 |
+
"epoch": 17.933742331288343,
|
| 102319 |
+
"grad_norm": 0.2709653377532959,
|
| 102320 |
+
"learning_rate": 1.41692276086067e-06,
|
| 102321 |
+
"loss": 0.4717769920825958,
|
| 102322 |
+
"step": 14616
|
| 102323 |
+
},
|
| 102324 |
+
{
|
| 102325 |
+
"epoch": 17.934969325153375,
|
| 102326 |
+
"grad_norm": 0.23278799653053284,
|
| 102327 |
+
"learning_rate": 1.4152574983852e-06,
|
| 102328 |
+
"loss": 0.48925477266311646,
|
| 102329 |
+
"step": 14617
|
| 102330 |
+
},
|
| 102331 |
+
{
|
| 102332 |
+
"epoch": 17.936196319018403,
|
| 102333 |
+
"grad_norm": 0.2678217589855194,
|
| 102334 |
+
"learning_rate": 1.4135931865434353e-06,
|
| 102335 |
+
"loss": 0.5740103721618652,
|
| 102336 |
+
"step": 14618
|
| 102337 |
+
},
|
| 102338 |
+
{
|
| 102339 |
+
"epoch": 17.937423312883435,
|
| 102340 |
+
"grad_norm": 0.24464988708496094,
|
| 102341 |
+
"learning_rate": 1.4119298254024638e-06,
|
| 102342 |
+
"loss": 0.6216524243354797,
|
| 102343 |
+
"step": 14619
|
| 102344 |
+
},
|
| 102345 |
+
{
|
| 102346 |
+
"epoch": 17.938650306748468,
|
| 102347 |
+
"grad_norm": 0.2594940960407257,
|
| 102348 |
+
"learning_rate": 1.4102674150293232e-06,
|
| 102349 |
+
"loss": 0.5685508251190186,
|
| 102350 |
+
"step": 14620
|
| 102351 |
+
},
|
| 102352 |
+
{
|
| 102353 |
+
"epoch": 17.939877300613496,
|
| 102354 |
+
"grad_norm": 0.2794358730316162,
|
| 102355 |
+
"learning_rate": 1.4086059554910185e-06,
|
| 102356 |
+
"loss": 0.805902898311615,
|
| 102357 |
+
"step": 14621
|
| 102358 |
+
},
|
| 102359 |
+
{
|
| 102360 |
+
"epoch": 17.941104294478528,
|
| 102361 |
+
"grad_norm": 0.27045318484306335,
|
| 102362 |
+
"learning_rate": 1.4069454468545268e-06,
|
| 102363 |
+
"loss": 0.5640958547592163,
|
| 102364 |
+
"step": 14622
|
| 102365 |
+
},
|
| 102366 |
+
{
|
| 102367 |
+
"epoch": 17.94233128834356,
|
| 102368 |
+
"grad_norm": 0.24375905096530914,
|
| 102369 |
+
"learning_rate": 1.4052858891867749e-06,
|
| 102370 |
+
"loss": 0.3431481122970581,
|
| 102371 |
+
"step": 14623
|
| 102372 |
+
},
|
| 102373 |
+
{
|
| 102374 |
+
"epoch": 17.94355828220859,
|
| 102375 |
+
"grad_norm": 0.2661164104938507,
|
| 102376 |
+
"learning_rate": 1.403627282554651e-06,
|
| 102377 |
+
"loss": 0.4697468876838684,
|
| 102378 |
+
"step": 14624
|
| 102379 |
+
},
|
| 102380 |
+
{
|
| 102381 |
+
"epoch": 17.94478527607362,
|
| 102382 |
+
"grad_norm": 0.23826119303703308,
|
| 102383 |
+
"learning_rate": 1.4019696270250153e-06,
|
| 102384 |
+
"loss": 0.39586636424064636,
|
| 102385 |
+
"step": 14625
|
| 102386 |
+
},
|
| 102387 |
+
{
|
| 102388 |
+
"epoch": 17.94601226993865,
|
| 102389 |
+
"grad_norm": 0.29387423396110535,
|
| 102390 |
+
"learning_rate": 1.4003129226646789e-06,
|
| 102391 |
+
"loss": 0.637994110584259,
|
| 102392 |
+
"step": 14626
|
| 102393 |
+
},
|
| 102394 |
+
{
|
| 102395 |
+
"epoch": 17.94723926380368,
|
| 102396 |
+
"grad_norm": 0.23277923464775085,
|
| 102397 |
+
"learning_rate": 1.398657169540421e-06,
|
| 102398 |
+
"loss": 0.508876383304596,
|
| 102399 |
+
"step": 14627
|
| 102400 |
+
},
|
| 102401 |
+
{
|
| 102402 |
+
"epoch": 17.948466257668713,
|
| 102403 |
+
"grad_norm": 0.24642880260944366,
|
| 102404 |
+
"learning_rate": 1.397002367718983e-06,
|
| 102405 |
+
"loss": 0.30661246180534363,
|
| 102406 |
+
"step": 14628
|
| 102407 |
+
},
|
| 102408 |
+
{
|
| 102409 |
+
"epoch": 17.94969325153374,
|
| 102410 |
+
"grad_norm": 0.3073272109031677,
|
| 102411 |
+
"learning_rate": 1.3953485172670588e-06,
|
| 102412 |
+
"loss": 0.5154626369476318,
|
| 102413 |
+
"step": 14629
|
| 102414 |
+
},
|
| 102415 |
+
{
|
| 102416 |
+
"epoch": 17.950920245398773,
|
| 102417 |
+
"grad_norm": 0.29507333040237427,
|
| 102418 |
+
"learning_rate": 1.3936956182513116e-06,
|
| 102419 |
+
"loss": 0.5971220135688782,
|
| 102420 |
+
"step": 14630
|
| 102421 |
+
},
|
| 102422 |
+
{
|
| 102423 |
+
"epoch": 17.952147239263805,
|
| 102424 |
+
"grad_norm": 0.3006948232650757,
|
| 102425 |
+
"learning_rate": 1.3920436707383688e-06,
|
| 102426 |
+
"loss": 0.7919573783874512,
|
| 102427 |
+
"step": 14631
|
| 102428 |
+
},
|
| 102429 |
+
{
|
| 102430 |
+
"epoch": 17.953374233128834,
|
| 102431 |
+
"grad_norm": 0.22684748470783234,
|
| 102432 |
+
"learning_rate": 1.3903926747948131e-06,
|
| 102433 |
+
"loss": 0.5472062826156616,
|
| 102434 |
+
"step": 14632
|
| 102435 |
+
},
|
| 102436 |
+
{
|
| 102437 |
+
"epoch": 17.954601226993866,
|
| 102438 |
+
"grad_norm": 0.2662692070007324,
|
| 102439 |
+
"learning_rate": 1.3887426304871943e-06,
|
| 102440 |
+
"loss": 0.5541741847991943,
|
| 102441 |
+
"step": 14633
|
| 102442 |
+
},
|
| 102443 |
+
{
|
| 102444 |
+
"epoch": 17.955828220858894,
|
| 102445 |
+
"grad_norm": 0.3192005455493927,
|
| 102446 |
+
"learning_rate": 1.3870935378820143e-06,
|
| 102447 |
+
"loss": 0.41688698530197144,
|
| 102448 |
+
"step": 14634
|
| 102449 |
+
},
|
| 102450 |
+
{
|
| 102451 |
+
"epoch": 17.957055214723926,
|
| 102452 |
+
"grad_norm": 0.27690204977989197,
|
| 102453 |
+
"learning_rate": 1.385445397045751e-06,
|
| 102454 |
+
"loss": 0.5172463059425354,
|
| 102455 |
+
"step": 14635
|
| 102456 |
+
},
|
| 102457 |
+
{
|
| 102458 |
+
"epoch": 17.958282208588958,
|
| 102459 |
+
"grad_norm": 0.2813359797000885,
|
| 102460 |
+
"learning_rate": 1.3837982080448286e-06,
|
| 102461 |
+
"loss": 0.5424720048904419,
|
| 102462 |
+
"step": 14636
|
| 102463 |
+
},
|
| 102464 |
+
{
|
| 102465 |
+
"epoch": 17.959509202453987,
|
| 102466 |
+
"grad_norm": 0.3012687861919403,
|
| 102467 |
+
"learning_rate": 1.38215197094565e-06,
|
| 102468 |
+
"loss": 0.5160753726959229,
|
| 102469 |
+
"step": 14637
|
| 102470 |
+
},
|
| 102471 |
+
{
|
| 102472 |
+
"epoch": 17.96073619631902,
|
| 102473 |
+
"grad_norm": 0.2561817467212677,
|
| 102474 |
+
"learning_rate": 1.3805066858145642e-06,
|
| 102475 |
+
"loss": 0.6985805034637451,
|
| 102476 |
+
"step": 14638
|
| 102477 |
+
},
|
| 102478 |
+
{
|
| 102479 |
+
"epoch": 17.96196319018405,
|
| 102480 |
+
"grad_norm": 0.2622375190258026,
|
| 102481 |
+
"learning_rate": 1.3788623527178884e-06,
|
| 102482 |
+
"loss": 0.5551583766937256,
|
| 102483 |
+
"step": 14639
|
| 102484 |
+
},
|
| 102485 |
+
{
|
| 102486 |
+
"epoch": 17.96319018404908,
|
| 102487 |
+
"grad_norm": 0.31576672196388245,
|
| 102488 |
+
"learning_rate": 1.3772189717218997e-06,
|
| 102489 |
+
"loss": 0.5932618379592896,
|
| 102490 |
+
"step": 14640
|
| 102491 |
+
},
|
| 102492 |
+
{
|
| 102493 |
+
"epoch": 17.96441717791411,
|
| 102494 |
+
"grad_norm": 0.3014386296272278,
|
| 102495 |
+
"learning_rate": 1.3755765428928397e-06,
|
| 102496 |
+
"loss": 0.5189095735549927,
|
| 102497 |
+
"step": 14641
|
| 102498 |
+
},
|
| 102499 |
+
{
|
| 102500 |
+
"epoch": 17.96564417177914,
|
| 102501 |
+
"grad_norm": 0.28893518447875977,
|
| 102502 |
+
"learning_rate": 1.3739350662969137e-06,
|
| 102503 |
+
"loss": 0.6193859577178955,
|
| 102504 |
+
"step": 14642
|
| 102505 |
+
},
|
| 102506 |
+
{
|
| 102507 |
+
"epoch": 17.96687116564417,
|
| 102508 |
+
"grad_norm": 0.2829989790916443,
|
| 102509 |
+
"learning_rate": 1.3722945420002826e-06,
|
| 102510 |
+
"loss": 0.6053096055984497,
|
| 102511 |
+
"step": 14643
|
| 102512 |
+
},
|
| 102513 |
+
{
|
| 102514 |
+
"epoch": 17.968098159509204,
|
| 102515 |
+
"grad_norm": 0.24034005403518677,
|
| 102516 |
+
"learning_rate": 1.3706549700690658e-06,
|
| 102517 |
+
"loss": 0.48862046003341675,
|
| 102518 |
+
"step": 14644
|
| 102519 |
+
},
|
| 102520 |
+
{
|
| 102521 |
+
"epoch": 17.969325153374232,
|
| 102522 |
+
"grad_norm": 0.2677293121814728,
|
| 102523 |
+
"learning_rate": 1.3690163505693547e-06,
|
| 102524 |
+
"loss": 0.615605354309082,
|
| 102525 |
+
"step": 14645
|
| 102526 |
+
},
|
| 102527 |
+
{
|
| 102528 |
+
"epoch": 17.970552147239264,
|
| 102529 |
+
"grad_norm": 0.2659146785736084,
|
| 102530 |
+
"learning_rate": 1.3673786835671943e-06,
|
| 102531 |
+
"loss": 0.6540563106536865,
|
| 102532 |
+
"step": 14646
|
| 102533 |
+
},
|
| 102534 |
+
{
|
| 102535 |
+
"epoch": 17.971779141104296,
|
| 102536 |
+
"grad_norm": 0.29513975977897644,
|
| 102537 |
+
"learning_rate": 1.3657419691286033e-06,
|
| 102538 |
+
"loss": 0.6881181001663208,
|
| 102539 |
+
"step": 14647
|
| 102540 |
+
},
|
| 102541 |
+
{
|
| 102542 |
+
"epoch": 17.973006134969324,
|
| 102543 |
+
"grad_norm": 0.277912437915802,
|
| 102544 |
+
"learning_rate": 1.3641062073195431e-06,
|
| 102545 |
+
"loss": 0.4790077805519104,
|
| 102546 |
+
"step": 14648
|
| 102547 |
+
},
|
| 102548 |
+
{
|
| 102549 |
+
"epoch": 17.974233128834356,
|
| 102550 |
+
"grad_norm": 0.26986825466156006,
|
| 102551 |
+
"learning_rate": 1.36247139820595e-06,
|
| 102552 |
+
"loss": 0.4128812551498413,
|
| 102553 |
+
"step": 14649
|
| 102554 |
+
},
|
| 102555 |
+
{
|
| 102556 |
+
"epoch": 17.975460122699385,
|
| 102557 |
+
"grad_norm": 0.26863521337509155,
|
| 102558 |
+
"learning_rate": 1.3608375418537184e-06,
|
| 102559 |
+
"loss": 0.6131393909454346,
|
| 102560 |
+
"step": 14650
|
| 102561 |
+
},
|
| 102562 |
+
{
|
| 102563 |
+
"epoch": 17.976687116564417,
|
| 102564 |
+
"grad_norm": 0.2878187298774719,
|
| 102565 |
+
"learning_rate": 1.3592046383287038e-06,
|
| 102566 |
+
"loss": 0.7043393850326538,
|
| 102567 |
+
"step": 14651
|
| 102568 |
+
},
|
| 102569 |
+
{
|
| 102570 |
+
"epoch": 17.97791411042945,
|
| 102571 |
+
"grad_norm": 0.2617773413658142,
|
| 102572 |
+
"learning_rate": 1.3575726876967287e-06,
|
| 102573 |
+
"loss": 0.57868492603302,
|
| 102574 |
+
"step": 14652
|
| 102575 |
+
},
|
| 102576 |
+
{
|
| 102577 |
+
"epoch": 17.979141104294477,
|
| 102578 |
+
"grad_norm": 0.257463276386261,
|
| 102579 |
+
"learning_rate": 1.3559416900235656e-06,
|
| 102580 |
+
"loss": 0.6122031211853027,
|
| 102581 |
+
"step": 14653
|
| 102582 |
+
},
|
| 102583 |
+
{
|
| 102584 |
+
"epoch": 17.98036809815951,
|
| 102585 |
+
"grad_norm": 0.2614612281322479,
|
| 102586 |
+
"learning_rate": 1.354311645374956e-06,
|
| 102587 |
+
"loss": 0.7196162939071655,
|
| 102588 |
+
"step": 14654
|
| 102589 |
+
},
|
| 102590 |
+
{
|
| 102591 |
+
"epoch": 17.98159509202454,
|
| 102592 |
+
"grad_norm": 0.2789611220359802,
|
| 102593 |
+
"learning_rate": 1.352682553816606e-06,
|
| 102594 |
+
"loss": 0.6364153623580933,
|
| 102595 |
+
"step": 14655
|
| 102596 |
+
},
|
| 102597 |
+
{
|
| 102598 |
+
"epoch": 17.98282208588957,
|
| 102599 |
+
"grad_norm": 0.28908681869506836,
|
| 102600 |
+
"learning_rate": 1.3510544154141769e-06,
|
| 102601 |
+
"loss": 0.6205997467041016,
|
| 102602 |
+
"step": 14656
|
| 102603 |
+
},
|
| 102604 |
+
{
|
| 102605 |
+
"epoch": 17.9840490797546,
|
| 102606 |
+
"grad_norm": 0.2980802655220032,
|
| 102607 |
+
"learning_rate": 1.3494272302332994e-06,
|
| 102608 |
+
"loss": 0.655971884727478,
|
| 102609 |
+
"step": 14657
|
| 102610 |
+
},
|
| 102611 |
+
{
|
| 102612 |
+
"epoch": 17.98527607361963,
|
| 102613 |
+
"grad_norm": 0.257820725440979,
|
| 102614 |
+
"learning_rate": 1.3478009983395546e-06,
|
| 102615 |
+
"loss": 0.5972291827201843,
|
| 102616 |
+
"step": 14658
|
| 102617 |
+
},
|
| 102618 |
+
{
|
| 102619 |
+
"epoch": 17.986503067484662,
|
| 102620 |
+
"grad_norm": 0.2680995762348175,
|
| 102621 |
+
"learning_rate": 1.3461757197984925e-06,
|
| 102622 |
+
"loss": 0.6842451095581055,
|
| 102623 |
+
"step": 14659
|
| 102624 |
+
},
|
| 102625 |
+
{
|
| 102626 |
+
"epoch": 17.987730061349694,
|
| 102627 |
+
"grad_norm": 0.27518877387046814,
|
| 102628 |
+
"learning_rate": 1.3445513946756222e-06,
|
| 102629 |
+
"loss": 0.6312453746795654,
|
| 102630 |
+
"step": 14660
|
| 102631 |
+
},
|
| 102632 |
+
{
|
| 102633 |
+
"epoch": 17.988957055214723,
|
| 102634 |
+
"grad_norm": 0.27443355321884155,
|
| 102635 |
+
"learning_rate": 1.3429280230364216e-06,
|
| 102636 |
+
"loss": 0.7069110870361328,
|
| 102637 |
+
"step": 14661
|
| 102638 |
+
},
|
| 102639 |
+
{
|
| 102640 |
+
"epoch": 17.990184049079755,
|
| 102641 |
+
"grad_norm": 0.285795658826828,
|
| 102642 |
+
"learning_rate": 1.341305604946322e-06,
|
| 102643 |
+
"loss": 0.6982868909835815,
|
| 102644 |
+
"step": 14662
|
| 102645 |
+
},
|
| 102646 |
+
{
|
| 102647 |
+
"epoch": 17.991411042944787,
|
| 102648 |
+
"grad_norm": 0.2761024534702301,
|
| 102649 |
+
"learning_rate": 1.3396841404707122e-06,
|
| 102650 |
+
"loss": 0.6290806531906128,
|
| 102651 |
+
"step": 14663
|
| 102652 |
+
},
|
| 102653 |
+
{
|
| 102654 |
+
"epoch": 17.992638036809815,
|
| 102655 |
+
"grad_norm": 0.26107197999954224,
|
| 102656 |
+
"learning_rate": 1.3380636296749544e-06,
|
| 102657 |
+
"loss": 0.6504873633384705,
|
| 102658 |
+
"step": 14664
|
| 102659 |
+
},
|
| 102660 |
+
{
|
| 102661 |
+
"epoch": 17.993865030674847,
|
| 102662 |
+
"grad_norm": 0.2861160635948181,
|
| 102663 |
+
"learning_rate": 1.3364440726243654e-06,
|
| 102664 |
+
"loss": 0.7837731838226318,
|
| 102665 |
+
"step": 14665
|
| 102666 |
+
},
|
| 102667 |
+
{
|
| 102668 |
+
"epoch": 17.99509202453988,
|
| 102669 |
+
"grad_norm": 0.25738802552223206,
|
| 102670 |
+
"learning_rate": 1.3348254693842238e-06,
|
| 102671 |
+
"loss": 0.6905242204666138,
|
| 102672 |
+
"step": 14666
|
| 102673 |
+
},
|
| 102674 |
+
{
|
| 102675 |
+
"epoch": 17.996319018404908,
|
| 102676 |
+
"grad_norm": 0.24866415560245514,
|
| 102677 |
+
"learning_rate": 1.3332078200197774e-06,
|
| 102678 |
+
"loss": 0.5362913608551025,
|
| 102679 |
+
"step": 14667
|
| 102680 |
+
},
|
| 102681 |
+
{
|
| 102682 |
+
"epoch": 17.99754601226994,
|
| 102683 |
+
"grad_norm": 0.2875867187976837,
|
| 102684 |
+
"learning_rate": 1.331591124596218e-06,
|
| 102685 |
+
"loss": 0.6967638731002808,
|
| 102686 |
+
"step": 14668
|
| 102687 |
+
},
|
| 102688 |
+
{
|
| 102689 |
+
"epoch": 17.998773006134968,
|
| 102690 |
+
"grad_norm": 0.3052618205547333,
|
| 102691 |
+
"learning_rate": 1.3299753831787192e-06,
|
| 102692 |
+
"loss": 0.7811344861984253,
|
| 102693 |
+
"step": 14669
|
| 102694 |
+
},
|
| 102695 |
+
{
|
| 102696 |
+
"epoch": 18.0,
|
| 102697 |
+
"grad_norm": 0.30326345562934875,
|
| 102698 |
+
"learning_rate": 1.3283605958324008e-06,
|
| 102699 |
+
"loss": 0.44550877809524536,
|
| 102700 |
+
"step": 14670
|
| 102701 |
+
},
|
| 102702 |
+
{
|
| 102703 |
+
"epoch": 18.001226993865032,
|
| 102704 |
+
"grad_norm": 0.27468201518058777,
|
| 102705 |
+
"learning_rate": 1.3267467626223606e-06,
|
| 102706 |
+
"loss": 0.5229916572570801,
|
| 102707 |
+
"step": 14671
|
| 102708 |
+
},
|
| 102709 |
+
{
|
| 102710 |
+
"epoch": 18.00245398773006,
|
| 102711 |
+
"grad_norm": 0.2245272397994995,
|
| 102712 |
+
"learning_rate": 1.3251338836136328e-06,
|
| 102713 |
+
"loss": 0.5883045196533203,
|
| 102714 |
+
"step": 14672
|
| 102715 |
+
},
|
| 102716 |
+
{
|
| 102717 |
+
"epoch": 18.003680981595092,
|
| 102718 |
+
"grad_norm": 0.26401230692863464,
|
| 102719 |
+
"learning_rate": 1.3235219588712377e-06,
|
| 102720 |
+
"loss": 0.4869281053543091,
|
| 102721 |
+
"step": 14673
|
| 102722 |
+
},
|
| 102723 |
+
{
|
| 102724 |
+
"epoch": 18.004907975460124,
|
| 102725 |
+
"grad_norm": 0.2918539345264435,
|
| 102726 |
+
"learning_rate": 1.3219109884601454e-06,
|
| 102727 |
+
"loss": 0.5983865261077881,
|
| 102728 |
+
"step": 14674
|
| 102729 |
+
},
|
| 102730 |
+
{
|
| 102731 |
+
"epoch": 18.006134969325153,
|
| 102732 |
+
"grad_norm": 0.24663130939006805,
|
| 102733 |
+
"learning_rate": 1.3203009724452876e-06,
|
| 102734 |
+
"loss": 0.4972448945045471,
|
| 102735 |
+
"step": 14675
|
| 102736 |
+
},
|
| 102737 |
+
{
|
| 102738 |
+
"epoch": 18.007361963190185,
|
| 102739 |
+
"grad_norm": 0.2538181245326996,
|
| 102740 |
+
"learning_rate": 1.3186919108915675e-06,
|
| 102741 |
+
"loss": 0.5849470496177673,
|
| 102742 |
+
"step": 14676
|
| 102743 |
+
},
|
| 102744 |
+
{
|
| 102745 |
+
"epoch": 18.008588957055213,
|
| 102746 |
+
"grad_norm": 0.3195498585700989,
|
| 102747 |
+
"learning_rate": 1.3170838038638278e-06,
|
| 102748 |
+
"loss": 0.4939005970954895,
|
| 102749 |
+
"step": 14677
|
| 102750 |
+
},
|
| 102751 |
+
{
|
| 102752 |
+
"epoch": 18.009815950920245,
|
| 102753 |
+
"grad_norm": 0.24394716322422028,
|
| 102754 |
+
"learning_rate": 1.3154766514268975e-06,
|
| 102755 |
+
"loss": 0.6386977434158325,
|
| 102756 |
+
"step": 14678
|
| 102757 |
+
},
|
| 102758 |
+
{
|
| 102759 |
+
"epoch": 18.011042944785277,
|
| 102760 |
+
"grad_norm": 0.21713653206825256,
|
| 102761 |
+
"learning_rate": 1.3138704536455493e-06,
|
| 102762 |
+
"loss": 0.32999545335769653,
|
| 102763 |
+
"step": 14679
|
| 102764 |
+
},
|
| 102765 |
+
{
|
| 102766 |
+
"epoch": 18.012269938650306,
|
| 102767 |
+
"grad_norm": 0.25246524810791016,
|
| 102768 |
+
"learning_rate": 1.3122652105845319e-06,
|
| 102769 |
+
"loss": 0.6087346076965332,
|
| 102770 |
+
"step": 14680
|
| 102771 |
+
},
|
| 102772 |
+
{
|
| 102773 |
+
"epoch": 18.013496932515338,
|
| 102774 |
+
"grad_norm": 0.28877395391464233,
|
| 102775 |
+
"learning_rate": 1.3106609223085459e-06,
|
| 102776 |
+
"loss": 0.6901490688323975,
|
| 102777 |
+
"step": 14681
|
| 102778 |
+
},
|
| 102779 |
+
{
|
| 102780 |
+
"epoch": 18.01472392638037,
|
| 102781 |
+
"grad_norm": 0.289163738489151,
|
| 102782 |
+
"learning_rate": 1.3090575888822509e-06,
|
| 102783 |
+
"loss": 0.7074642181396484,
|
| 102784 |
+
"step": 14682
|
| 102785 |
+
},
|
| 102786 |
+
{
|
| 102787 |
+
"epoch": 18.0159509202454,
|
| 102788 |
+
"grad_norm": 0.28363123536109924,
|
| 102789 |
+
"learning_rate": 1.307455210370276e-06,
|
| 102790 |
+
"loss": 0.6447563767433167,
|
| 102791 |
+
"step": 14683
|
| 102792 |
+
},
|
| 102793 |
+
{
|
| 102794 |
+
"epoch": 18.01717791411043,
|
| 102795 |
+
"grad_norm": 0.2657002806663513,
|
| 102796 |
+
"learning_rate": 1.305853786837208e-06,
|
| 102797 |
+
"loss": 0.5514336824417114,
|
| 102798 |
+
"step": 14684
|
| 102799 |
+
},
|
| 102800 |
+
{
|
| 102801 |
+
"epoch": 18.01840490797546,
|
| 102802 |
+
"grad_norm": 0.2831283509731293,
|
| 102803 |
+
"learning_rate": 1.3042533183475952e-06,
|
| 102804 |
+
"loss": 0.8119156956672668,
|
| 102805 |
+
"step": 14685
|
| 102806 |
+
},
|
| 102807 |
+
{
|
| 102808 |
+
"epoch": 18.01963190184049,
|
| 102809 |
+
"grad_norm": 0.2486710399389267,
|
| 102810 |
+
"learning_rate": 1.3026538049659532e-06,
|
| 102811 |
+
"loss": 0.7142343521118164,
|
| 102812 |
+
"step": 14686
|
| 102813 |
+
},
|
| 102814 |
+
{
|
| 102815 |
+
"epoch": 18.020858895705523,
|
| 102816 |
+
"grad_norm": 0.24357111752033234,
|
| 102817 |
+
"learning_rate": 1.3010552467567466e-06,
|
| 102818 |
+
"loss": 0.5224248766899109,
|
| 102819 |
+
"step": 14687
|
| 102820 |
+
},
|
| 102821 |
+
{
|
| 102822 |
+
"epoch": 18.02208588957055,
|
| 102823 |
+
"grad_norm": 0.24826917052268982,
|
| 102824 |
+
"learning_rate": 1.2994576437844103e-06,
|
| 102825 |
+
"loss": 0.5186973810195923,
|
| 102826 |
+
"step": 14688
|
| 102827 |
+
},
|
| 102828 |
+
{
|
| 102829 |
+
"epoch": 18.023312883435583,
|
| 102830 |
+
"grad_norm": 0.27868419885635376,
|
| 102831 |
+
"learning_rate": 1.2978609961133399e-06,
|
| 102832 |
+
"loss": 0.742059051990509,
|
| 102833 |
+
"step": 14689
|
| 102834 |
+
},
|
| 102835 |
+
{
|
| 102836 |
+
"epoch": 18.024539877300615,
|
| 102837 |
+
"grad_norm": 0.24074698984622955,
|
| 102838 |
+
"learning_rate": 1.296265303807892e-06,
|
| 102839 |
+
"loss": 0.6613558530807495,
|
| 102840 |
+
"step": 14690
|
| 102841 |
+
},
|
| 102842 |
+
{
|
| 102843 |
+
"epoch": 18.025766871165644,
|
| 102844 |
+
"grad_norm": 0.24142009019851685,
|
| 102845 |
+
"learning_rate": 1.2946705669323878e-06,
|
| 102846 |
+
"loss": 0.594866931438446,
|
| 102847 |
+
"step": 14691
|
| 102848 |
+
},
|
| 102849 |
+
{
|
| 102850 |
+
"epoch": 18.026993865030676,
|
| 102851 |
+
"grad_norm": 0.24020954966545105,
|
| 102852 |
+
"learning_rate": 1.2930767855511005e-06,
|
| 102853 |
+
"loss": 0.5475742220878601,
|
| 102854 |
+
"step": 14692
|
| 102855 |
+
},
|
| 102856 |
+
{
|
| 102857 |
+
"epoch": 18.028220858895704,
|
| 102858 |
+
"grad_norm": 0.2685195207595825,
|
| 102859 |
+
"learning_rate": 1.2914839597282736e-06,
|
| 102860 |
+
"loss": 0.6200052499771118,
|
| 102861 |
+
"step": 14693
|
| 102862 |
+
},
|
| 102863 |
+
{
|
| 102864 |
+
"epoch": 18.029447852760736,
|
| 102865 |
+
"grad_norm": 0.26735803484916687,
|
| 102866 |
+
"learning_rate": 1.2898920895281108e-06,
|
| 102867 |
+
"loss": 0.5353715419769287,
|
| 102868 |
+
"step": 14694
|
| 102869 |
+
},
|
| 102870 |
+
{
|
| 102871 |
+
"epoch": 18.030674846625768,
|
| 102872 |
+
"grad_norm": 0.2810949385166168,
|
| 102873 |
+
"learning_rate": 1.288301175014775e-06,
|
| 102874 |
+
"loss": 0.4903876483440399,
|
| 102875 |
+
"step": 14695
|
| 102876 |
+
},
|
| 102877 |
+
{
|
| 102878 |
+
"epoch": 18.031901840490796,
|
| 102879 |
+
"grad_norm": 0.2875228822231293,
|
| 102880 |
+
"learning_rate": 1.2867112162523897e-06,
|
| 102881 |
+
"loss": 0.7412351369857788,
|
| 102882 |
+
"step": 14696
|
| 102883 |
+
},
|
| 102884 |
+
{
|
| 102885 |
+
"epoch": 18.03312883435583,
|
| 102886 |
+
"grad_norm": 0.22166800498962402,
|
| 102887 |
+
"learning_rate": 1.2851222133050456e-06,
|
| 102888 |
+
"loss": 0.5275256037712097,
|
| 102889 |
+
"step": 14697
|
| 102890 |
+
},
|
| 102891 |
+
{
|
| 102892 |
+
"epoch": 18.03435582822086,
|
| 102893 |
+
"grad_norm": 0.2264336496591568,
|
| 102894 |
+
"learning_rate": 1.283534166236783e-06,
|
| 102895 |
+
"loss": 0.3994319438934326,
|
| 102896 |
+
"step": 14698
|
| 102897 |
+
},
|
| 102898 |
+
{
|
| 102899 |
+
"epoch": 18.03558282208589,
|
| 102900 |
+
"grad_norm": 0.27725744247436523,
|
| 102901 |
+
"learning_rate": 1.2819470751116196e-06,
|
| 102902 |
+
"loss": 0.45187127590179443,
|
| 102903 |
+
"step": 14699
|
| 102904 |
+
},
|
| 102905 |
+
{
|
| 102906 |
+
"epoch": 18.03680981595092,
|
| 102907 |
+
"grad_norm": 0.26830852031707764,
|
| 102908 |
+
"learning_rate": 1.2803609399935274e-06,
|
| 102909 |
+
"loss": 0.6691170334815979,
|
| 102910 |
+
"step": 14700
|
| 102911 |
}
|
| 102912 |
],
|
| 102913 |
"logging_steps": 1,
|
|
|
|
| 102927 |
"attributes": {}
|
| 102928 |
}
|
| 102929 |
},
|
| 102930 |
+
"total_flos": 4.111667366738264e+19,
|
| 102931 |
"train_batch_size": 8,
|
| 102932 |
"trial_name": null,
|
| 102933 |
"trial_params": null
|