Training in progress, step 15900, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa3da879200ff082640f00f5c5425a3f7fdc142210822c2ab4ee54e5a51301dd
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:623382283e0ade9f44be1d1849b35f90cbcd30b371e345b250a3f707c3e72cb7
|
| 3 |
size 173247691
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a0d189fe0ddf77593b64aa3bf579bc8e4c210bf385640c32a60cb2fbf7b5a65
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 19.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -109208,6 +109208,2106 @@
|
|
| 109208 |
"learning_rate": 2.4717876774865425e-07,
|
| 109209 |
"loss": 0.46103155612945557,
|
| 109210 |
"step": 15600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109211 |
}
|
| 109212 |
],
|
| 109213 |
"logging_steps": 1,
|
|
@@ -109227,7 +111327,7 @@
|
|
| 109227 |
"attributes": {}
|
| 109228 |
}
|
| 109229 |
},
|
| 109230 |
-
"total_flos": 4.
|
| 109231 |
"train_batch_size": 8,
|
| 109232 |
"trial_name": null,
|
| 109233 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 19.50920245398773,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15900,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 109208 |
"learning_rate": 2.4717876774865425e-07,
|
| 109209 |
"loss": 0.46103155612945557,
|
| 109210 |
"step": 15600
|
| 109211 |
+
},
|
| 109212 |
+
{
|
| 109213 |
+
"epoch": 19.14233128834356,
|
| 109214 |
+
"grad_norm": 0.22210556268692017,
|
| 109215 |
+
"learning_rate": 2.464752134174547e-07,
|
| 109216 |
+
"loss": 0.46309924125671387,
|
| 109217 |
+
"step": 15601
|
| 109218 |
+
},
|
| 109219 |
+
{
|
| 109220 |
+
"epoch": 19.143558282208588,
|
| 109221 |
+
"grad_norm": 0.2457238733768463,
|
| 109222 |
+
"learning_rate": 2.4577265683024265e-07,
|
| 109223 |
+
"loss": 0.6431790590286255,
|
| 109224 |
+
"step": 15602
|
| 109225 |
+
},
|
| 109226 |
+
{
|
| 109227 |
+
"epoch": 19.14478527607362,
|
| 109228 |
+
"grad_norm": 0.31417316198349,
|
| 109229 |
+
"learning_rate": 2.4507109801533423e-07,
|
| 109230 |
+
"loss": 0.3598482310771942,
|
| 109231 |
+
"step": 15603
|
| 109232 |
+
},
|
| 109233 |
+
{
|
| 109234 |
+
"epoch": 19.14601226993865,
|
| 109235 |
+
"grad_norm": 0.2731918394565582,
|
| 109236 |
+
"learning_rate": 2.443705370010041e-07,
|
| 109237 |
+
"loss": 0.5284492373466492,
|
| 109238 |
+
"step": 15604
|
| 109239 |
+
},
|
| 109240 |
+
{
|
| 109241 |
+
"epoch": 19.14723926380368,
|
| 109242 |
+
"grad_norm": 0.2393893301486969,
|
| 109243 |
+
"learning_rate": 2.436709738154935e-07,
|
| 109244 |
+
"loss": 0.5362446308135986,
|
| 109245 |
+
"step": 15605
|
| 109246 |
+
},
|
| 109247 |
+
{
|
| 109248 |
+
"epoch": 19.148466257668712,
|
| 109249 |
+
"grad_norm": 0.2542176842689514,
|
| 109250 |
+
"learning_rate": 2.4297240848699943e-07,
|
| 109251 |
+
"loss": 0.6258782148361206,
|
| 109252 |
+
"step": 15606
|
| 109253 |
+
},
|
| 109254 |
+
{
|
| 109255 |
+
"epoch": 19.14969325153374,
|
| 109256 |
+
"grad_norm": 0.25948724150657654,
|
| 109257 |
+
"learning_rate": 2.4227484104367704e-07,
|
| 109258 |
+
"loss": 0.6996631622314453,
|
| 109259 |
+
"step": 15607
|
| 109260 |
+
},
|
| 109261 |
+
{
|
| 109262 |
+
"epoch": 19.150920245398773,
|
| 109263 |
+
"grad_norm": 0.32072097063064575,
|
| 109264 |
+
"learning_rate": 2.4157827151364554e-07,
|
| 109265 |
+
"loss": 0.6921621561050415,
|
| 109266 |
+
"step": 15608
|
| 109267 |
+
},
|
| 109268 |
+
{
|
| 109269 |
+
"epoch": 19.152147239263805,
|
| 109270 |
+
"grad_norm": 0.24242158234119415,
|
| 109271 |
+
"learning_rate": 2.408826999249797e-07,
|
| 109272 |
+
"loss": 0.2629132568836212,
|
| 109273 |
+
"step": 15609
|
| 109274 |
+
},
|
| 109275 |
+
{
|
| 109276 |
+
"epoch": 19.153374233128833,
|
| 109277 |
+
"grad_norm": 0.2632128596305847,
|
| 109278 |
+
"learning_rate": 2.4018812630571543e-07,
|
| 109279 |
+
"loss": 0.6644801497459412,
|
| 109280 |
+
"step": 15610
|
| 109281 |
+
},
|
| 109282 |
+
{
|
| 109283 |
+
"epoch": 19.154601226993865,
|
| 109284 |
+
"grad_norm": 0.3048565983772278,
|
| 109285 |
+
"learning_rate": 2.394945506838525e-07,
|
| 109286 |
+
"loss": 0.6476645469665527,
|
| 109287 |
+
"step": 15611
|
| 109288 |
+
},
|
| 109289 |
+
{
|
| 109290 |
+
"epoch": 19.155828220858897,
|
| 109291 |
+
"grad_norm": 0.25412169098854065,
|
| 109292 |
+
"learning_rate": 2.388019730873464e-07,
|
| 109293 |
+
"loss": 0.6835761070251465,
|
| 109294 |
+
"step": 15612
|
| 109295 |
+
},
|
| 109296 |
+
{
|
| 109297 |
+
"epoch": 19.157055214723925,
|
| 109298 |
+
"grad_norm": 0.2939128875732422,
|
| 109299 |
+
"learning_rate": 2.381103935441109e-07,
|
| 109300 |
+
"loss": 0.6096471548080444,
|
| 109301 |
+
"step": 15613
|
| 109302 |
+
},
|
| 109303 |
+
{
|
| 109304 |
+
"epoch": 19.158282208588957,
|
| 109305 |
+
"grad_norm": 0.24789486825466156,
|
| 109306 |
+
"learning_rate": 2.374198120820209e-07,
|
| 109307 |
+
"loss": 0.43319571018218994,
|
| 109308 |
+
"step": 15614
|
| 109309 |
+
},
|
| 109310 |
+
{
|
| 109311 |
+
"epoch": 19.15950920245399,
|
| 109312 |
+
"grad_norm": 0.23721913993358612,
|
| 109313 |
+
"learning_rate": 2.3673022872891527e-07,
|
| 109314 |
+
"loss": 0.5442913770675659,
|
| 109315 |
+
"step": 15615
|
| 109316 |
+
},
|
| 109317 |
+
{
|
| 109318 |
+
"epoch": 19.160736196319018,
|
| 109319 |
+
"grad_norm": 0.2714819610118866,
|
| 109320 |
+
"learning_rate": 2.360416435125856e-07,
|
| 109321 |
+
"loss": 0.6057541370391846,
|
| 109322 |
+
"step": 15616
|
| 109323 |
+
},
|
| 109324 |
+
{
|
| 109325 |
+
"epoch": 19.16196319018405,
|
| 109326 |
+
"grad_norm": 0.26407966017723083,
|
| 109327 |
+
"learning_rate": 2.3535405646078756e-07,
|
| 109328 |
+
"loss": 0.6631194353103638,
|
| 109329 |
+
"step": 15617
|
| 109330 |
+
},
|
| 109331 |
+
{
|
| 109332 |
+
"epoch": 19.16319018404908,
|
| 109333 |
+
"grad_norm": 0.2978915870189667,
|
| 109334 |
+
"learning_rate": 2.3466746760123782e-07,
|
| 109335 |
+
"loss": 0.6162807941436768,
|
| 109336 |
+
"step": 15618
|
| 109337 |
+
},
|
| 109338 |
+
{
|
| 109339 |
+
"epoch": 19.16441717791411,
|
| 109340 |
+
"grad_norm": 0.32170945405960083,
|
| 109341 |
+
"learning_rate": 2.3398187696160867e-07,
|
| 109342 |
+
"loss": 0.4455367922782898,
|
| 109343 |
+
"step": 15619
|
| 109344 |
+
},
|
| 109345 |
+
{
|
| 109346 |
+
"epoch": 19.165644171779142,
|
| 109347 |
+
"grad_norm": 0.25322800874710083,
|
| 109348 |
+
"learning_rate": 2.3329728456953638e-07,
|
| 109349 |
+
"loss": 0.661132276058197,
|
| 109350 |
+
"step": 15620
|
| 109351 |
+
},
|
| 109352 |
+
{
|
| 109353 |
+
"epoch": 19.16687116564417,
|
| 109354 |
+
"grad_norm": 0.2916284203529358,
|
| 109355 |
+
"learning_rate": 2.3261369045261273e-07,
|
| 109356 |
+
"loss": 0.5665102601051331,
|
| 109357 |
+
"step": 15621
|
| 109358 |
+
},
|
| 109359 |
+
{
|
| 109360 |
+
"epoch": 19.168098159509203,
|
| 109361 |
+
"grad_norm": 0.2513220012187958,
|
| 109362 |
+
"learning_rate": 2.3193109463839347e-07,
|
| 109363 |
+
"loss": 0.5695427656173706,
|
| 109364 |
+
"step": 15622
|
| 109365 |
+
},
|
| 109366 |
+
{
|
| 109367 |
+
"epoch": 19.169325153374235,
|
| 109368 |
+
"grad_norm": 0.26983973383903503,
|
| 109369 |
+
"learning_rate": 2.3124949715438992e-07,
|
| 109370 |
+
"loss": 0.6138904094696045,
|
| 109371 |
+
"step": 15623
|
| 109372 |
+
},
|
| 109373 |
+
{
|
| 109374 |
+
"epoch": 19.170552147239263,
|
| 109375 |
+
"grad_norm": 0.3260478377342224,
|
| 109376 |
+
"learning_rate": 2.3056889802807457e-07,
|
| 109377 |
+
"loss": 0.6232873201370239,
|
| 109378 |
+
"step": 15624
|
| 109379 |
+
},
|
| 109380 |
+
{
|
| 109381 |
+
"epoch": 19.171779141104295,
|
| 109382 |
+
"grad_norm": 0.2713378369808197,
|
| 109383 |
+
"learning_rate": 2.2988929728688659e-07,
|
| 109384 |
+
"loss": 0.5095798373222351,
|
| 109385 |
+
"step": 15625
|
| 109386 |
+
},
|
| 109387 |
+
{
|
| 109388 |
+
"epoch": 19.173006134969324,
|
| 109389 |
+
"grad_norm": 0.24961231648921967,
|
| 109390 |
+
"learning_rate": 2.2921069495820957e-07,
|
| 109391 |
+
"loss": 0.47833728790283203,
|
| 109392 |
+
"step": 15626
|
| 109393 |
+
},
|
| 109394 |
+
{
|
| 109395 |
+
"epoch": 19.174233128834356,
|
| 109396 |
+
"grad_norm": 0.2729978859424591,
|
| 109397 |
+
"learning_rate": 2.2853309106940502e-07,
|
| 109398 |
+
"loss": 0.637974739074707,
|
| 109399 |
+
"step": 15627
|
| 109400 |
+
},
|
| 109401 |
+
{
|
| 109402 |
+
"epoch": 19.175460122699388,
|
| 109403 |
+
"grad_norm": 0.2824903428554535,
|
| 109404 |
+
"learning_rate": 2.278564856477816e-07,
|
| 109405 |
+
"loss": 0.4462968111038208,
|
| 109406 |
+
"step": 15628
|
| 109407 |
+
},
|
| 109408 |
+
{
|
| 109409 |
+
"epoch": 19.176687116564416,
|
| 109410 |
+
"grad_norm": 0.2644289433956146,
|
| 109411 |
+
"learning_rate": 2.271808787206092e-07,
|
| 109412 |
+
"loss": 0.5875904560089111,
|
| 109413 |
+
"step": 15629
|
| 109414 |
+
},
|
| 109415 |
+
{
|
| 109416 |
+
"epoch": 19.177914110429448,
|
| 109417 |
+
"grad_norm": 0.2935873866081238,
|
| 109418 |
+
"learning_rate": 2.2650627031511884e-07,
|
| 109419 |
+
"loss": 0.7554188370704651,
|
| 109420 |
+
"step": 15630
|
| 109421 |
+
},
|
| 109422 |
+
{
|
| 109423 |
+
"epoch": 19.17914110429448,
|
| 109424 |
+
"grad_norm": 0.26379087567329407,
|
| 109425 |
+
"learning_rate": 2.2583266045850814e-07,
|
| 109426 |
+
"loss": 0.4860280156135559,
|
| 109427 |
+
"step": 15631
|
| 109428 |
+
},
|
| 109429 |
+
{
|
| 109430 |
+
"epoch": 19.18036809815951,
|
| 109431 |
+
"grad_norm": 0.27808013558387756,
|
| 109432 |
+
"learning_rate": 2.251600491779249e-07,
|
| 109433 |
+
"loss": 0.6582149267196655,
|
| 109434 |
+
"step": 15632
|
| 109435 |
+
},
|
| 109436 |
+
{
|
| 109437 |
+
"epoch": 19.18159509202454,
|
| 109438 |
+
"grad_norm": 0.2750902771949768,
|
| 109439 |
+
"learning_rate": 2.244884365004779e-07,
|
| 109440 |
+
"loss": 0.5358799695968628,
|
| 109441 |
+
"step": 15633
|
| 109442 |
+
},
|
| 109443 |
+
{
|
| 109444 |
+
"epoch": 19.18282208588957,
|
| 109445 |
+
"grad_norm": 0.20725314319133759,
|
| 109446 |
+
"learning_rate": 2.2381782245324001e-07,
|
| 109447 |
+
"loss": 0.32288414239883423,
|
| 109448 |
+
"step": 15634
|
| 109449 |
+
},
|
| 109450 |
+
{
|
| 109451 |
+
"epoch": 19.1840490797546,
|
| 109452 |
+
"grad_norm": 0.25065287947654724,
|
| 109453 |
+
"learning_rate": 2.2314820706324236e-07,
|
| 109454 |
+
"loss": 0.6896646022796631,
|
| 109455 |
+
"step": 15635
|
| 109456 |
+
},
|
| 109457 |
+
{
|
| 109458 |
+
"epoch": 19.185276073619633,
|
| 109459 |
+
"grad_norm": 0.2905101776123047,
|
| 109460 |
+
"learning_rate": 2.2247959035747447e-07,
|
| 109461 |
+
"loss": 0.5678698420524597,
|
| 109462 |
+
"step": 15636
|
| 109463 |
+
},
|
| 109464 |
+
{
|
| 109465 |
+
"epoch": 19.18650306748466,
|
| 109466 |
+
"grad_norm": 0.25215017795562744,
|
| 109467 |
+
"learning_rate": 2.2181197236288697e-07,
|
| 109468 |
+
"loss": 0.40838098526000977,
|
| 109469 |
+
"step": 15637
|
| 109470 |
+
},
|
| 109471 |
+
{
|
| 109472 |
+
"epoch": 19.187730061349694,
|
| 109473 |
+
"grad_norm": 0.26705026626586914,
|
| 109474 |
+
"learning_rate": 2.2114535310639173e-07,
|
| 109475 |
+
"loss": 0.5971882939338684,
|
| 109476 |
+
"step": 15638
|
| 109477 |
+
},
|
| 109478 |
+
{
|
| 109479 |
+
"epoch": 19.188957055214726,
|
| 109480 |
+
"grad_norm": 0.234080508351326,
|
| 109481 |
+
"learning_rate": 2.2047973261485334e-07,
|
| 109482 |
+
"loss": 0.4631671905517578,
|
| 109483 |
+
"step": 15639
|
| 109484 |
+
},
|
| 109485 |
+
{
|
| 109486 |
+
"epoch": 19.190184049079754,
|
| 109487 |
+
"grad_norm": 0.274194598197937,
|
| 109488 |
+
"learning_rate": 2.198151109151031e-07,
|
| 109489 |
+
"loss": 0.7547903060913086,
|
| 109490 |
+
"step": 15640
|
| 109491 |
+
},
|
| 109492 |
+
{
|
| 109493 |
+
"epoch": 19.191411042944786,
|
| 109494 |
+
"grad_norm": 0.25741973519325256,
|
| 109495 |
+
"learning_rate": 2.191514880339307e-07,
|
| 109496 |
+
"loss": 0.4803391098976135,
|
| 109497 |
+
"step": 15641
|
| 109498 |
+
},
|
| 109499 |
+
{
|
| 109500 |
+
"epoch": 19.192638036809814,
|
| 109501 |
+
"grad_norm": 0.308315247297287,
|
| 109502 |
+
"learning_rate": 2.1848886399808422e-07,
|
| 109503 |
+
"loss": 0.7040413022041321,
|
| 109504 |
+
"step": 15642
|
| 109505 |
+
},
|
| 109506 |
+
{
|
| 109507 |
+
"epoch": 19.193865030674846,
|
| 109508 |
+
"grad_norm": 0.2893327474594116,
|
| 109509 |
+
"learning_rate": 2.1782723883427282e-07,
|
| 109510 |
+
"loss": 0.8077352046966553,
|
| 109511 |
+
"step": 15643
|
| 109512 |
+
},
|
| 109513 |
+
{
|
| 109514 |
+
"epoch": 19.19509202453988,
|
| 109515 |
+
"grad_norm": 0.26443737745285034,
|
| 109516 |
+
"learning_rate": 2.171666125691668e-07,
|
| 109517 |
+
"loss": 0.7435310482978821,
|
| 109518 |
+
"step": 15644
|
| 109519 |
+
},
|
| 109520 |
+
{
|
| 109521 |
+
"epoch": 19.196319018404907,
|
| 109522 |
+
"grad_norm": 0.27335280179977417,
|
| 109523 |
+
"learning_rate": 2.1650698522938938e-07,
|
| 109524 |
+
"loss": 0.7214279174804688,
|
| 109525 |
+
"step": 15645
|
| 109526 |
+
},
|
| 109527 |
+
{
|
| 109528 |
+
"epoch": 19.19754601226994,
|
| 109529 |
+
"grad_norm": 0.2606821656227112,
|
| 109530 |
+
"learning_rate": 2.1584835684152759e-07,
|
| 109531 |
+
"loss": 0.6016335487365723,
|
| 109532 |
+
"step": 15646
|
| 109533 |
+
},
|
| 109534 |
+
{
|
| 109535 |
+
"epoch": 19.19877300613497,
|
| 109536 |
+
"grad_norm": 0.2935357987880707,
|
| 109537 |
+
"learning_rate": 2.1519072743213797e-07,
|
| 109538 |
+
"loss": 0.2848409414291382,
|
| 109539 |
+
"step": 15647
|
| 109540 |
+
},
|
| 109541 |
+
{
|
| 109542 |
+
"epoch": 19.2,
|
| 109543 |
+
"grad_norm": 0.2639932632446289,
|
| 109544 |
+
"learning_rate": 2.1453409702771877e-07,
|
| 109545 |
+
"loss": 0.6620244383811951,
|
| 109546 |
+
"step": 15648
|
| 109547 |
+
},
|
| 109548 |
+
{
|
| 109549 |
+
"epoch": 19.20122699386503,
|
| 109550 |
+
"grad_norm": 0.29775023460388184,
|
| 109551 |
+
"learning_rate": 2.1387846565474045e-07,
|
| 109552 |
+
"loss": 0.6931723952293396,
|
| 109553 |
+
"step": 15649
|
| 109554 |
+
},
|
| 109555 |
+
{
|
| 109556 |
+
"epoch": 19.20245398773006,
|
| 109557 |
+
"grad_norm": 0.25157430768013,
|
| 109558 |
+
"learning_rate": 2.1322383333962636e-07,
|
| 109559 |
+
"loss": 0.6132462024688721,
|
| 109560 |
+
"step": 15650
|
| 109561 |
+
},
|
| 109562 |
+
{
|
| 109563 |
+
"epoch": 19.20368098159509,
|
| 109564 |
+
"grad_norm": 0.26053595542907715,
|
| 109565 |
+
"learning_rate": 2.1257020010876926e-07,
|
| 109566 |
+
"loss": 0.6839142441749573,
|
| 109567 |
+
"step": 15651
|
| 109568 |
+
},
|
| 109569 |
+
{
|
| 109570 |
+
"epoch": 19.204907975460124,
|
| 109571 |
+
"grad_norm": 0.25885042548179626,
|
| 109572 |
+
"learning_rate": 2.119175659885092e-07,
|
| 109573 |
+
"loss": 0.644181489944458,
|
| 109574 |
+
"step": 15652
|
| 109575 |
+
},
|
| 109576 |
+
{
|
| 109577 |
+
"epoch": 19.206134969325152,
|
| 109578 |
+
"grad_norm": 0.2844583988189697,
|
| 109579 |
+
"learning_rate": 2.1126593100515567e-07,
|
| 109580 |
+
"loss": 0.6981903314590454,
|
| 109581 |
+
"step": 15653
|
| 109582 |
+
},
|
| 109583 |
+
{
|
| 109584 |
+
"epoch": 19.207361963190184,
|
| 109585 |
+
"grad_norm": 0.24492456018924713,
|
| 109586 |
+
"learning_rate": 2.1061529518497103e-07,
|
| 109587 |
+
"loss": 0.5627588629722595,
|
| 109588 |
+
"step": 15654
|
| 109589 |
+
},
|
| 109590 |
+
{
|
| 109591 |
+
"epoch": 19.208588957055216,
|
| 109592 |
+
"grad_norm": 0.2486869841814041,
|
| 109593 |
+
"learning_rate": 2.0996565855418427e-07,
|
| 109594 |
+
"loss": 0.6487361192703247,
|
| 109595 |
+
"step": 15655
|
| 109596 |
+
},
|
| 109597 |
+
{
|
| 109598 |
+
"epoch": 19.209815950920245,
|
| 109599 |
+
"grad_norm": 0.25746220350265503,
|
| 109600 |
+
"learning_rate": 2.093170211389772e-07,
|
| 109601 |
+
"loss": 0.49360519647598267,
|
| 109602 |
+
"step": 15656
|
| 109603 |
+
},
|
| 109604 |
+
{
|
| 109605 |
+
"epoch": 19.211042944785277,
|
| 109606 |
+
"grad_norm": 0.27797210216522217,
|
| 109607 |
+
"learning_rate": 2.0866938296549565e-07,
|
| 109608 |
+
"loss": 0.5869017243385315,
|
| 109609 |
+
"step": 15657
|
| 109610 |
+
},
|
| 109611 |
+
{
|
| 109612 |
+
"epoch": 19.212269938650305,
|
| 109613 |
+
"grad_norm": 0.2552661895751953,
|
| 109614 |
+
"learning_rate": 2.0802274405984646e-07,
|
| 109615 |
+
"loss": 0.5413323044776917,
|
| 109616 |
+
"step": 15658
|
| 109617 |
+
},
|
| 109618 |
+
{
|
| 109619 |
+
"epoch": 19.213496932515337,
|
| 109620 |
+
"grad_norm": 0.2646702229976654,
|
| 109621 |
+
"learning_rate": 2.0737710444808932e-07,
|
| 109622 |
+
"loss": 0.539040207862854,
|
| 109623 |
+
"step": 15659
|
| 109624 |
+
},
|
| 109625 |
+
{
|
| 109626 |
+
"epoch": 19.21472392638037,
|
| 109627 |
+
"grad_norm": 0.24074086546897888,
|
| 109628 |
+
"learning_rate": 2.0673246415624792e-07,
|
| 109629 |
+
"loss": 0.4676140546798706,
|
| 109630 |
+
"step": 15660
|
| 109631 |
+
},
|
| 109632 |
+
{
|
| 109633 |
+
"epoch": 19.215950920245398,
|
| 109634 |
+
"grad_norm": 0.24642446637153625,
|
| 109635 |
+
"learning_rate": 2.0608882321031254e-07,
|
| 109636 |
+
"loss": 0.5153641104698181,
|
| 109637 |
+
"step": 15661
|
| 109638 |
+
},
|
| 109639 |
+
{
|
| 109640 |
+
"epoch": 19.21717791411043,
|
| 109641 |
+
"grad_norm": 0.26997655630111694,
|
| 109642 |
+
"learning_rate": 2.0544618163622076e-07,
|
| 109643 |
+
"loss": 0.4566311240196228,
|
| 109644 |
+
"step": 15662
|
| 109645 |
+
},
|
| 109646 |
+
{
|
| 109647 |
+
"epoch": 19.21840490797546,
|
| 109648 |
+
"grad_norm": 0.27587464451789856,
|
| 109649 |
+
"learning_rate": 2.0480453945987688e-07,
|
| 109650 |
+
"loss": 0.4805257320404053,
|
| 109651 |
+
"step": 15663
|
| 109652 |
+
},
|
| 109653 |
+
{
|
| 109654 |
+
"epoch": 19.21963190184049,
|
| 109655 |
+
"grad_norm": 0.27641117572784424,
|
| 109656 |
+
"learning_rate": 2.0416389670714354e-07,
|
| 109657 |
+
"loss": 0.5234161615371704,
|
| 109658 |
+
"step": 15664
|
| 109659 |
+
},
|
| 109660 |
+
{
|
| 109661 |
+
"epoch": 19.220858895705522,
|
| 109662 |
+
"grad_norm": 0.3027830123901367,
|
| 109663 |
+
"learning_rate": 2.035242534038445e-07,
|
| 109664 |
+
"loss": 0.6665984392166138,
|
| 109665 |
+
"step": 15665
|
| 109666 |
+
},
|
| 109667 |
+
{
|
| 109668 |
+
"epoch": 19.22208588957055,
|
| 109669 |
+
"grad_norm": 0.2664552628993988,
|
| 109670 |
+
"learning_rate": 2.028856095757592e-07,
|
| 109671 |
+
"loss": 0.6065563559532166,
|
| 109672 |
+
"step": 15666
|
| 109673 |
+
},
|
| 109674 |
+
{
|
| 109675 |
+
"epoch": 19.223312883435582,
|
| 109676 |
+
"grad_norm": 0.2753150463104248,
|
| 109677 |
+
"learning_rate": 2.0224796524863366e-07,
|
| 109678 |
+
"loss": 0.6895867586135864,
|
| 109679 |
+
"step": 15667
|
| 109680 |
+
},
|
| 109681 |
+
{
|
| 109682 |
+
"epoch": 19.224539877300614,
|
| 109683 |
+
"grad_norm": 0.28075286746025085,
|
| 109684 |
+
"learning_rate": 2.0161132044816676e-07,
|
| 109685 |
+
"loss": 0.49532604217529297,
|
| 109686 |
+
"step": 15668
|
| 109687 |
+
},
|
| 109688 |
+
{
|
| 109689 |
+
"epoch": 19.225766871165643,
|
| 109690 |
+
"grad_norm": 0.2576037645339966,
|
| 109691 |
+
"learning_rate": 2.009756752000186e-07,
|
| 109692 |
+
"loss": 0.5182088017463684,
|
| 109693 |
+
"step": 15669
|
| 109694 |
+
},
|
| 109695 |
+
{
|
| 109696 |
+
"epoch": 19.226993865030675,
|
| 109697 |
+
"grad_norm": 0.2769133448600769,
|
| 109698 |
+
"learning_rate": 2.0034102952981305e-07,
|
| 109699 |
+
"loss": 0.6898492574691772,
|
| 109700 |
+
"step": 15670
|
| 109701 |
+
},
|
| 109702 |
+
{
|
| 109703 |
+
"epoch": 19.228220858895707,
|
| 109704 |
+
"grad_norm": 0.25994202494621277,
|
| 109705 |
+
"learning_rate": 1.9970738346313246e-07,
|
| 109706 |
+
"loss": 0.6196413040161133,
|
| 109707 |
+
"step": 15671
|
| 109708 |
+
},
|
| 109709 |
+
{
|
| 109710 |
+
"epoch": 19.229447852760735,
|
| 109711 |
+
"grad_norm": 0.27102354168891907,
|
| 109712 |
+
"learning_rate": 1.9907473702551194e-07,
|
| 109713 |
+
"loss": 0.466544508934021,
|
| 109714 |
+
"step": 15672
|
| 109715 |
+
},
|
| 109716 |
+
{
|
| 109717 |
+
"epoch": 19.230674846625767,
|
| 109718 |
+
"grad_norm": 0.243444561958313,
|
| 109719 |
+
"learning_rate": 1.9844309024245334e-07,
|
| 109720 |
+
"loss": 0.4627355933189392,
|
| 109721 |
+
"step": 15673
|
| 109722 |
+
},
|
| 109723 |
+
{
|
| 109724 |
+
"epoch": 19.2319018404908,
|
| 109725 |
+
"grad_norm": 0.2584282457828522,
|
| 109726 |
+
"learning_rate": 1.978124431394196e-07,
|
| 109727 |
+
"loss": 0.3388047218322754,
|
| 109728 |
+
"step": 15674
|
| 109729 |
+
},
|
| 109730 |
+
{
|
| 109731 |
+
"epoch": 19.233128834355828,
|
| 109732 |
+
"grad_norm": 0.29062268137931824,
|
| 109733 |
+
"learning_rate": 1.9718279574182653e-07,
|
| 109734 |
+
"loss": 0.6275640726089478,
|
| 109735 |
+
"step": 15675
|
| 109736 |
+
},
|
| 109737 |
+
{
|
| 109738 |
+
"epoch": 19.23435582822086,
|
| 109739 |
+
"grad_norm": 0.2674814462661743,
|
| 109740 |
+
"learning_rate": 1.9655414807505658e-07,
|
| 109741 |
+
"loss": 0.6290971040725708,
|
| 109742 |
+
"step": 15676
|
| 109743 |
+
},
|
| 109744 |
+
{
|
| 109745 |
+
"epoch": 19.235582822085888,
|
| 109746 |
+
"grad_norm": 0.2815602123737335,
|
| 109747 |
+
"learning_rate": 1.9592650016444503e-07,
|
| 109748 |
+
"loss": 0.4671408236026764,
|
| 109749 |
+
"step": 15677
|
| 109750 |
+
},
|
| 109751 |
+
{
|
| 109752 |
+
"epoch": 19.23680981595092,
|
| 109753 |
+
"grad_norm": 0.2835483253002167,
|
| 109754 |
+
"learning_rate": 1.9529985203529388e-07,
|
| 109755 |
+
"loss": 0.5751950144767761,
|
| 109756 |
+
"step": 15678
|
| 109757 |
+
},
|
| 109758 |
+
{
|
| 109759 |
+
"epoch": 19.238036809815952,
|
| 109760 |
+
"grad_norm": 0.28011080622673035,
|
| 109761 |
+
"learning_rate": 1.9467420371286071e-07,
|
| 109762 |
+
"loss": 0.5261114835739136,
|
| 109763 |
+
"step": 15679
|
| 109764 |
+
},
|
| 109765 |
+
{
|
| 109766 |
+
"epoch": 19.23926380368098,
|
| 109767 |
+
"grad_norm": 0.25334927439689636,
|
| 109768 |
+
"learning_rate": 1.9404955522236422e-07,
|
| 109769 |
+
"loss": 0.6303417086601257,
|
| 109770 |
+
"step": 15680
|
| 109771 |
+
},
|
| 109772 |
+
{
|
| 109773 |
+
"epoch": 19.240490797546013,
|
| 109774 |
+
"grad_norm": 0.29517796635627747,
|
| 109775 |
+
"learning_rate": 1.9342590658898153e-07,
|
| 109776 |
+
"loss": 0.593755841255188,
|
| 109777 |
+
"step": 15681
|
| 109778 |
+
},
|
| 109779 |
+
{
|
| 109780 |
+
"epoch": 19.241717791411045,
|
| 109781 |
+
"grad_norm": 0.2704833745956421,
|
| 109782 |
+
"learning_rate": 1.928032578378508e-07,
|
| 109783 |
+
"loss": 0.6073254942893982,
|
| 109784 |
+
"step": 15682
|
| 109785 |
+
},
|
| 109786 |
+
{
|
| 109787 |
+
"epoch": 19.242944785276073,
|
| 109788 |
+
"grad_norm": 0.26108258962631226,
|
| 109789 |
+
"learning_rate": 1.921816089940659e-07,
|
| 109790 |
+
"loss": 0.5615946054458618,
|
| 109791 |
+
"step": 15683
|
| 109792 |
+
},
|
| 109793 |
+
{
|
| 109794 |
+
"epoch": 19.244171779141105,
|
| 109795 |
+
"grad_norm": 0.2662059962749481,
|
| 109796 |
+
"learning_rate": 1.9156096008268732e-07,
|
| 109797 |
+
"loss": 0.7788010835647583,
|
| 109798 |
+
"step": 15684
|
| 109799 |
+
},
|
| 109800 |
+
{
|
| 109801 |
+
"epoch": 19.245398773006134,
|
| 109802 |
+
"grad_norm": 0.28087398409843445,
|
| 109803 |
+
"learning_rate": 1.9094131112872837e-07,
|
| 109804 |
+
"loss": 0.654100239276886,
|
| 109805 |
+
"step": 15685
|
| 109806 |
+
},
|
| 109807 |
+
{
|
| 109808 |
+
"epoch": 19.246625766871166,
|
| 109809 |
+
"grad_norm": 0.27967190742492676,
|
| 109810 |
+
"learning_rate": 1.9032266215717188e-07,
|
| 109811 |
+
"loss": 0.5717660188674927,
|
| 109812 |
+
"step": 15686
|
| 109813 |
+
},
|
| 109814 |
+
{
|
| 109815 |
+
"epoch": 19.247852760736198,
|
| 109816 |
+
"grad_norm": 0.2547641098499298,
|
| 109817 |
+
"learning_rate": 1.8970501319294787e-07,
|
| 109818 |
+
"loss": 0.5374426245689392,
|
| 109819 |
+
"step": 15687
|
| 109820 |
+
},
|
| 109821 |
+
{
|
| 109822 |
+
"epoch": 19.249079754601226,
|
| 109823 |
+
"grad_norm": 0.30670973658561707,
|
| 109824 |
+
"learning_rate": 1.8908836426095312e-07,
|
| 109825 |
+
"loss": 0.5804160833358765,
|
| 109826 |
+
"step": 15688
|
| 109827 |
+
},
|
| 109828 |
+
{
|
| 109829 |
+
"epoch": 19.250306748466258,
|
| 109830 |
+
"grad_norm": 0.2664736807346344,
|
| 109831 |
+
"learning_rate": 1.8847271538604272e-07,
|
| 109832 |
+
"loss": 0.6950273513793945,
|
| 109833 |
+
"step": 15689
|
| 109834 |
+
},
|
| 109835 |
+
{
|
| 109836 |
+
"epoch": 19.25153374233129,
|
| 109837 |
+
"grad_norm": 0.28548797965049744,
|
| 109838 |
+
"learning_rate": 1.8785806659303295e-07,
|
| 109839 |
+
"loss": 0.624729573726654,
|
| 109840 |
+
"step": 15690
|
| 109841 |
+
},
|
| 109842 |
+
{
|
| 109843 |
+
"epoch": 19.25276073619632,
|
| 109844 |
+
"grad_norm": 0.28483811020851135,
|
| 109845 |
+
"learning_rate": 1.8724441790669568e-07,
|
| 109846 |
+
"loss": 0.6761590242385864,
|
| 109847 |
+
"step": 15691
|
| 109848 |
+
},
|
| 109849 |
+
{
|
| 109850 |
+
"epoch": 19.25398773006135,
|
| 109851 |
+
"grad_norm": 0.2748323976993561,
|
| 109852 |
+
"learning_rate": 1.8663176935177218e-07,
|
| 109853 |
+
"loss": 0.7182489633560181,
|
| 109854 |
+
"step": 15692
|
| 109855 |
+
},
|
| 109856 |
+
{
|
| 109857 |
+
"epoch": 19.25521472392638,
|
| 109858 |
+
"grad_norm": 0.26820382475852966,
|
| 109859 |
+
"learning_rate": 1.860201209529483e-07,
|
| 109860 |
+
"loss": 0.5007621049880981,
|
| 109861 |
+
"step": 15693
|
| 109862 |
+
},
|
| 109863 |
+
{
|
| 109864 |
+
"epoch": 19.25644171779141,
|
| 109865 |
+
"grad_norm": 0.2610996663570404,
|
| 109866 |
+
"learning_rate": 1.8540947273488484e-07,
|
| 109867 |
+
"loss": 0.6145170331001282,
|
| 109868 |
+
"step": 15694
|
| 109869 |
+
},
|
| 109870 |
+
{
|
| 109871 |
+
"epoch": 19.257668711656443,
|
| 109872 |
+
"grad_norm": 0.2924644351005554,
|
| 109873 |
+
"learning_rate": 1.8479982472218994e-07,
|
| 109874 |
+
"loss": 0.7354214191436768,
|
| 109875 |
+
"step": 15695
|
| 109876 |
+
},
|
| 109877 |
+
{
|
| 109878 |
+
"epoch": 19.25889570552147,
|
| 109879 |
+
"grad_norm": 0.24042746424674988,
|
| 109880 |
+
"learning_rate": 1.841911769394411e-07,
|
| 109881 |
+
"loss": 0.46986573934555054,
|
| 109882 |
+
"step": 15696
|
| 109883 |
+
},
|
| 109884 |
+
{
|
| 109885 |
+
"epoch": 19.260122699386503,
|
| 109886 |
+
"grad_norm": 0.27207738161087036,
|
| 109887 |
+
"learning_rate": 1.8358352941116596e-07,
|
| 109888 |
+
"loss": 0.5326989889144897,
|
| 109889 |
+
"step": 15697
|
| 109890 |
+
},
|
| 109891 |
+
{
|
| 109892 |
+
"epoch": 19.261349693251535,
|
| 109893 |
+
"grad_norm": 0.25708624720573425,
|
| 109894 |
+
"learning_rate": 1.829768821618616e-07,
|
| 109895 |
+
"loss": 0.42855197191238403,
|
| 109896 |
+
"step": 15698
|
| 109897 |
+
},
|
| 109898 |
+
{
|
| 109899 |
+
"epoch": 19.262576687116564,
|
| 109900 |
+
"grad_norm": 0.23511746525764465,
|
| 109901 |
+
"learning_rate": 1.8237123521597788e-07,
|
| 109902 |
+
"loss": 0.45312345027923584,
|
| 109903 |
+
"step": 15699
|
| 109904 |
+
},
|
| 109905 |
+
{
|
| 109906 |
+
"epoch": 19.263803680981596,
|
| 109907 |
+
"grad_norm": 0.3303406834602356,
|
| 109908 |
+
"learning_rate": 1.817665885979314e-07,
|
| 109909 |
+
"loss": 0.6579532623291016,
|
| 109910 |
+
"step": 15700
|
| 109911 |
+
},
|
| 109912 |
+
{
|
| 109913 |
+
"epoch": 19.265030674846624,
|
| 109914 |
+
"grad_norm": 0.26569119095802307,
|
| 109915 |
+
"learning_rate": 1.8116294233208598e-07,
|
| 109916 |
+
"loss": 0.5076731443405151,
|
| 109917 |
+
"step": 15701
|
| 109918 |
+
},
|
| 109919 |
+
{
|
| 109920 |
+
"epoch": 19.266257668711656,
|
| 109921 |
+
"grad_norm": 0.2574261426925659,
|
| 109922 |
+
"learning_rate": 1.8056029644277772e-07,
|
| 109923 |
+
"loss": 0.5825338363647461,
|
| 109924 |
+
"step": 15702
|
| 109925 |
+
},
|
| 109926 |
+
{
|
| 109927 |
+
"epoch": 19.26748466257669,
|
| 109928 |
+
"grad_norm": 0.24608305096626282,
|
| 109929 |
+
"learning_rate": 1.7995865095429832e-07,
|
| 109930 |
+
"loss": 0.5878791809082031,
|
| 109931 |
+
"step": 15703
|
| 109932 |
+
},
|
| 109933 |
+
{
|
| 109934 |
+
"epoch": 19.268711656441717,
|
| 109935 |
+
"grad_norm": 0.2769867181777954,
|
| 109936 |
+
"learning_rate": 1.7935800589089502e-07,
|
| 109937 |
+
"loss": 0.39420732855796814,
|
| 109938 |
+
"step": 15704
|
| 109939 |
+
},
|
| 109940 |
+
{
|
| 109941 |
+
"epoch": 19.26993865030675,
|
| 109942 |
+
"grad_norm": 0.2668036222457886,
|
| 109943 |
+
"learning_rate": 1.7875836127677903e-07,
|
| 109944 |
+
"loss": 0.7374603748321533,
|
| 109945 |
+
"step": 15705
|
| 109946 |
+
},
|
| 109947 |
+
{
|
| 109948 |
+
"epoch": 19.27116564417178,
|
| 109949 |
+
"grad_norm": 0.3071646988391876,
|
| 109950 |
+
"learning_rate": 1.7815971713612268e-07,
|
| 109951 |
+
"loss": 0.7182461023330688,
|
| 109952 |
+
"step": 15706
|
| 109953 |
+
},
|
| 109954 |
+
{
|
| 109955 |
+
"epoch": 19.27239263803681,
|
| 109956 |
+
"grad_norm": 0.3189086616039276,
|
| 109957 |
+
"learning_rate": 1.7756207349305387e-07,
|
| 109958 |
+
"loss": 0.7317497134208679,
|
| 109959 |
+
"step": 15707
|
| 109960 |
+
},
|
| 109961 |
+
{
|
| 109962 |
+
"epoch": 19.27361963190184,
|
| 109963 |
+
"grad_norm": 0.30117762088775635,
|
| 109964 |
+
"learning_rate": 1.769654303716617e-07,
|
| 109965 |
+
"loss": 0.7006818652153015,
|
| 109966 |
+
"step": 15708
|
| 109967 |
+
},
|
| 109968 |
+
{
|
| 109969 |
+
"epoch": 19.27484662576687,
|
| 109970 |
+
"grad_norm": 0.2477964460849762,
|
| 109971 |
+
"learning_rate": 1.7636978779599633e-07,
|
| 109972 |
+
"loss": 0.48170962929725647,
|
| 109973 |
+
"step": 15709
|
| 109974 |
+
},
|
| 109975 |
+
{
|
| 109976 |
+
"epoch": 19.2760736196319,
|
| 109977 |
+
"grad_norm": 0.26466891169548035,
|
| 109978 |
+
"learning_rate": 1.757751457900636e-07,
|
| 109979 |
+
"loss": 0.5916324853897095,
|
| 109980 |
+
"step": 15710
|
| 109981 |
+
},
|
| 109982 |
+
{
|
| 109983 |
+
"epoch": 19.277300613496934,
|
| 109984 |
+
"grad_norm": 0.2800232172012329,
|
| 109985 |
+
"learning_rate": 1.7518150437783597e-07,
|
| 109986 |
+
"loss": 0.6216551661491394,
|
| 109987 |
+
"step": 15711
|
| 109988 |
+
},
|
| 109989 |
+
{
|
| 109990 |
+
"epoch": 19.278527607361962,
|
| 109991 |
+
"grad_norm": 0.2177606076002121,
|
| 109992 |
+
"learning_rate": 1.74588863583236e-07,
|
| 109993 |
+
"loss": 0.4567154347896576,
|
| 109994 |
+
"step": 15712
|
| 109995 |
+
},
|
| 109996 |
+
{
|
| 109997 |
+
"epoch": 19.279754601226994,
|
| 109998 |
+
"grad_norm": 0.27706941962242126,
|
| 109999 |
+
"learning_rate": 1.7399722343015846e-07,
|
| 110000 |
+
"loss": 0.7275480031967163,
|
| 110001 |
+
"step": 15713
|
| 110002 |
+
},
|
| 110003 |
+
{
|
| 110004 |
+
"epoch": 19.280981595092026,
|
| 110005 |
+
"grad_norm": 0.30545711517333984,
|
| 110006 |
+
"learning_rate": 1.7340658394244537e-07,
|
| 110007 |
+
"loss": 0.6275465488433838,
|
| 110008 |
+
"step": 15714
|
| 110009 |
+
},
|
| 110010 |
+
{
|
| 110011 |
+
"epoch": 19.282208588957054,
|
| 110012 |
+
"grad_norm": 0.24607671797275543,
|
| 110013 |
+
"learning_rate": 1.7281694514390546e-07,
|
| 110014 |
+
"loss": 0.42245131731033325,
|
| 110015 |
+
"step": 15715
|
| 110016 |
+
},
|
| 110017 |
+
{
|
| 110018 |
+
"epoch": 19.283435582822086,
|
| 110019 |
+
"grad_norm": 0.2522715628147125,
|
| 110020 |
+
"learning_rate": 1.7222830705830583e-07,
|
| 110021 |
+
"loss": 0.6712870597839355,
|
| 110022 |
+
"step": 15716
|
| 110023 |
+
},
|
| 110024 |
+
{
|
| 110025 |
+
"epoch": 19.284662576687115,
|
| 110026 |
+
"grad_norm": 0.24713724851608276,
|
| 110027 |
+
"learning_rate": 1.716406697093692e-07,
|
| 110028 |
+
"loss": 0.11985582113265991,
|
| 110029 |
+
"step": 15717
|
| 110030 |
+
},
|
| 110031 |
+
{
|
| 110032 |
+
"epoch": 19.285889570552147,
|
| 110033 |
+
"grad_norm": 0.23300576210021973,
|
| 110034 |
+
"learning_rate": 1.7105403312078772e-07,
|
| 110035 |
+
"loss": 0.5070394277572632,
|
| 110036 |
+
"step": 15718
|
| 110037 |
+
},
|
| 110038 |
+
{
|
| 110039 |
+
"epoch": 19.28711656441718,
|
| 110040 |
+
"grad_norm": 0.29088982939720154,
|
| 110041 |
+
"learning_rate": 1.7046839731620357e-07,
|
| 110042 |
+
"loss": 0.5711818933486938,
|
| 110043 |
+
"step": 15719
|
| 110044 |
+
},
|
| 110045 |
+
{
|
| 110046 |
+
"epoch": 19.288343558282207,
|
| 110047 |
+
"grad_norm": 0.3080708682537079,
|
| 110048 |
+
"learning_rate": 1.698837623192201e-07,
|
| 110049 |
+
"loss": 0.6905769109725952,
|
| 110050 |
+
"step": 15720
|
| 110051 |
+
},
|
| 110052 |
+
{
|
| 110053 |
+
"epoch": 19.28957055214724,
|
| 110054 |
+
"grad_norm": 0.2848478853702545,
|
| 110055 |
+
"learning_rate": 1.6930012815340736e-07,
|
| 110056 |
+
"loss": 0.626186728477478,
|
| 110057 |
+
"step": 15721
|
| 110058 |
+
},
|
| 110059 |
+
{
|
| 110060 |
+
"epoch": 19.29079754601227,
|
| 110061 |
+
"grad_norm": 0.25921517610549927,
|
| 110062 |
+
"learning_rate": 1.687174948422854e-07,
|
| 110063 |
+
"loss": 0.7007246017456055,
|
| 110064 |
+
"step": 15722
|
| 110065 |
+
},
|
| 110066 |
+
{
|
| 110067 |
+
"epoch": 19.2920245398773,
|
| 110068 |
+
"grad_norm": 0.3125583827495575,
|
| 110069 |
+
"learning_rate": 1.681358624093382e-07,
|
| 110070 |
+
"loss": 0.5929790735244751,
|
| 110071 |
+
"step": 15723
|
| 110072 |
+
},
|
| 110073 |
+
{
|
| 110074 |
+
"epoch": 19.293251533742332,
|
| 110075 |
+
"grad_norm": 0.24967384338378906,
|
| 110076 |
+
"learning_rate": 1.675552308780165e-07,
|
| 110077 |
+
"loss": 0.4132218062877655,
|
| 110078 |
+
"step": 15724
|
| 110079 |
+
},
|
| 110080 |
+
{
|
| 110081 |
+
"epoch": 19.29447852760736,
|
| 110082 |
+
"grad_norm": 0.2761126756668091,
|
| 110083 |
+
"learning_rate": 1.6697560027171544e-07,
|
| 110084 |
+
"loss": 0.5265856981277466,
|
| 110085 |
+
"step": 15725
|
| 110086 |
+
},
|
| 110087 |
+
{
|
| 110088 |
+
"epoch": 19.295705521472392,
|
| 110089 |
+
"grad_norm": 0.25898540019989014,
|
| 110090 |
+
"learning_rate": 1.6639697061380243e-07,
|
| 110091 |
+
"loss": 0.3211030960083008,
|
| 110092 |
+
"step": 15726
|
| 110093 |
+
},
|
| 110094 |
+
{
|
| 110095 |
+
"epoch": 19.296932515337424,
|
| 110096 |
+
"grad_norm": 0.24974408745765686,
|
| 110097 |
+
"learning_rate": 1.658193419276005e-07,
|
| 110098 |
+
"loss": 0.5403602719306946,
|
| 110099 |
+
"step": 15727
|
| 110100 |
+
},
|
| 110101 |
+
{
|
| 110102 |
+
"epoch": 19.298159509202453,
|
| 110103 |
+
"grad_norm": 0.2762167155742645,
|
| 110104 |
+
"learning_rate": 1.6524271423639103e-07,
|
| 110105 |
+
"loss": 0.6595354080200195,
|
| 110106 |
+
"step": 15728
|
| 110107 |
+
},
|
| 110108 |
+
{
|
| 110109 |
+
"epoch": 19.299386503067485,
|
| 110110 |
+
"grad_norm": 0.27492180466651917,
|
| 110111 |
+
"learning_rate": 1.6466708756341932e-07,
|
| 110112 |
+
"loss": 0.47847893834114075,
|
| 110113 |
+
"step": 15729
|
| 110114 |
+
},
|
| 110115 |
+
{
|
| 110116 |
+
"epoch": 19.300613496932517,
|
| 110117 |
+
"grad_norm": 0.2709231376647949,
|
| 110118 |
+
"learning_rate": 1.6409246193188065e-07,
|
| 110119 |
+
"loss": 0.6940353512763977,
|
| 110120 |
+
"step": 15730
|
| 110121 |
+
},
|
| 110122 |
+
{
|
| 110123 |
+
"epoch": 19.301840490797545,
|
| 110124 |
+
"grad_norm": 0.27758049964904785,
|
| 110125 |
+
"learning_rate": 1.6351883736494268e-07,
|
| 110126 |
+
"loss": 0.684998095035553,
|
| 110127 |
+
"step": 15731
|
| 110128 |
+
},
|
| 110129 |
+
{
|
| 110130 |
+
"epoch": 19.303067484662577,
|
| 110131 |
+
"grad_norm": 0.23531819880008698,
|
| 110132 |
+
"learning_rate": 1.6294621388572296e-07,
|
| 110133 |
+
"loss": 0.2514550983905792,
|
| 110134 |
+
"step": 15732
|
| 110135 |
+
},
|
| 110136 |
+
{
|
| 110137 |
+
"epoch": 19.30429447852761,
|
| 110138 |
+
"grad_norm": 0.2790698707103729,
|
| 110139 |
+
"learning_rate": 1.6237459151730583e-07,
|
| 110140 |
+
"loss": 0.413646936416626,
|
| 110141 |
+
"step": 15733
|
| 110142 |
+
},
|
| 110143 |
+
{
|
| 110144 |
+
"epoch": 19.305521472392638,
|
| 110145 |
+
"grad_norm": 0.3102453649044037,
|
| 110146 |
+
"learning_rate": 1.6180397028272844e-07,
|
| 110147 |
+
"loss": 0.6367359757423401,
|
| 110148 |
+
"step": 15734
|
| 110149 |
+
},
|
| 110150 |
+
{
|
| 110151 |
+
"epoch": 19.30674846625767,
|
| 110152 |
+
"grad_norm": 0.25980275869369507,
|
| 110153 |
+
"learning_rate": 1.6123435020499455e-07,
|
| 110154 |
+
"loss": 0.5755482912063599,
|
| 110155 |
+
"step": 15735
|
| 110156 |
+
},
|
| 110157 |
+
{
|
| 110158 |
+
"epoch": 19.307975460122698,
|
| 110159 |
+
"grad_norm": 0.25953999161720276,
|
| 110160 |
+
"learning_rate": 1.606657313070581e-07,
|
| 110161 |
+
"loss": 0.6423062682151794,
|
| 110162 |
+
"step": 15736
|
| 110163 |
+
},
|
| 110164 |
+
{
|
| 110165 |
+
"epoch": 19.30920245398773,
|
| 110166 |
+
"grad_norm": 0.2520987391471863,
|
| 110167 |
+
"learning_rate": 1.600981136118451e-07,
|
| 110168 |
+
"loss": 0.6856333017349243,
|
| 110169 |
+
"step": 15737
|
| 110170 |
+
},
|
| 110171 |
+
{
|
| 110172 |
+
"epoch": 19.310429447852762,
|
| 110173 |
+
"grad_norm": 0.26205897331237793,
|
| 110174 |
+
"learning_rate": 1.5953149714222904e-07,
|
| 110175 |
+
"loss": 0.42693936824798584,
|
| 110176 |
+
"step": 15738
|
| 110177 |
+
},
|
| 110178 |
+
{
|
| 110179 |
+
"epoch": 19.31165644171779,
|
| 110180 |
+
"grad_norm": 0.2931806743144989,
|
| 110181 |
+
"learning_rate": 1.5896588192105544e-07,
|
| 110182 |
+
"loss": 0.648865282535553,
|
| 110183 |
+
"step": 15739
|
| 110184 |
+
},
|
| 110185 |
+
{
|
| 110186 |
+
"epoch": 19.312883435582823,
|
| 110187 |
+
"grad_norm": 0.28576067090034485,
|
| 110188 |
+
"learning_rate": 1.5840126797111444e-07,
|
| 110189 |
+
"loss": 0.6645696759223938,
|
| 110190 |
+
"step": 15740
|
| 110191 |
+
},
|
| 110192 |
+
{
|
| 110193 |
+
"epoch": 19.314110429447855,
|
| 110194 |
+
"grad_norm": 0.24995481967926025,
|
| 110195 |
+
"learning_rate": 1.5783765531516837e-07,
|
| 110196 |
+
"loss": 0.6978772878646851,
|
| 110197 |
+
"step": 15741
|
| 110198 |
+
},
|
| 110199 |
+
{
|
| 110200 |
+
"epoch": 19.315337423312883,
|
| 110201 |
+
"grad_norm": 0.24016039073467255,
|
| 110202 |
+
"learning_rate": 1.5727504397593517e-07,
|
| 110203 |
+
"loss": 0.5503772497177124,
|
| 110204 |
+
"step": 15742
|
| 110205 |
+
},
|
| 110206 |
+
{
|
| 110207 |
+
"epoch": 19.316564417177915,
|
| 110208 |
+
"grad_norm": 0.2745150327682495,
|
| 110209 |
+
"learning_rate": 1.5671343397609117e-07,
|
| 110210 |
+
"loss": 0.8297175168991089,
|
| 110211 |
+
"step": 15743
|
| 110212 |
+
},
|
| 110213 |
+
{
|
| 110214 |
+
"epoch": 19.317791411042943,
|
| 110215 |
+
"grad_norm": 0.2715660631656647,
|
| 110216 |
+
"learning_rate": 1.5615282533827381e-07,
|
| 110217 |
+
"loss": 0.6635775566101074,
|
| 110218 |
+
"step": 15744
|
| 110219 |
+
},
|
| 110220 |
+
{
|
| 110221 |
+
"epoch": 19.319018404907975,
|
| 110222 |
+
"grad_norm": 0.2674633860588074,
|
| 110223 |
+
"learning_rate": 1.5559321808507888e-07,
|
| 110224 |
+
"loss": 0.5984900593757629,
|
| 110225 |
+
"step": 15745
|
| 110226 |
+
},
|
| 110227 |
+
{
|
| 110228 |
+
"epoch": 19.320245398773007,
|
| 110229 |
+
"grad_norm": 0.23603837192058563,
|
| 110230 |
+
"learning_rate": 1.5503461223906058e-07,
|
| 110231 |
+
"loss": 0.4508005678653717,
|
| 110232 |
+
"step": 15746
|
| 110233 |
+
},
|
| 110234 |
+
{
|
| 110235 |
+
"epoch": 19.321472392638036,
|
| 110236 |
+
"grad_norm": 0.26100143790245056,
|
| 110237 |
+
"learning_rate": 1.5447700782273976e-07,
|
| 110238 |
+
"loss": 0.5953958034515381,
|
| 110239 |
+
"step": 15747
|
| 110240 |
+
},
|
| 110241 |
+
{
|
| 110242 |
+
"epoch": 19.322699386503068,
|
| 110243 |
+
"grad_norm": 0.20635277032852173,
|
| 110244 |
+
"learning_rate": 1.5392040485858738e-07,
|
| 110245 |
+
"loss": 0.4517844617366791,
|
| 110246 |
+
"step": 15748
|
| 110247 |
+
},
|
| 110248 |
+
{
|
| 110249 |
+
"epoch": 19.3239263803681,
|
| 110250 |
+
"grad_norm": 0.3128073215484619,
|
| 110251 |
+
"learning_rate": 1.5336480336904103e-07,
|
| 110252 |
+
"loss": 0.6955556869506836,
|
| 110253 |
+
"step": 15749
|
| 110254 |
+
},
|
| 110255 |
+
{
|
| 110256 |
+
"epoch": 19.32515337423313,
|
| 110257 |
+
"grad_norm": 0.26238998770713806,
|
| 110258 |
+
"learning_rate": 1.5281020337649664e-07,
|
| 110259 |
+
"loss": 0.576416015625,
|
| 110260 |
+
"step": 15750
|
| 110261 |
+
},
|
| 110262 |
+
{
|
| 110263 |
+
"epoch": 19.32638036809816,
|
| 110264 |
+
"grad_norm": 0.264708548784256,
|
| 110265 |
+
"learning_rate": 1.5225660490330307e-07,
|
| 110266 |
+
"loss": 0.536928653717041,
|
| 110267 |
+
"step": 15751
|
| 110268 |
+
},
|
| 110269 |
+
{
|
| 110270 |
+
"epoch": 19.32760736196319,
|
| 110271 |
+
"grad_norm": 0.26538512110710144,
|
| 110272 |
+
"learning_rate": 1.5170400797177853e-07,
|
| 110273 |
+
"loss": 0.5383561849594116,
|
| 110274 |
+
"step": 15752
|
| 110275 |
+
},
|
| 110276 |
+
{
|
| 110277 |
+
"epoch": 19.32883435582822,
|
| 110278 |
+
"grad_norm": 0.3585108518600464,
|
| 110279 |
+
"learning_rate": 1.5115241260419687e-07,
|
| 110280 |
+
"loss": 0.4883931875228882,
|
| 110281 |
+
"step": 15753
|
| 110282 |
+
},
|
| 110283 |
+
{
|
| 110284 |
+
"epoch": 19.330061349693253,
|
| 110285 |
+
"grad_norm": 0.25332963466644287,
|
| 110286 |
+
"learning_rate": 1.5060181882279035e-07,
|
| 110287 |
+
"loss": 0.6847756505012512,
|
| 110288 |
+
"step": 15754
|
| 110289 |
+
},
|
| 110290 |
+
{
|
| 110291 |
+
"epoch": 19.33128834355828,
|
| 110292 |
+
"grad_norm": 0.2801564335823059,
|
| 110293 |
+
"learning_rate": 1.5005222664975228e-07,
|
| 110294 |
+
"loss": 0.6801108121871948,
|
| 110295 |
+
"step": 15755
|
| 110296 |
+
},
|
| 110297 |
+
{
|
| 110298 |
+
"epoch": 19.332515337423313,
|
| 110299 |
+
"grad_norm": 0.2803504765033722,
|
| 110300 |
+
"learning_rate": 1.495036361072344e-07,
|
| 110301 |
+
"loss": 0.6735873222351074,
|
| 110302 |
+
"step": 15756
|
| 110303 |
+
},
|
| 110304 |
+
{
|
| 110305 |
+
"epoch": 19.333742331288345,
|
| 110306 |
+
"grad_norm": 0.2586252689361572,
|
| 110307 |
+
"learning_rate": 1.489560472173468e-07,
|
| 110308 |
+
"loss": 0.4560503661632538,
|
| 110309 |
+
"step": 15757
|
| 110310 |
+
},
|
| 110311 |
+
{
|
| 110312 |
+
"epoch": 19.334969325153374,
|
| 110313 |
+
"grad_norm": 0.2740887403488159,
|
| 110314 |
+
"learning_rate": 1.4840946000216628e-07,
|
| 110315 |
+
"loss": 0.4215601682662964,
|
| 110316 |
+
"step": 15758
|
| 110317 |
+
},
|
| 110318 |
+
{
|
| 110319 |
+
"epoch": 19.336196319018406,
|
| 110320 |
+
"grad_norm": 0.32748526334762573,
|
| 110321 |
+
"learning_rate": 1.4786387448371963e-07,
|
| 110322 |
+
"loss": 0.6028684377670288,
|
| 110323 |
+
"step": 15759
|
| 110324 |
+
},
|
| 110325 |
+
{
|
| 110326 |
+
"epoch": 19.337423312883434,
|
| 110327 |
+
"grad_norm": 0.2679018974304199,
|
| 110328 |
+
"learning_rate": 1.473192906840004e-07,
|
| 110329 |
+
"loss": 0.5579320192337036,
|
| 110330 |
+
"step": 15760
|
| 110331 |
+
},
|
| 110332 |
+
{
|
| 110333 |
+
"epoch": 19.338650306748466,
|
| 110334 |
+
"grad_norm": 0.25507208704948425,
|
| 110335 |
+
"learning_rate": 1.4677570862496049e-07,
|
| 110336 |
+
"loss": 0.6786926984786987,
|
| 110337 |
+
"step": 15761
|
| 110338 |
+
},
|
| 110339 |
+
{
|
| 110340 |
+
"epoch": 19.339877300613498,
|
| 110341 |
+
"grad_norm": 0.299342542886734,
|
| 110342 |
+
"learning_rate": 1.4623312832850455e-07,
|
| 110343 |
+
"loss": 0.6689451932907104,
|
| 110344 |
+
"step": 15762
|
| 110345 |
+
},
|
| 110346 |
+
{
|
| 110347 |
+
"epoch": 19.341104294478527,
|
| 110348 |
+
"grad_norm": 0.25550568103790283,
|
| 110349 |
+
"learning_rate": 1.4569154981650957e-07,
|
| 110350 |
+
"loss": 0.5636083483695984,
|
| 110351 |
+
"step": 15763
|
| 110352 |
+
},
|
| 110353 |
+
{
|
| 110354 |
+
"epoch": 19.34233128834356,
|
| 110355 |
+
"grad_norm": 0.21735846996307373,
|
| 110356 |
+
"learning_rate": 1.4515097311079973e-07,
|
| 110357 |
+
"loss": 0.38089361786842346,
|
| 110358 |
+
"step": 15764
|
| 110359 |
+
},
|
| 110360 |
+
{
|
| 110361 |
+
"epoch": 19.34355828220859,
|
| 110362 |
+
"grad_norm": 0.29939004778862,
|
| 110363 |
+
"learning_rate": 1.4461139823316595e-07,
|
| 110364 |
+
"loss": 0.6110247373580933,
|
| 110365 |
+
"step": 15765
|
| 110366 |
+
},
|
| 110367 |
+
{
|
| 110368 |
+
"epoch": 19.34478527607362,
|
| 110369 |
+
"grad_norm": 0.2742545008659363,
|
| 110370 |
+
"learning_rate": 1.4407282520535747e-07,
|
| 110371 |
+
"loss": 0.508581817150116,
|
| 110372 |
+
"step": 15766
|
| 110373 |
+
},
|
| 110374 |
+
{
|
| 110375 |
+
"epoch": 19.34601226993865,
|
| 110376 |
+
"grad_norm": 0.26144132018089294,
|
| 110377 |
+
"learning_rate": 1.4353525404908198e-07,
|
| 110378 |
+
"loss": 0.6471553444862366,
|
| 110379 |
+
"step": 15767
|
| 110380 |
+
},
|
| 110381 |
+
{
|
| 110382 |
+
"epoch": 19.34723926380368,
|
| 110383 |
+
"grad_norm": 0.25421637296676636,
|
| 110384 |
+
"learning_rate": 1.429986847860082e-07,
|
| 110385 |
+
"loss": 0.5721098184585571,
|
| 110386 |
+
"step": 15768
|
| 110387 |
+
},
|
| 110388 |
+
{
|
| 110389 |
+
"epoch": 19.34846625766871,
|
| 110390 |
+
"grad_norm": 0.25687286257743835,
|
| 110391 |
+
"learning_rate": 1.4246311743776052e-07,
|
| 110392 |
+
"loss": 0.725675106048584,
|
| 110393 |
+
"step": 15769
|
| 110394 |
+
},
|
| 110395 |
+
{
|
| 110396 |
+
"epoch": 19.349693251533743,
|
| 110397 |
+
"grad_norm": 0.2701999843120575,
|
| 110398 |
+
"learning_rate": 1.4192855202593002e-07,
|
| 110399 |
+
"loss": 0.582121729850769,
|
| 110400 |
+
"step": 15770
|
| 110401 |
+
},
|
| 110402 |
+
{
|
| 110403 |
+
"epoch": 19.350920245398772,
|
| 110404 |
+
"grad_norm": 0.2289995700120926,
|
| 110405 |
+
"learning_rate": 1.4139498857206335e-07,
|
| 110406 |
+
"loss": 0.5169103145599365,
|
| 110407 |
+
"step": 15771
|
| 110408 |
+
},
|
| 110409 |
+
{
|
| 110410 |
+
"epoch": 19.352147239263804,
|
| 110411 |
+
"grad_norm": 0.2467261403799057,
|
| 110412 |
+
"learning_rate": 1.4086242709766273e-07,
|
| 110413 |
+
"loss": 0.6166015863418579,
|
| 110414 |
+
"step": 15772
|
| 110415 |
+
},
|
| 110416 |
+
{
|
| 110417 |
+
"epoch": 19.353374233128836,
|
| 110418 |
+
"grad_norm": 0.26953253149986267,
|
| 110419 |
+
"learning_rate": 1.403308676241999e-07,
|
| 110420 |
+
"loss": 0.6285051107406616,
|
| 110421 |
+
"step": 15773
|
| 110422 |
+
},
|
| 110423 |
+
{
|
| 110424 |
+
"epoch": 19.354601226993864,
|
| 110425 |
+
"grad_norm": 0.26015618443489075,
|
| 110426 |
+
"learning_rate": 1.398003101730966e-07,
|
| 110427 |
+
"loss": 0.43505793809890747,
|
| 110428 |
+
"step": 15774
|
| 110429 |
+
},
|
| 110430 |
+
{
|
| 110431 |
+
"epoch": 19.355828220858896,
|
| 110432 |
+
"grad_norm": 0.2975304424762726,
|
| 110433 |
+
"learning_rate": 1.3927075476574125e-07,
|
| 110434 |
+
"loss": 0.7155896425247192,
|
| 110435 |
+
"step": 15775
|
| 110436 |
+
},
|
| 110437 |
+
{
|
| 110438 |
+
"epoch": 19.357055214723925,
|
| 110439 |
+
"grad_norm": 0.26385679841041565,
|
| 110440 |
+
"learning_rate": 1.3874220142347515e-07,
|
| 110441 |
+
"loss": 0.5783834457397461,
|
| 110442 |
+
"step": 15776
|
| 110443 |
+
},
|
| 110444 |
+
{
|
| 110445 |
+
"epoch": 19.358282208588957,
|
| 110446 |
+
"grad_norm": 0.31113341450691223,
|
| 110447 |
+
"learning_rate": 1.382146501676035e-07,
|
| 110448 |
+
"loss": 0.5601823925971985,
|
| 110449 |
+
"step": 15777
|
| 110450 |
+
},
|
| 110451 |
+
{
|
| 110452 |
+
"epoch": 19.35950920245399,
|
| 110453 |
+
"grad_norm": 0.27217990159988403,
|
| 110454 |
+
"learning_rate": 1.3768810101939256e-07,
|
| 110455 |
+
"loss": 0.7729296684265137,
|
| 110456 |
+
"step": 15778
|
| 110457 |
+
},
|
| 110458 |
+
{
|
| 110459 |
+
"epoch": 19.360736196319017,
|
| 110460 |
+
"grad_norm": 0.28225427865982056,
|
| 110461 |
+
"learning_rate": 1.371625540000643e-07,
|
| 110462 |
+
"loss": 0.75234055519104,
|
| 110463 |
+
"step": 15779
|
| 110464 |
+
},
|
| 110465 |
+
{
|
| 110466 |
+
"epoch": 19.36196319018405,
|
| 110467 |
+
"grad_norm": 0.23642419278621674,
|
| 110468 |
+
"learning_rate": 1.366380091308017e-07,
|
| 110469 |
+
"loss": 0.5655539631843567,
|
| 110470 |
+
"step": 15780
|
| 110471 |
+
},
|
| 110472 |
+
{
|
| 110473 |
+
"epoch": 19.36319018404908,
|
| 110474 |
+
"grad_norm": 0.2754266560077667,
|
| 110475 |
+
"learning_rate": 1.3611446643274905e-07,
|
| 110476 |
+
"loss": 0.6342481970787048,
|
| 110477 |
+
"step": 15781
|
| 110478 |
+
},
|
| 110479 |
+
{
|
| 110480 |
+
"epoch": 19.36441717791411,
|
| 110481 |
+
"grad_norm": 0.2841984033584595,
|
| 110482 |
+
"learning_rate": 1.3559192592700888e-07,
|
| 110483 |
+
"loss": 0.5084980130195618,
|
| 110484 |
+
"step": 15782
|
| 110485 |
+
},
|
| 110486 |
+
{
|
| 110487 |
+
"epoch": 19.36564417177914,
|
| 110488 |
+
"grad_norm": 0.27847516536712646,
|
| 110489 |
+
"learning_rate": 1.350703876346421e-07,
|
| 110490 |
+
"loss": 0.6208343505859375,
|
| 110491 |
+
"step": 15783
|
| 110492 |
+
},
|
| 110493 |
+
{
|
| 110494 |
+
"epoch": 19.36687116564417,
|
| 110495 |
+
"grad_norm": 0.2644451856613159,
|
| 110496 |
+
"learning_rate": 1.3454985157667076e-07,
|
| 110497 |
+
"loss": 0.6807302236557007,
|
| 110498 |
+
"step": 15784
|
| 110499 |
+
},
|
| 110500 |
+
{
|
| 110501 |
+
"epoch": 19.368098159509202,
|
| 110502 |
+
"grad_norm": 0.25107067823410034,
|
| 110503 |
+
"learning_rate": 1.3403031777407816e-07,
|
| 110504 |
+
"loss": 0.5898149013519287,
|
| 110505 |
+
"step": 15785
|
| 110506 |
+
},
|
| 110507 |
+
{
|
| 110508 |
+
"epoch": 19.369325153374234,
|
| 110509 |
+
"grad_norm": 0.2669815421104431,
|
| 110510 |
+
"learning_rate": 1.3351178624780304e-07,
|
| 110511 |
+
"loss": 0.4053003191947937,
|
| 110512 |
+
"step": 15786
|
| 110513 |
+
},
|
| 110514 |
+
{
|
| 110515 |
+
"epoch": 19.370552147239263,
|
| 110516 |
+
"grad_norm": 0.2503422200679779,
|
| 110517 |
+
"learning_rate": 1.3299425701874535e-07,
|
| 110518 |
+
"loss": 0.572515606880188,
|
| 110519 |
+
"step": 15787
|
| 110520 |
+
},
|
| 110521 |
+
{
|
| 110522 |
+
"epoch": 19.371779141104295,
|
| 110523 |
+
"grad_norm": 0.2358214110136032,
|
| 110524 |
+
"learning_rate": 1.324777301077662e-07,
|
| 110525 |
+
"loss": 0.5711122751235962,
|
| 110526 |
+
"step": 15788
|
| 110527 |
+
},
|
| 110528 |
+
{
|
| 110529 |
+
"epoch": 19.373006134969327,
|
| 110530 |
+
"grad_norm": 0.2685088813304901,
|
| 110531 |
+
"learning_rate": 1.3196220553568505e-07,
|
| 110532 |
+
"loss": 0.5711294412612915,
|
| 110533 |
+
"step": 15789
|
| 110534 |
+
},
|
| 110535 |
+
{
|
| 110536 |
+
"epoch": 19.374233128834355,
|
| 110537 |
+
"grad_norm": 0.29895490407943726,
|
| 110538 |
+
"learning_rate": 1.314476833232825e-07,
|
| 110539 |
+
"loss": 0.6073800921440125,
|
| 110540 |
+
"step": 15790
|
| 110541 |
+
},
|
| 110542 |
+
{
|
| 110543 |
+
"epoch": 19.375460122699387,
|
| 110544 |
+
"grad_norm": 0.23435109853744507,
|
| 110545 |
+
"learning_rate": 1.3093416349129474e-07,
|
| 110546 |
+
"loss": 0.4982273578643799,
|
| 110547 |
+
"step": 15791
|
| 110548 |
+
},
|
| 110549 |
+
{
|
| 110550 |
+
"epoch": 19.376687116564415,
|
| 110551 |
+
"grad_norm": 0.2646413743495941,
|
| 110552 |
+
"learning_rate": 1.3042164606042462e-07,
|
| 110553 |
+
"loss": 0.6578046083450317,
|
| 110554 |
+
"step": 15792
|
| 110555 |
+
},
|
| 110556 |
+
{
|
| 110557 |
+
"epoch": 19.377914110429447,
|
| 110558 |
+
"grad_norm": 0.2803557813167572,
|
| 110559 |
+
"learning_rate": 1.2991013105132787e-07,
|
| 110560 |
+
"loss": 0.6516211032867432,
|
| 110561 |
+
"step": 15793
|
| 110562 |
+
},
|
| 110563 |
+
{
|
| 110564 |
+
"epoch": 19.37914110429448,
|
| 110565 |
+
"grad_norm": 0.23532381653785706,
|
| 110566 |
+
"learning_rate": 1.2939961848461857e-07,
|
| 110567 |
+
"loss": 0.5472257137298584,
|
| 110568 |
+
"step": 15794
|
| 110569 |
+
},
|
| 110570 |
+
{
|
| 110571 |
+
"epoch": 19.380368098159508,
|
| 110572 |
+
"grad_norm": 0.2670692205429077,
|
| 110573 |
+
"learning_rate": 1.2889010838088023e-07,
|
| 110574 |
+
"loss": 0.7223361730575562,
|
| 110575 |
+
"step": 15795
|
| 110576 |
+
},
|
| 110577 |
+
{
|
| 110578 |
+
"epoch": 19.38159509202454,
|
| 110579 |
+
"grad_norm": 0.2556460201740265,
|
| 110580 |
+
"learning_rate": 1.2838160076064644e-07,
|
| 110581 |
+
"loss": 0.7346701622009277,
|
| 110582 |
+
"step": 15796
|
| 110583 |
+
},
|
| 110584 |
+
{
|
| 110585 |
+
"epoch": 19.382822085889572,
|
| 110586 |
+
"grad_norm": 0.25721481442451477,
|
| 110587 |
+
"learning_rate": 1.2787409564441467e-07,
|
| 110588 |
+
"loss": 0.6430968046188354,
|
| 110589 |
+
"step": 15797
|
| 110590 |
+
},
|
| 110591 |
+
{
|
| 110592 |
+
"epoch": 19.3840490797546,
|
| 110593 |
+
"grad_norm": 0.21590790152549744,
|
| 110594 |
+
"learning_rate": 1.27367593052638e-07,
|
| 110595 |
+
"loss": 0.554527223110199,
|
| 110596 |
+
"step": 15798
|
| 110597 |
+
},
|
| 110598 |
+
{
|
| 110599 |
+
"epoch": 19.385276073619632,
|
| 110600 |
+
"grad_norm": 0.2908580005168915,
|
| 110601 |
+
"learning_rate": 1.2686209300573903e-07,
|
| 110602 |
+
"loss": 0.6397587060928345,
|
| 110603 |
+
"step": 15799
|
| 110604 |
+
},
|
| 110605 |
+
{
|
| 110606 |
+
"epoch": 19.38650306748466,
|
| 110607 |
+
"grad_norm": 0.2682172656059265,
|
| 110608 |
+
"learning_rate": 1.2635759552408476e-07,
|
| 110609 |
+
"loss": 0.6351278424263,
|
| 110610 |
+
"step": 15800
|
| 110611 |
+
},
|
| 110612 |
+
{
|
| 110613 |
+
"epoch": 19.387730061349693,
|
| 110614 |
+
"grad_norm": 0.23552776873111725,
|
| 110615 |
+
"learning_rate": 1.258541006280145e-07,
|
| 110616 |
+
"loss": 0.5623694658279419,
|
| 110617 |
+
"step": 15801
|
| 110618 |
+
},
|
| 110619 |
+
{
|
| 110620 |
+
"epoch": 19.388957055214725,
|
| 110621 |
+
"grad_norm": 0.2573508322238922,
|
| 110622 |
+
"learning_rate": 1.253516083378231e-07,
|
| 110623 |
+
"loss": 0.6185847520828247,
|
| 110624 |
+
"step": 15802
|
| 110625 |
+
},
|
| 110626 |
+
{
|
| 110627 |
+
"epoch": 19.390184049079753,
|
| 110628 |
+
"grad_norm": 0.22070027887821198,
|
| 110629 |
+
"learning_rate": 1.2485011867376385e-07,
|
| 110630 |
+
"loss": 0.49574780464172363,
|
| 110631 |
+
"step": 15803
|
| 110632 |
+
},
|
| 110633 |
+
{
|
| 110634 |
+
"epoch": 19.391411042944785,
|
| 110635 |
+
"grad_norm": 0.25273093581199646,
|
| 110636 |
+
"learning_rate": 1.2434963165605106e-07,
|
| 110637 |
+
"loss": 0.49352046847343445,
|
| 110638 |
+
"step": 15804
|
| 110639 |
+
},
|
| 110640 |
+
{
|
| 110641 |
+
"epoch": 19.392638036809817,
|
| 110642 |
+
"grad_norm": 0.2598955035209656,
|
| 110643 |
+
"learning_rate": 1.2385014730485477e-07,
|
| 110644 |
+
"loss": 0.381794810295105,
|
| 110645 |
+
"step": 15805
|
| 110646 |
+
},
|
| 110647 |
+
{
|
| 110648 |
+
"epoch": 19.393865030674846,
|
| 110649 |
+
"grad_norm": 0.2368859201669693,
|
| 110650 |
+
"learning_rate": 1.2335166564031165e-07,
|
| 110651 |
+
"loss": 0.5401523113250732,
|
| 110652 |
+
"step": 15806
|
| 110653 |
+
},
|
| 110654 |
+
{
|
| 110655 |
+
"epoch": 19.395092024539878,
|
| 110656 |
+
"grad_norm": 0.22323626279830933,
|
| 110657 |
+
"learning_rate": 1.228541866825139e-07,
|
| 110658 |
+
"loss": 0.48832154273986816,
|
| 110659 |
+
"step": 15807
|
| 110660 |
+
},
|
| 110661 |
+
{
|
| 110662 |
+
"epoch": 19.39631901840491,
|
| 110663 |
+
"grad_norm": 0.25122928619384766,
|
| 110664 |
+
"learning_rate": 1.2235771045150945e-07,
|
| 110665 |
+
"loss": 0.5396353006362915,
|
| 110666 |
+
"step": 15808
|
| 110667 |
+
},
|
| 110668 |
+
{
|
| 110669 |
+
"epoch": 19.397546012269938,
|
| 110670 |
+
"grad_norm": 0.2825191915035248,
|
| 110671 |
+
"learning_rate": 1.218622369673128e-07,
|
| 110672 |
+
"loss": 0.6739176511764526,
|
| 110673 |
+
"step": 15809
|
| 110674 |
+
},
|
| 110675 |
+
{
|
| 110676 |
+
"epoch": 19.39877300613497,
|
| 110677 |
+
"grad_norm": 0.252305805683136,
|
| 110678 |
+
"learning_rate": 1.213677662498941e-07,
|
| 110679 |
+
"loss": 0.5056841969490051,
|
| 110680 |
+
"step": 15810
|
| 110681 |
+
},
|
| 110682 |
+
{
|
| 110683 |
+
"epoch": 19.4,
|
| 110684 |
+
"grad_norm": 0.2915877401828766,
|
| 110685 |
+
"learning_rate": 1.208742983191874e-07,
|
| 110686 |
+
"loss": 0.6499778032302856,
|
| 110687 |
+
"step": 15811
|
| 110688 |
+
},
|
| 110689 |
+
{
|
| 110690 |
+
"epoch": 19.40122699386503,
|
| 110691 |
+
"grad_norm": 0.283810555934906,
|
| 110692 |
+
"learning_rate": 1.2038183319507955e-07,
|
| 110693 |
+
"loss": 0.499967098236084,
|
| 110694 |
+
"step": 15812
|
| 110695 |
+
},
|
| 110696 |
+
{
|
| 110697 |
+
"epoch": 19.402453987730063,
|
| 110698 |
+
"grad_norm": 0.2559482157230377,
|
| 110699 |
+
"learning_rate": 1.198903708974186e-07,
|
| 110700 |
+
"loss": 0.41814619302749634,
|
| 110701 |
+
"step": 15813
|
| 110702 |
+
},
|
| 110703 |
+
{
|
| 110704 |
+
"epoch": 19.40368098159509,
|
| 110705 |
+
"grad_norm": 0.24786344170570374,
|
| 110706 |
+
"learning_rate": 1.1939991144601925e-07,
|
| 110707 |
+
"loss": 0.4998861849308014,
|
| 110708 |
+
"step": 15814
|
| 110709 |
+
},
|
| 110710 |
+
{
|
| 110711 |
+
"epoch": 19.404907975460123,
|
| 110712 |
+
"grad_norm": 0.23829296231269836,
|
| 110713 |
+
"learning_rate": 1.1891045486064623e-07,
|
| 110714 |
+
"loss": 0.4071350395679474,
|
| 110715 |
+
"step": 15815
|
| 110716 |
+
},
|
| 110717 |
+
{
|
| 110718 |
+
"epoch": 19.406134969325155,
|
| 110719 |
+
"grad_norm": 0.28033989667892456,
|
| 110720 |
+
"learning_rate": 1.1842200116103097e-07,
|
| 110721 |
+
"loss": 0.5290799140930176,
|
| 110722 |
+
"step": 15816
|
| 110723 |
+
},
|
| 110724 |
+
{
|
| 110725 |
+
"epoch": 19.407361963190183,
|
| 110726 |
+
"grad_norm": 0.25909551978111267,
|
| 110727 |
+
"learning_rate": 1.1793455036685774e-07,
|
| 110728 |
+
"loss": 0.6265980005264282,
|
| 110729 |
+
"step": 15817
|
| 110730 |
+
},
|
| 110731 |
+
{
|
| 110732 |
+
"epoch": 19.408588957055215,
|
| 110733 |
+
"grad_norm": 0.24783332645893097,
|
| 110734 |
+
"learning_rate": 1.1744810249777749e-07,
|
| 110735 |
+
"loss": 0.595130205154419,
|
| 110736 |
+
"step": 15818
|
| 110737 |
+
},
|
| 110738 |
+
{
|
| 110739 |
+
"epoch": 19.409815950920244,
|
| 110740 |
+
"grad_norm": 0.2714422941207886,
|
| 110741 |
+
"learning_rate": 1.1696265757339952e-07,
|
| 110742 |
+
"loss": 0.5665304064750671,
|
| 110743 |
+
"step": 15819
|
| 110744 |
+
},
|
| 110745 |
+
{
|
| 110746 |
+
"epoch": 19.411042944785276,
|
| 110747 |
+
"grad_norm": 0.2818240523338318,
|
| 110748 |
+
"learning_rate": 1.1647821561328598e-07,
|
| 110749 |
+
"loss": 0.6436522603034973,
|
| 110750 |
+
"step": 15820
|
| 110751 |
+
},
|
| 110752 |
+
{
|
| 110753 |
+
"epoch": 19.412269938650308,
|
| 110754 |
+
"grad_norm": 0.2629481852054596,
|
| 110755 |
+
"learning_rate": 1.1599477663696845e-07,
|
| 110756 |
+
"loss": 0.6823731660842896,
|
| 110757 |
+
"step": 15821
|
| 110758 |
+
},
|
| 110759 |
+
{
|
| 110760 |
+
"epoch": 19.413496932515336,
|
| 110761 |
+
"grad_norm": 0.2467951774597168,
|
| 110762 |
+
"learning_rate": 1.155123406639258e-07,
|
| 110763 |
+
"loss": 0.6010808944702148,
|
| 110764 |
+
"step": 15822
|
| 110765 |
+
},
|
| 110766 |
+
{
|
| 110767 |
+
"epoch": 19.41472392638037,
|
| 110768 |
+
"grad_norm": 0.2746279239654541,
|
| 110769 |
+
"learning_rate": 1.1503090771361192e-07,
|
| 110770 |
+
"loss": 0.6330393552780151,
|
| 110771 |
+
"step": 15823
|
| 110772 |
+
},
|
| 110773 |
+
{
|
| 110774 |
+
"epoch": 19.4159509202454,
|
| 110775 |
+
"grad_norm": 0.2665802836418152,
|
| 110776 |
+
"learning_rate": 1.1455047780542239e-07,
|
| 110777 |
+
"loss": 0.49999570846557617,
|
| 110778 |
+
"step": 15824
|
| 110779 |
+
},
|
| 110780 |
+
{
|
| 110781 |
+
"epoch": 19.41717791411043,
|
| 110782 |
+
"grad_norm": 0.24728545546531677,
|
| 110783 |
+
"learning_rate": 1.1407105095873338e-07,
|
| 110784 |
+
"loss": 0.6300836801528931,
|
| 110785 |
+
"step": 15825
|
| 110786 |
+
},
|
| 110787 |
+
{
|
| 110788 |
+
"epoch": 19.41840490797546,
|
| 110789 |
+
"grad_norm": 0.24740903079509735,
|
| 110790 |
+
"learning_rate": 1.1359262719286002e-07,
|
| 110791 |
+
"loss": 0.48572689294815063,
|
| 110792 |
+
"step": 15826
|
| 110793 |
+
},
|
| 110794 |
+
{
|
| 110795 |
+
"epoch": 19.41963190184049,
|
| 110796 |
+
"grad_norm": 0.2547744810581207,
|
| 110797 |
+
"learning_rate": 1.131152065270924e-07,
|
| 110798 |
+
"loss": 0.7048099040985107,
|
| 110799 |
+
"step": 15827
|
| 110800 |
+
},
|
| 110801 |
+
{
|
| 110802 |
+
"epoch": 19.42085889570552,
|
| 110803 |
+
"grad_norm": 0.2635292410850525,
|
| 110804 |
+
"learning_rate": 1.1263878898066792e-07,
|
| 110805 |
+
"loss": 0.7074223756790161,
|
| 110806 |
+
"step": 15828
|
| 110807 |
+
},
|
| 110808 |
+
{
|
| 110809 |
+
"epoch": 19.422085889570553,
|
| 110810 |
+
"grad_norm": 0.24470630288124084,
|
| 110811 |
+
"learning_rate": 1.1216337457279624e-07,
|
| 110812 |
+
"loss": 0.5487247705459595,
|
| 110813 |
+
"step": 15829
|
| 110814 |
+
},
|
| 110815 |
+
{
|
| 110816 |
+
"epoch": 19.42331288343558,
|
| 110817 |
+
"grad_norm": 0.2753901481628418,
|
| 110818 |
+
"learning_rate": 1.1168896332263423e-07,
|
| 110819 |
+
"loss": 0.598219633102417,
|
| 110820 |
+
"step": 15830
|
| 110821 |
+
},
|
| 110822 |
+
{
|
| 110823 |
+
"epoch": 19.424539877300614,
|
| 110824 |
+
"grad_norm": 0.2721850574016571,
|
| 110825 |
+
"learning_rate": 1.1121555524930826e-07,
|
| 110826 |
+
"loss": 0.6115570664405823,
|
| 110827 |
+
"step": 15831
|
| 110828 |
+
},
|
| 110829 |
+
{
|
| 110830 |
+
"epoch": 19.425766871165646,
|
| 110831 |
+
"grad_norm": 0.25848832726478577,
|
| 110832 |
+
"learning_rate": 1.1074315037189753e-07,
|
| 110833 |
+
"loss": 0.7405918836593628,
|
| 110834 |
+
"step": 15832
|
| 110835 |
+
},
|
| 110836 |
+
{
|
| 110837 |
+
"epoch": 19.426993865030674,
|
| 110838 |
+
"grad_norm": 0.27631810307502747,
|
| 110839 |
+
"learning_rate": 1.1027174870944234e-07,
|
| 110840 |
+
"loss": 0.6088199615478516,
|
| 110841 |
+
"step": 15833
|
| 110842 |
+
},
|
| 110843 |
+
{
|
| 110844 |
+
"epoch": 19.428220858895706,
|
| 110845 |
+
"grad_norm": 0.2754313349723816,
|
| 110846 |
+
"learning_rate": 1.0980135028094696e-07,
|
| 110847 |
+
"loss": 0.5649731159210205,
|
| 110848 |
+
"step": 15834
|
| 110849 |
+
},
|
| 110850 |
+
{
|
| 110851 |
+
"epoch": 19.429447852760735,
|
| 110852 |
+
"grad_norm": 0.2925424873828888,
|
| 110853 |
+
"learning_rate": 1.093319551053712e-07,
|
| 110854 |
+
"loss": 0.7299742698669434,
|
| 110855 |
+
"step": 15835
|
| 110856 |
+
},
|
| 110857 |
+
{
|
| 110858 |
+
"epoch": 19.430674846625767,
|
| 110859 |
+
"grad_norm": 0.2503783702850342,
|
| 110860 |
+
"learning_rate": 1.0886356320163049e-07,
|
| 110861 |
+
"loss": 0.27628859877586365,
|
| 110862 |
+
"step": 15836
|
| 110863 |
+
},
|
| 110864 |
+
{
|
| 110865 |
+
"epoch": 19.4319018404908,
|
| 110866 |
+
"grad_norm": 0.2410682737827301,
|
| 110867 |
+
"learning_rate": 1.0839617458860973e-07,
|
| 110868 |
+
"loss": 0.5626966953277588,
|
| 110869 |
+
"step": 15837
|
| 110870 |
+
},
|
| 110871 |
+
{
|
| 110872 |
+
"epoch": 19.433128834355827,
|
| 110873 |
+
"grad_norm": 0.23362362384796143,
|
| 110874 |
+
"learning_rate": 1.0792978928514663e-07,
|
| 110875 |
+
"loss": 0.4103464186191559,
|
| 110876 |
+
"step": 15838
|
| 110877 |
+
},
|
| 110878 |
+
{
|
| 110879 |
+
"epoch": 19.43435582822086,
|
| 110880 |
+
"grad_norm": 0.2630615234375,
|
| 110881 |
+
"learning_rate": 1.0746440731003727e-07,
|
| 110882 |
+
"loss": 0.6557605266571045,
|
| 110883 |
+
"step": 15839
|
| 110884 |
+
},
|
| 110885 |
+
{
|
| 110886 |
+
"epoch": 19.43558282208589,
|
| 110887 |
+
"grad_norm": 0.24873913824558258,
|
| 110888 |
+
"learning_rate": 1.0700002868204439e-07,
|
| 110889 |
+
"loss": 0.7347830533981323,
|
| 110890 |
+
"step": 15840
|
| 110891 |
+
},
|
| 110892 |
+
{
|
| 110893 |
+
"epoch": 19.43680981595092,
|
| 110894 |
+
"grad_norm": 0.2748818099498749,
|
| 110895 |
+
"learning_rate": 1.0653665341988361e-07,
|
| 110896 |
+
"loss": 0.6404551863670349,
|
| 110897 |
+
"step": 15841
|
| 110898 |
+
},
|
| 110899 |
+
{
|
| 110900 |
+
"epoch": 19.43803680981595,
|
| 110901 |
+
"grad_norm": 0.22964942455291748,
|
| 110902 |
+
"learning_rate": 1.0607428154222887e-07,
|
| 110903 |
+
"loss": 0.6122875213623047,
|
| 110904 |
+
"step": 15842
|
| 110905 |
+
},
|
| 110906 |
+
{
|
| 110907 |
+
"epoch": 19.43926380368098,
|
| 110908 |
+
"grad_norm": 0.2754347026348114,
|
| 110909 |
+
"learning_rate": 1.0561291306772359e-07,
|
| 110910 |
+
"loss": 0.71027010679245,
|
| 110911 |
+
"step": 15843
|
| 110912 |
+
},
|
| 110913 |
+
{
|
| 110914 |
+
"epoch": 19.440490797546012,
|
| 110915 |
+
"grad_norm": 0.245911106467247,
|
| 110916 |
+
"learning_rate": 1.0515254801495845e-07,
|
| 110917 |
+
"loss": 0.632553219795227,
|
| 110918 |
+
"step": 15844
|
| 110919 |
+
},
|
| 110920 |
+
{
|
| 110921 |
+
"epoch": 19.441717791411044,
|
| 110922 |
+
"grad_norm": 0.2571648061275482,
|
| 110923 |
+
"learning_rate": 1.0469318640249359e-07,
|
| 110924 |
+
"loss": 0.5757336616516113,
|
| 110925 |
+
"step": 15845
|
| 110926 |
+
},
|
| 110927 |
+
{
|
| 110928 |
+
"epoch": 19.442944785276072,
|
| 110929 |
+
"grad_norm": 0.26781097054481506,
|
| 110930 |
+
"learning_rate": 1.0423482824883924e-07,
|
| 110931 |
+
"loss": 0.7274123430252075,
|
| 110932 |
+
"step": 15846
|
| 110933 |
+
},
|
| 110934 |
+
{
|
| 110935 |
+
"epoch": 19.444171779141104,
|
| 110936 |
+
"grad_norm": 0.2652866244316101,
|
| 110937 |
+
"learning_rate": 1.037774735724778e-07,
|
| 110938 |
+
"loss": 0.5961483716964722,
|
| 110939 |
+
"step": 15847
|
| 110940 |
+
},
|
| 110941 |
+
{
|
| 110942 |
+
"epoch": 19.445398773006136,
|
| 110943 |
+
"grad_norm": 0.279137521982193,
|
| 110944 |
+
"learning_rate": 1.03321122391839e-07,
|
| 110945 |
+
"loss": 0.7205758690834045,
|
| 110946 |
+
"step": 15848
|
| 110947 |
+
},
|
| 110948 |
+
{
|
| 110949 |
+
"epoch": 19.446625766871165,
|
| 110950 |
+
"grad_norm": 0.35979318618774414,
|
| 110951 |
+
"learning_rate": 1.0286577472531645e-07,
|
| 110952 |
+
"loss": 0.46861517429351807,
|
| 110953 |
+
"step": 15849
|
| 110954 |
+
},
|
| 110955 |
+
{
|
| 110956 |
+
"epoch": 19.447852760736197,
|
| 110957 |
+
"grad_norm": 0.2840240001678467,
|
| 110958 |
+
"learning_rate": 1.0241143059126767e-07,
|
| 110959 |
+
"loss": 0.6414732933044434,
|
| 110960 |
+
"step": 15850
|
| 110961 |
+
},
|
| 110962 |
+
{
|
| 110963 |
+
"epoch": 19.449079754601225,
|
| 110964 |
+
"grad_norm": 0.2638004720211029,
|
| 110965 |
+
"learning_rate": 1.0195809000800305e-07,
|
| 110966 |
+
"loss": 0.5935536026954651,
|
| 110967 |
+
"step": 15851
|
| 110968 |
+
},
|
| 110969 |
+
{
|
| 110970 |
+
"epoch": 19.450306748466257,
|
| 110971 |
+
"grad_norm": 0.24804934859275818,
|
| 110972 |
+
"learning_rate": 1.0150575299379683e-07,
|
| 110973 |
+
"loss": 0.7691047191619873,
|
| 110974 |
+
"step": 15852
|
| 110975 |
+
},
|
| 110976 |
+
{
|
| 110977 |
+
"epoch": 19.45153374233129,
|
| 110978 |
+
"grad_norm": 0.2349923551082611,
|
| 110979 |
+
"learning_rate": 1.0105441956688167e-07,
|
| 110980 |
+
"loss": 0.5768539905548096,
|
| 110981 |
+
"step": 15853
|
| 110982 |
+
},
|
| 110983 |
+
{
|
| 110984 |
+
"epoch": 19.452760736196318,
|
| 110985 |
+
"grad_norm": 0.27598461508750916,
|
| 110986 |
+
"learning_rate": 1.0060408974544855e-07,
|
| 110987 |
+
"loss": 0.5908697843551636,
|
| 110988 |
+
"step": 15854
|
| 110989 |
+
},
|
| 110990 |
+
{
|
| 110991 |
+
"epoch": 19.45398773006135,
|
| 110992 |
+
"grad_norm": 0.29686421155929565,
|
| 110993 |
+
"learning_rate": 1.0015476354764963e-07,
|
| 110994 |
+
"loss": 0.5275049805641174,
|
| 110995 |
+
"step": 15855
|
| 110996 |
+
},
|
| 110997 |
+
{
|
| 110998 |
+
"epoch": 19.45521472392638,
|
| 110999 |
+
"grad_norm": 0.2517312467098236,
|
| 111000 |
+
"learning_rate": 9.970644099159266e-08,
|
| 111001 |
+
"loss": 0.3856890797615051,
|
| 111002 |
+
"step": 15856
|
| 111003 |
+
},
|
| 111004 |
+
{
|
| 111005 |
+
"epoch": 19.45644171779141,
|
| 111006 |
+
"grad_norm": 0.2788155972957611,
|
| 111007 |
+
"learning_rate": 9.925912209535482e-08,
|
| 111008 |
+
"loss": 0.4941345751285553,
|
| 111009 |
+
"step": 15857
|
| 111010 |
+
},
|
| 111011 |
+
{
|
| 111012 |
+
"epoch": 19.457668711656442,
|
| 111013 |
+
"grad_norm": 0.25111356377601624,
|
| 111014 |
+
"learning_rate": 9.881280687696059e-08,
|
| 111015 |
+
"loss": 0.6521642208099365,
|
| 111016 |
+
"step": 15858
|
| 111017 |
+
},
|
| 111018 |
+
{
|
| 111019 |
+
"epoch": 19.45889570552147,
|
| 111020 |
+
"grad_norm": 0.28807491064071655,
|
| 111021 |
+
"learning_rate": 9.836749535440115e-08,
|
| 111022 |
+
"loss": 0.6666617393493652,
|
| 111023 |
+
"step": 15859
|
| 111024 |
+
},
|
| 111025 |
+
{
|
| 111026 |
+
"epoch": 19.460122699386503,
|
| 111027 |
+
"grad_norm": 0.2396821826696396,
|
| 111028 |
+
"learning_rate": 9.792318754562879e-08,
|
| 111029 |
+
"loss": 0.6506588459014893,
|
| 111030 |
+
"step": 15860
|
| 111031 |
+
},
|
| 111032 |
+
{
|
| 111033 |
+
"epoch": 19.461349693251535,
|
| 111034 |
+
"grad_norm": 0.28365930914878845,
|
| 111035 |
+
"learning_rate": 9.747988346854863e-08,
|
| 111036 |
+
"loss": 0.7387405037879944,
|
| 111037 |
+
"step": 15861
|
| 111038 |
+
},
|
| 111039 |
+
{
|
| 111040 |
+
"epoch": 19.462576687116563,
|
| 111041 |
+
"grad_norm": 0.24973832070827484,
|
| 111042 |
+
"learning_rate": 9.703758314102974e-08,
|
| 111043 |
+
"loss": 0.4761587977409363,
|
| 111044 |
+
"step": 15862
|
| 111045 |
+
},
|
| 111046 |
+
{
|
| 111047 |
+
"epoch": 19.463803680981595,
|
| 111048 |
+
"grad_norm": 0.27384695410728455,
|
| 111049 |
+
"learning_rate": 9.659628658089948e-08,
|
| 111050 |
+
"loss": 0.7770678997039795,
|
| 111051 |
+
"step": 15863
|
| 111052 |
+
},
|
| 111053 |
+
{
|
| 111054 |
+
"epoch": 19.465030674846627,
|
| 111055 |
+
"grad_norm": 0.25532543659210205,
|
| 111056 |
+
"learning_rate": 9.61559938059492e-08,
|
| 111057 |
+
"loss": 0.5538173913955688,
|
| 111058 |
+
"step": 15864
|
| 111059 |
+
},
|
| 111060 |
+
{
|
| 111061 |
+
"epoch": 19.466257668711656,
|
| 111062 |
+
"grad_norm": 0.2814151644706726,
|
| 111063 |
+
"learning_rate": 9.571670483392026e-08,
|
| 111064 |
+
"loss": 0.46482327580451965,
|
| 111065 |
+
"step": 15865
|
| 111066 |
+
},
|
| 111067 |
+
{
|
| 111068 |
+
"epoch": 19.467484662576688,
|
| 111069 |
+
"grad_norm": 0.2586729824542999,
|
| 111070 |
+
"learning_rate": 9.527841968252349e-08,
|
| 111071 |
+
"loss": 0.5719910860061646,
|
| 111072 |
+
"step": 15866
|
| 111073 |
+
},
|
| 111074 |
+
{
|
| 111075 |
+
"epoch": 19.46871165644172,
|
| 111076 |
+
"grad_norm": 0.24742953479290009,
|
| 111077 |
+
"learning_rate": 9.484113836941977e-08,
|
| 111078 |
+
"loss": 0.5429021120071411,
|
| 111079 |
+
"step": 15867
|
| 111080 |
+
},
|
| 111081 |
+
{
|
| 111082 |
+
"epoch": 19.469938650306748,
|
| 111083 |
+
"grad_norm": 0.29058292508125305,
|
| 111084 |
+
"learning_rate": 9.440486091223944e-08,
|
| 111085 |
+
"loss": 0.6003931760787964,
|
| 111086 |
+
"step": 15868
|
| 111087 |
+
},
|
| 111088 |
+
{
|
| 111089 |
+
"epoch": 19.47116564417178,
|
| 111090 |
+
"grad_norm": 0.23325979709625244,
|
| 111091 |
+
"learning_rate": 9.396958732856842e-08,
|
| 111092 |
+
"loss": 0.3479607105255127,
|
| 111093 |
+
"step": 15869
|
| 111094 |
+
},
|
| 111095 |
+
{
|
| 111096 |
+
"epoch": 19.47239263803681,
|
| 111097 |
+
"grad_norm": 0.25594303011894226,
|
| 111098 |
+
"learning_rate": 9.353531763594826e-08,
|
| 111099 |
+
"loss": 0.6116254925727844,
|
| 111100 |
+
"step": 15870
|
| 111101 |
+
},
|
| 111102 |
+
{
|
| 111103 |
+
"epoch": 19.47361963190184,
|
| 111104 |
+
"grad_norm": 0.306173175573349,
|
| 111105 |
+
"learning_rate": 9.310205185188436e-08,
|
| 111106 |
+
"loss": 0.6195787191390991,
|
| 111107 |
+
"step": 15871
|
| 111108 |
+
},
|
| 111109 |
+
{
|
| 111110 |
+
"epoch": 19.474846625766872,
|
| 111111 |
+
"grad_norm": 0.30268770456314087,
|
| 111112 |
+
"learning_rate": 9.26697899938378e-08,
|
| 111113 |
+
"loss": 0.7054605484008789,
|
| 111114 |
+
"step": 15872
|
| 111115 |
+
},
|
| 111116 |
+
{
|
| 111117 |
+
"epoch": 19.4760736196319,
|
| 111118 |
+
"grad_norm": 0.2644689977169037,
|
| 111119 |
+
"learning_rate": 9.223853207923627e-08,
|
| 111120 |
+
"loss": 0.5313975214958191,
|
| 111121 |
+
"step": 15873
|
| 111122 |
+
},
|
| 111123 |
+
{
|
| 111124 |
+
"epoch": 19.477300613496933,
|
| 111125 |
+
"grad_norm": 0.2595892548561096,
|
| 111126 |
+
"learning_rate": 9.180827812546034e-08,
|
| 111127 |
+
"loss": 0.5963461399078369,
|
| 111128 |
+
"step": 15874
|
| 111129 |
+
},
|
| 111130 |
+
{
|
| 111131 |
+
"epoch": 19.478527607361965,
|
| 111132 |
+
"grad_norm": 0.25042617321014404,
|
| 111133 |
+
"learning_rate": 9.137902814984889e-08,
|
| 111134 |
+
"loss": 0.48135995864868164,
|
| 111135 |
+
"step": 15875
|
| 111136 |
+
},
|
| 111137 |
+
{
|
| 111138 |
+
"epoch": 19.479754601226993,
|
| 111139 |
+
"grad_norm": 0.24949641525745392,
|
| 111140 |
+
"learning_rate": 9.095078216971031e-08,
|
| 111141 |
+
"loss": 0.6230705380439758,
|
| 111142 |
+
"step": 15876
|
| 111143 |
+
},
|
| 111144 |
+
{
|
| 111145 |
+
"epoch": 19.480981595092025,
|
| 111146 |
+
"grad_norm": 0.2671191096305847,
|
| 111147 |
+
"learning_rate": 9.052354020230303e-08,
|
| 111148 |
+
"loss": 0.4922260046005249,
|
| 111149 |
+
"step": 15877
|
| 111150 |
+
},
|
| 111151 |
+
{
|
| 111152 |
+
"epoch": 19.482208588957054,
|
| 111153 |
+
"grad_norm": 0.25180691480636597,
|
| 111154 |
+
"learning_rate": 9.009730226484658e-08,
|
| 111155 |
+
"loss": 0.7323871850967407,
|
| 111156 |
+
"step": 15878
|
| 111157 |
+
},
|
| 111158 |
+
{
|
| 111159 |
+
"epoch": 19.483435582822086,
|
| 111160 |
+
"grad_norm": 0.23532934486865997,
|
| 111161 |
+
"learning_rate": 8.967206837452446e-08,
|
| 111162 |
+
"loss": 0.49909257888793945,
|
| 111163 |
+
"step": 15879
|
| 111164 |
+
},
|
| 111165 |
+
{
|
| 111166 |
+
"epoch": 19.484662576687118,
|
| 111167 |
+
"grad_norm": 0.2709427773952484,
|
| 111168 |
+
"learning_rate": 8.924783854847296e-08,
|
| 111169 |
+
"loss": 0.6748344898223877,
|
| 111170 |
+
"step": 15880
|
| 111171 |
+
},
|
| 111172 |
+
{
|
| 111173 |
+
"epoch": 19.485889570552146,
|
| 111174 |
+
"grad_norm": 0.27144742012023926,
|
| 111175 |
+
"learning_rate": 8.882461280379506e-08,
|
| 111176 |
+
"loss": 0.5123757123947144,
|
| 111177 |
+
"step": 15881
|
| 111178 |
+
},
|
| 111179 |
+
{
|
| 111180 |
+
"epoch": 19.487116564417178,
|
| 111181 |
+
"grad_norm": 0.2806928753852844,
|
| 111182 |
+
"learning_rate": 8.840239115754656e-08,
|
| 111183 |
+
"loss": 0.5968196392059326,
|
| 111184 |
+
"step": 15882
|
| 111185 |
+
},
|
| 111186 |
+
{
|
| 111187 |
+
"epoch": 19.48834355828221,
|
| 111188 |
+
"grad_norm": 0.2839152216911316,
|
| 111189 |
+
"learning_rate": 8.798117362674718e-08,
|
| 111190 |
+
"loss": 0.6130701899528503,
|
| 111191 |
+
"step": 15883
|
| 111192 |
+
},
|
| 111193 |
+
{
|
| 111194 |
+
"epoch": 19.48957055214724,
|
| 111195 |
+
"grad_norm": 0.2815488278865814,
|
| 111196 |
+
"learning_rate": 8.756096022837779e-08,
|
| 111197 |
+
"loss": 0.5562403202056885,
|
| 111198 |
+
"step": 15884
|
| 111199 |
+
},
|
| 111200 |
+
{
|
| 111201 |
+
"epoch": 19.49079754601227,
|
| 111202 |
+
"grad_norm": 0.281522274017334,
|
| 111203 |
+
"learning_rate": 8.714175097937204e-08,
|
| 111204 |
+
"loss": 0.5834305286407471,
|
| 111205 |
+
"step": 15885
|
| 111206 |
+
},
|
| 111207 |
+
{
|
| 111208 |
+
"epoch": 19.4920245398773,
|
| 111209 |
+
"grad_norm": 0.2675686478614807,
|
| 111210 |
+
"learning_rate": 8.672354589662757e-08,
|
| 111211 |
+
"loss": 0.535876989364624,
|
| 111212 |
+
"step": 15886
|
| 111213 |
+
},
|
| 111214 |
+
{
|
| 111215 |
+
"epoch": 19.49325153374233,
|
| 111216 |
+
"grad_norm": 0.2832101881504059,
|
| 111217 |
+
"learning_rate": 8.630634499700308e-08,
|
| 111218 |
+
"loss": 0.5851317048072815,
|
| 111219 |
+
"step": 15887
|
| 111220 |
+
},
|
| 111221 |
+
{
|
| 111222 |
+
"epoch": 19.494478527607363,
|
| 111223 |
+
"grad_norm": 0.2712644040584564,
|
| 111224 |
+
"learning_rate": 8.58901482973129e-08,
|
| 111225 |
+
"loss": 0.653908371925354,
|
| 111226 |
+
"step": 15888
|
| 111227 |
+
},
|
| 111228 |
+
{
|
| 111229 |
+
"epoch": 19.49570552147239,
|
| 111230 |
+
"grad_norm": 0.27528050541877747,
|
| 111231 |
+
"learning_rate": 8.547495581433529e-08,
|
| 111232 |
+
"loss": 0.5186161994934082,
|
| 111233 |
+
"step": 15889
|
| 111234 |
+
},
|
| 111235 |
+
{
|
| 111236 |
+
"epoch": 19.496932515337424,
|
| 111237 |
+
"grad_norm": 0.30082106590270996,
|
| 111238 |
+
"learning_rate": 8.50607675648013e-08,
|
| 111239 |
+
"loss": 0.6535308361053467,
|
| 111240 |
+
"step": 15890
|
| 111241 |
+
},
|
| 111242 |
+
{
|
| 111243 |
+
"epoch": 19.498159509202456,
|
| 111244 |
+
"grad_norm": 0.2811293601989746,
|
| 111245 |
+
"learning_rate": 8.464758356541147e-08,
|
| 111246 |
+
"loss": 0.6708139181137085,
|
| 111247 |
+
"step": 15891
|
| 111248 |
+
},
|
| 111249 |
+
{
|
| 111250 |
+
"epoch": 19.499386503067484,
|
| 111251 |
+
"grad_norm": 0.2910325527191162,
|
| 111252 |
+
"learning_rate": 8.423540383281359e-08,
|
| 111253 |
+
"loss": 0.6598278284072876,
|
| 111254 |
+
"step": 15892
|
| 111255 |
+
},
|
| 111256 |
+
{
|
| 111257 |
+
"epoch": 19.500613496932516,
|
| 111258 |
+
"grad_norm": 0.2488136738538742,
|
| 111259 |
+
"learning_rate": 8.382422838362768e-08,
|
| 111260 |
+
"loss": 0.6660745143890381,
|
| 111261 |
+
"step": 15893
|
| 111262 |
+
},
|
| 111263 |
+
{
|
| 111264 |
+
"epoch": 19.501840490797544,
|
| 111265 |
+
"grad_norm": 0.29299691319465637,
|
| 111266 |
+
"learning_rate": 8.341405723442108e-08,
|
| 111267 |
+
"loss": 0.3314124345779419,
|
| 111268 |
+
"step": 15894
|
| 111269 |
+
},
|
| 111270 |
+
{
|
| 111271 |
+
"epoch": 19.503067484662576,
|
| 111272 |
+
"grad_norm": 0.24812175333499908,
|
| 111273 |
+
"learning_rate": 8.300489040173054e-08,
|
| 111274 |
+
"loss": 0.4809376001358032,
|
| 111275 |
+
"step": 15895
|
| 111276 |
+
},
|
| 111277 |
+
{
|
| 111278 |
+
"epoch": 19.50429447852761,
|
| 111279 |
+
"grad_norm": 0.2445564717054367,
|
| 111280 |
+
"learning_rate": 8.259672790204842e-08,
|
| 111281 |
+
"loss": 0.44464632868766785,
|
| 111282 |
+
"step": 15896
|
| 111283 |
+
},
|
| 111284 |
+
{
|
| 111285 |
+
"epoch": 19.505521472392637,
|
| 111286 |
+
"grad_norm": 0.27744320034980774,
|
| 111287 |
+
"learning_rate": 8.218956975182268e-08,
|
| 111288 |
+
"loss": 0.6921316385269165,
|
| 111289 |
+
"step": 15897
|
| 111290 |
+
},
|
| 111291 |
+
{
|
| 111292 |
+
"epoch": 19.50674846625767,
|
| 111293 |
+
"grad_norm": 0.298613965511322,
|
| 111294 |
+
"learning_rate": 8.178341596747074e-08,
|
| 111295 |
+
"loss": 0.7783851623535156,
|
| 111296 |
+
"step": 15898
|
| 111297 |
+
},
|
| 111298 |
+
{
|
| 111299 |
+
"epoch": 19.5079754601227,
|
| 111300 |
+
"grad_norm": 0.2789246141910553,
|
| 111301 |
+
"learning_rate": 8.137826656536007e-08,
|
| 111302 |
+
"loss": 0.6806527972221375,
|
| 111303 |
+
"step": 15899
|
| 111304 |
+
},
|
| 111305 |
+
{
|
| 111306 |
+
"epoch": 19.50920245398773,
|
| 111307 |
+
"grad_norm": 0.2696753442287445,
|
| 111308 |
+
"learning_rate": 8.097412156181927e-08,
|
| 111309 |
+
"loss": 0.7563017010688782,
|
| 111310 |
+
"step": 15900
|
| 111311 |
}
|
| 111312 |
],
|
| 111313 |
"logging_steps": 1,
|
|
|
|
| 111327 |
"attributes": {}
|
| 111328 |
}
|
| 111329 |
},
|
| 111330 |
+
"total_flos": 4.447346514997543e+19,
|
| 111331 |
"train_batch_size": 8,
|
| 111332 |
"trial_name": null,
|
| 111333 |
"trial_params": null
|