Training in progress, step 15600, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dddfbf2dbb71d292c025a36d89a603bc6d0638f166c3af3e827ff0b6131cce2
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdeec63821b21fc7b5a96f599159a87355b16b1b13abb529d2b0722f2aaaff69
|
| 3 |
size 173247691
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52b1746a868d4872448a8ea7dbeda88705c1e50e54c77aedefa9f6de5fe32efe
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 17.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -107108,6 +107108,2106 @@
|
|
| 107108 |
"learning_rate": 2.1868285502537e-06,
|
| 107109 |
"loss": 0.4652,
|
| 107110 |
"step": 15300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107111 |
}
|
| 107112 |
],
|
| 107113 |
"logging_steps": 1,
|
|
@@ -107127,7 +109227,7 @@
|
|
| 107127 |
"attributes": {}
|
| 107128 |
}
|
| 107129 |
},
|
| 107130 |
-
"total_flos": 8.
|
| 107131 |
"train_batch_size": 8,
|
| 107132 |
"trial_name": null,
|
| 107133 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 17.768091168091168,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 107108 |
"learning_rate": 2.1868285502537e-06,
|
| 107109 |
"loss": 0.4652,
|
| 107110 |
"step": 15300
|
| 107111 |
+
},
|
| 107112 |
+
{
|
| 107113 |
+
"epoch": 17.427350427350426,
|
| 107114 |
+
"grad_norm": 0.1929951161146164,
|
| 107115 |
+
"learning_rate": 2.1849232631422467e-06,
|
| 107116 |
+
"loss": 0.7723,
|
| 107117 |
+
"step": 15301
|
| 107118 |
+
},
|
| 107119 |
+
{
|
| 107120 |
+
"epoch": 17.428490028490028,
|
| 107121 |
+
"grad_norm": 0.23146596550941467,
|
| 107122 |
+
"learning_rate": 2.183018768461151e-06,
|
| 107123 |
+
"loss": 0.393,
|
| 107124 |
+
"step": 15302
|
| 107125 |
+
},
|
| 107126 |
+
{
|
| 107127 |
+
"epoch": 17.42962962962963,
|
| 107128 |
+
"grad_norm": 0.19300377368927002,
|
| 107129 |
+
"learning_rate": 2.1811150662765596e-06,
|
| 107130 |
+
"loss": 0.6173,
|
| 107131 |
+
"step": 15303
|
| 107132 |
+
},
|
| 107133 |
+
{
|
| 107134 |
+
"epoch": 17.43076923076923,
|
| 107135 |
+
"grad_norm": 0.19949676096439362,
|
| 107136 |
+
"learning_rate": 2.179212156654595e-06,
|
| 107137 |
+
"loss": 0.7441,
|
| 107138 |
+
"step": 15304
|
| 107139 |
+
},
|
| 107140 |
+
{
|
| 107141 |
+
"epoch": 17.43190883190883,
|
| 107142 |
+
"grad_norm": 0.1906082034111023,
|
| 107143 |
+
"learning_rate": 2.177310039661348e-06,
|
| 107144 |
+
"loss": 0.5934,
|
| 107145 |
+
"step": 15305
|
| 107146 |
+
},
|
| 107147 |
+
{
|
| 107148 |
+
"epoch": 17.433048433048434,
|
| 107149 |
+
"grad_norm": 0.2418171465396881,
|
| 107150 |
+
"learning_rate": 2.175408715362892e-06,
|
| 107151 |
+
"loss": 0.4327,
|
| 107152 |
+
"step": 15306
|
| 107153 |
+
},
|
| 107154 |
+
{
|
| 107155 |
+
"epoch": 17.434188034188033,
|
| 107156 |
+
"grad_norm": 0.1856294721364975,
|
| 107157 |
+
"learning_rate": 2.173508183825254e-06,
|
| 107158 |
+
"loss": 0.7891,
|
| 107159 |
+
"step": 15307
|
| 107160 |
+
},
|
| 107161 |
+
{
|
| 107162 |
+
"epoch": 17.435327635327635,
|
| 107163 |
+
"grad_norm": 0.2543734908103943,
|
| 107164 |
+
"learning_rate": 2.171608445114451e-06,
|
| 107165 |
+
"loss": 0.6179,
|
| 107166 |
+
"step": 15308
|
| 107167 |
+
},
|
| 107168 |
+
{
|
| 107169 |
+
"epoch": 17.436467236467237,
|
| 107170 |
+
"grad_norm": 0.18569955229759216,
|
| 107171 |
+
"learning_rate": 2.169709499296463e-06,
|
| 107172 |
+
"loss": 0.7025,
|
| 107173 |
+
"step": 15309
|
| 107174 |
+
},
|
| 107175 |
+
{
|
| 107176 |
+
"epoch": 17.437606837606836,
|
| 107177 |
+
"grad_norm": 0.23999355733394623,
|
| 107178 |
+
"learning_rate": 2.167811346437254e-06,
|
| 107179 |
+
"loss": 0.4552,
|
| 107180 |
+
"step": 15310
|
| 107181 |
+
},
|
| 107182 |
+
{
|
| 107183 |
+
"epoch": 17.43874643874644,
|
| 107184 |
+
"grad_norm": 0.16314929723739624,
|
| 107185 |
+
"learning_rate": 2.165913986602741e-06,
|
| 107186 |
+
"loss": 0.5465,
|
| 107187 |
+
"step": 15311
|
| 107188 |
+
},
|
| 107189 |
+
{
|
| 107190 |
+
"epoch": 17.43988603988604,
|
| 107191 |
+
"grad_norm": 0.21630598604679108,
|
| 107192 |
+
"learning_rate": 2.1640174198588297e-06,
|
| 107193 |
+
"loss": 0.6926,
|
| 107194 |
+
"step": 15312
|
| 107195 |
+
},
|
| 107196 |
+
{
|
| 107197 |
+
"epoch": 17.44102564102564,
|
| 107198 |
+
"grad_norm": 0.21854954957962036,
|
| 107199 |
+
"learning_rate": 2.1621216462713893e-06,
|
| 107200 |
+
"loss": 0.6521,
|
| 107201 |
+
"step": 15313
|
| 107202 |
+
},
|
| 107203 |
+
{
|
| 107204 |
+
"epoch": 17.442165242165242,
|
| 107205 |
+
"grad_norm": 0.20090152323246002,
|
| 107206 |
+
"learning_rate": 2.160226665906273e-06,
|
| 107207 |
+
"loss": 0.6718,
|
| 107208 |
+
"step": 15314
|
| 107209 |
+
},
|
| 107210 |
+
{
|
| 107211 |
+
"epoch": 17.443304843304844,
|
| 107212 |
+
"grad_norm": 0.1926439106464386,
|
| 107213 |
+
"learning_rate": 2.158332478829292e-06,
|
| 107214 |
+
"loss": 0.4476,
|
| 107215 |
+
"step": 15315
|
| 107216 |
+
},
|
| 107217 |
+
{
|
| 107218 |
+
"epoch": 17.444444444444443,
|
| 107219 |
+
"grad_norm": 0.2181670367717743,
|
| 107220 |
+
"learning_rate": 2.156439085106238e-06,
|
| 107221 |
+
"loss": 0.6576,
|
| 107222 |
+
"step": 15316
|
| 107223 |
+
},
|
| 107224 |
+
{
|
| 107225 |
+
"epoch": 17.445584045584045,
|
| 107226 |
+
"grad_norm": 0.22731441259384155,
|
| 107227 |
+
"learning_rate": 2.15454648480288e-06,
|
| 107228 |
+
"loss": 0.667,
|
| 107229 |
+
"step": 15317
|
| 107230 |
+
},
|
| 107231 |
+
{
|
| 107232 |
+
"epoch": 17.446723646723648,
|
| 107233 |
+
"grad_norm": 0.1826423853635788,
|
| 107234 |
+
"learning_rate": 2.1526546779849443e-06,
|
| 107235 |
+
"loss": 0.5988,
|
| 107236 |
+
"step": 15318
|
| 107237 |
+
},
|
| 107238 |
+
{
|
| 107239 |
+
"epoch": 17.447863247863246,
|
| 107240 |
+
"grad_norm": 0.20163589715957642,
|
| 107241 |
+
"learning_rate": 2.1507636647181418e-06,
|
| 107242 |
+
"loss": 0.7636,
|
| 107243 |
+
"step": 15319
|
| 107244 |
+
},
|
| 107245 |
+
{
|
| 107246 |
+
"epoch": 17.44900284900285,
|
| 107247 |
+
"grad_norm": 0.18035119771957397,
|
| 107248 |
+
"learning_rate": 2.14887344506815e-06,
|
| 107249 |
+
"loss": 0.7143,
|
| 107250 |
+
"step": 15320
|
| 107251 |
+
},
|
| 107252 |
+
{
|
| 107253 |
+
"epoch": 17.45014245014245,
|
| 107254 |
+
"grad_norm": 0.1917208731174469,
|
| 107255 |
+
"learning_rate": 2.1469840191006314e-06,
|
| 107256 |
+
"loss": 0.5918,
|
| 107257 |
+
"step": 15321
|
| 107258 |
+
},
|
| 107259 |
+
{
|
| 107260 |
+
"epoch": 17.45128205128205,
|
| 107261 |
+
"grad_norm": 0.20104704797267914,
|
| 107262 |
+
"learning_rate": 2.145095386881199e-06,
|
| 107263 |
+
"loss": 0.9115,
|
| 107264 |
+
"step": 15322
|
| 107265 |
+
},
|
| 107266 |
+
{
|
| 107267 |
+
"epoch": 17.452421652421652,
|
| 107268 |
+
"grad_norm": 0.3120897114276886,
|
| 107269 |
+
"learning_rate": 2.1432075484754565e-06,
|
| 107270 |
+
"loss": 0.4841,
|
| 107271 |
+
"step": 15323
|
| 107272 |
+
},
|
| 107273 |
+
{
|
| 107274 |
+
"epoch": 17.453561253561254,
|
| 107275 |
+
"grad_norm": 0.17598046362400055,
|
| 107276 |
+
"learning_rate": 2.141320503948971e-06,
|
| 107277 |
+
"loss": 0.8,
|
| 107278 |
+
"step": 15324
|
| 107279 |
+
},
|
| 107280 |
+
{
|
| 107281 |
+
"epoch": 17.454700854700853,
|
| 107282 |
+
"grad_norm": 0.18346905708312988,
|
| 107283 |
+
"learning_rate": 2.139434253367284e-06,
|
| 107284 |
+
"loss": 0.8112,
|
| 107285 |
+
"step": 15325
|
| 107286 |
+
},
|
| 107287 |
+
{
|
| 107288 |
+
"epoch": 17.455840455840455,
|
| 107289 |
+
"grad_norm": 0.2171083241701126,
|
| 107290 |
+
"learning_rate": 2.1375487967959156e-06,
|
| 107291 |
+
"loss": 0.6508,
|
| 107292 |
+
"step": 15326
|
| 107293 |
+
},
|
| 107294 |
+
{
|
| 107295 |
+
"epoch": 17.456980056980058,
|
| 107296 |
+
"grad_norm": 0.1841529756784439,
|
| 107297 |
+
"learning_rate": 2.135664134300347e-06,
|
| 107298 |
+
"loss": 0.7771,
|
| 107299 |
+
"step": 15327
|
| 107300 |
+
},
|
| 107301 |
+
{
|
| 107302 |
+
"epoch": 17.458119658119656,
|
| 107303 |
+
"grad_norm": 0.18029426038265228,
|
| 107304 |
+
"learning_rate": 2.133780265946045e-06,
|
| 107305 |
+
"loss": 0.8219,
|
| 107306 |
+
"step": 15328
|
| 107307 |
+
},
|
| 107308 |
+
{
|
| 107309 |
+
"epoch": 17.45925925925926,
|
| 107310 |
+
"grad_norm": 0.22011998295783997,
|
| 107311 |
+
"learning_rate": 2.1318971917984323e-06,
|
| 107312 |
+
"loss": 0.4506,
|
| 107313 |
+
"step": 15329
|
| 107314 |
+
},
|
| 107315 |
+
{
|
| 107316 |
+
"epoch": 17.46039886039886,
|
| 107317 |
+
"grad_norm": 0.21217116713523865,
|
| 107318 |
+
"learning_rate": 2.1300149119229173e-06,
|
| 107319 |
+
"loss": 0.5719,
|
| 107320 |
+
"step": 15330
|
| 107321 |
+
},
|
| 107322 |
+
{
|
| 107323 |
+
"epoch": 17.46153846153846,
|
| 107324 |
+
"grad_norm": 0.1769542396068573,
|
| 107325 |
+
"learning_rate": 2.128133426384879e-06,
|
| 107326 |
+
"loss": 0.6893,
|
| 107327 |
+
"step": 15331
|
| 107328 |
+
},
|
| 107329 |
+
{
|
| 107330 |
+
"epoch": 17.462678062678062,
|
| 107331 |
+
"grad_norm": 0.2163042426109314,
|
| 107332 |
+
"learning_rate": 2.1262527352496677e-06,
|
| 107333 |
+
"loss": 0.6376,
|
| 107334 |
+
"step": 15332
|
| 107335 |
+
},
|
| 107336 |
+
{
|
| 107337 |
+
"epoch": 17.463817663817665,
|
| 107338 |
+
"grad_norm": 0.17681114375591278,
|
| 107339 |
+
"learning_rate": 2.124372838582597e-06,
|
| 107340 |
+
"loss": 0.677,
|
| 107341 |
+
"step": 15333
|
| 107342 |
+
},
|
| 107343 |
+
{
|
| 107344 |
+
"epoch": 17.464957264957263,
|
| 107345 |
+
"grad_norm": 0.21395109593868256,
|
| 107346 |
+
"learning_rate": 2.1224937364489687e-06,
|
| 107347 |
+
"loss": 0.6916,
|
| 107348 |
+
"step": 15334
|
| 107349 |
+
},
|
| 107350 |
+
{
|
| 107351 |
+
"epoch": 17.466096866096866,
|
| 107352 |
+
"grad_norm": 0.23505502939224243,
|
| 107353 |
+
"learning_rate": 2.120615428914044e-06,
|
| 107354 |
+
"loss": 0.5663,
|
| 107355 |
+
"step": 15335
|
| 107356 |
+
},
|
| 107357 |
+
{
|
| 107358 |
+
"epoch": 17.467236467236468,
|
| 107359 |
+
"grad_norm": 0.2834552824497223,
|
| 107360 |
+
"learning_rate": 2.118737916043065e-06,
|
| 107361 |
+
"loss": 0.672,
|
| 107362 |
+
"step": 15336
|
| 107363 |
+
},
|
| 107364 |
+
{
|
| 107365 |
+
"epoch": 17.468376068376067,
|
| 107366 |
+
"grad_norm": 0.20520302653312683,
|
| 107367 |
+
"learning_rate": 2.1168611979012403e-06,
|
| 107368 |
+
"loss": 0.6569,
|
| 107369 |
+
"step": 15337
|
| 107370 |
+
},
|
| 107371 |
+
{
|
| 107372 |
+
"epoch": 17.46951566951567,
|
| 107373 |
+
"grad_norm": 0.21140910685062408,
|
| 107374 |
+
"learning_rate": 2.114985274553755e-06,
|
| 107375 |
+
"loss": 0.4709,
|
| 107376 |
+
"step": 15338
|
| 107377 |
+
},
|
| 107378 |
+
{
|
| 107379 |
+
"epoch": 17.47065527065527,
|
| 107380 |
+
"grad_norm": 0.23354817926883698,
|
| 107381 |
+
"learning_rate": 2.1131101460657665e-06,
|
| 107382 |
+
"loss": 0.659,
|
| 107383 |
+
"step": 15339
|
| 107384 |
+
},
|
| 107385 |
+
{
|
| 107386 |
+
"epoch": 17.47179487179487,
|
| 107387 |
+
"grad_norm": 0.17846237123012543,
|
| 107388 |
+
"learning_rate": 2.111235812502407e-06,
|
| 107389 |
+
"loss": 0.7709,
|
| 107390 |
+
"step": 15340
|
| 107391 |
+
},
|
| 107392 |
+
{
|
| 107393 |
+
"epoch": 17.472934472934472,
|
| 107394 |
+
"grad_norm": 0.1721949428319931,
|
| 107395 |
+
"learning_rate": 2.1093622739287665e-06,
|
| 107396 |
+
"loss": 0.6594,
|
| 107397 |
+
"step": 15341
|
| 107398 |
+
},
|
| 107399 |
+
{
|
| 107400 |
+
"epoch": 17.474074074074075,
|
| 107401 |
+
"grad_norm": 0.1907200664281845,
|
| 107402 |
+
"learning_rate": 2.107489530409923e-06,
|
| 107403 |
+
"loss": 0.6038,
|
| 107404 |
+
"step": 15342
|
| 107405 |
+
},
|
| 107406 |
+
{
|
| 107407 |
+
"epoch": 17.475213675213674,
|
| 107408 |
+
"grad_norm": 0.23151063919067383,
|
| 107409 |
+
"learning_rate": 2.105617582010924e-06,
|
| 107410 |
+
"loss": 0.6874,
|
| 107411 |
+
"step": 15343
|
| 107412 |
+
},
|
| 107413 |
+
{
|
| 107414 |
+
"epoch": 17.476353276353276,
|
| 107415 |
+
"grad_norm": 0.1753738522529602,
|
| 107416 |
+
"learning_rate": 2.103746428796788e-06,
|
| 107417 |
+
"loss": 0.9033,
|
| 107418 |
+
"step": 15344
|
| 107419 |
+
},
|
| 107420 |
+
{
|
| 107421 |
+
"epoch": 17.477492877492878,
|
| 107422 |
+
"grad_norm": 0.17159105837345123,
|
| 107423 |
+
"learning_rate": 2.1018760708325037e-06,
|
| 107424 |
+
"loss": 0.6414,
|
| 107425 |
+
"step": 15345
|
| 107426 |
+
},
|
| 107427 |
+
{
|
| 107428 |
+
"epoch": 17.478632478632477,
|
| 107429 |
+
"grad_norm": 0.3715958297252655,
|
| 107430 |
+
"learning_rate": 2.1000065081830304e-06,
|
| 107431 |
+
"loss": 0.6692,
|
| 107432 |
+
"step": 15346
|
| 107433 |
+
},
|
| 107434 |
+
{
|
| 107435 |
+
"epoch": 17.47977207977208,
|
| 107436 |
+
"grad_norm": 0.2088504582643509,
|
| 107437 |
+
"learning_rate": 2.0981377409133056e-06,
|
| 107438 |
+
"loss": 0.6677,
|
| 107439 |
+
"step": 15347
|
| 107440 |
+
},
|
| 107441 |
+
{
|
| 107442 |
+
"epoch": 17.48091168091168,
|
| 107443 |
+
"grad_norm": 0.16269053518772125,
|
| 107444 |
+
"learning_rate": 2.096269769088238e-06,
|
| 107445 |
+
"loss": 0.5255,
|
| 107446 |
+
"step": 15348
|
| 107447 |
+
},
|
| 107448 |
+
{
|
| 107449 |
+
"epoch": 17.48205128205128,
|
| 107450 |
+
"grad_norm": 0.16256828606128693,
|
| 107451 |
+
"learning_rate": 2.0944025927727095e-06,
|
| 107452 |
+
"loss": 0.7041,
|
| 107453 |
+
"step": 15349
|
| 107454 |
+
},
|
| 107455 |
+
{
|
| 107456 |
+
"epoch": 17.483190883190883,
|
| 107457 |
+
"grad_norm": 0.1829254925251007,
|
| 107458 |
+
"learning_rate": 2.092536212031568e-06,
|
| 107459 |
+
"loss": 0.6186,
|
| 107460 |
+
"step": 15350
|
| 107461 |
+
},
|
| 107462 |
+
{
|
| 107463 |
+
"epoch": 17.484330484330485,
|
| 107464 |
+
"grad_norm": 0.1834312081336975,
|
| 107465 |
+
"learning_rate": 2.0906706269296416e-06,
|
| 107466 |
+
"loss": 0.5941,
|
| 107467 |
+
"step": 15351
|
| 107468 |
+
},
|
| 107469 |
+
{
|
| 107470 |
+
"epoch": 17.485470085470084,
|
| 107471 |
+
"grad_norm": 0.18137308955192566,
|
| 107472 |
+
"learning_rate": 2.0888058375317238e-06,
|
| 107473 |
+
"loss": 0.648,
|
| 107474 |
+
"step": 15352
|
| 107475 |
+
},
|
| 107476 |
+
{
|
| 107477 |
+
"epoch": 17.486609686609686,
|
| 107478 |
+
"grad_norm": 0.21743400394916534,
|
| 107479 |
+
"learning_rate": 2.0869418439025845e-06,
|
| 107480 |
+
"loss": 0.5386,
|
| 107481 |
+
"step": 15353
|
| 107482 |
+
},
|
| 107483 |
+
{
|
| 107484 |
+
"epoch": 17.48774928774929,
|
| 107485 |
+
"grad_norm": 0.21215492486953735,
|
| 107486 |
+
"learning_rate": 2.0850786461069667e-06,
|
| 107487 |
+
"loss": 0.6154,
|
| 107488 |
+
"step": 15354
|
| 107489 |
+
},
|
| 107490 |
+
{
|
| 107491 |
+
"epoch": 17.488888888888887,
|
| 107492 |
+
"grad_norm": 0.23170483112335205,
|
| 107493 |
+
"learning_rate": 2.083216244209585e-06,
|
| 107494 |
+
"loss": 0.7102,
|
| 107495 |
+
"step": 15355
|
| 107496 |
+
},
|
| 107497 |
+
{
|
| 107498 |
+
"epoch": 17.49002849002849,
|
| 107499 |
+
"grad_norm": 0.15444518625736237,
|
| 107500 |
+
"learning_rate": 2.0813546382751236e-06,
|
| 107501 |
+
"loss": 0.7619,
|
| 107502 |
+
"step": 15356
|
| 107503 |
+
},
|
| 107504 |
+
{
|
| 107505 |
+
"epoch": 17.491168091168092,
|
| 107506 |
+
"grad_norm": 0.19130298495292664,
|
| 107507 |
+
"learning_rate": 2.0794938283682423e-06,
|
| 107508 |
+
"loss": 0.6027,
|
| 107509 |
+
"step": 15357
|
| 107510 |
+
},
|
| 107511 |
+
{
|
| 107512 |
+
"epoch": 17.49230769230769,
|
| 107513 |
+
"grad_norm": 0.20855511724948883,
|
| 107514 |
+
"learning_rate": 2.07763381455357e-06,
|
| 107515 |
+
"loss": 0.7724,
|
| 107516 |
+
"step": 15358
|
| 107517 |
+
},
|
| 107518 |
+
{
|
| 107519 |
+
"epoch": 17.493447293447293,
|
| 107520 |
+
"grad_norm": 0.1930067092180252,
|
| 107521 |
+
"learning_rate": 2.075774596895713e-06,
|
| 107522 |
+
"loss": 0.6319,
|
| 107523 |
+
"step": 15359
|
| 107524 |
+
},
|
| 107525 |
+
{
|
| 107526 |
+
"epoch": 17.494586894586895,
|
| 107527 |
+
"grad_norm": 0.19758287072181702,
|
| 107528 |
+
"learning_rate": 2.0739161754592452e-06,
|
| 107529 |
+
"loss": 0.5473,
|
| 107530 |
+
"step": 15360
|
| 107531 |
+
},
|
| 107532 |
+
{
|
| 107533 |
+
"epoch": 17.495726495726494,
|
| 107534 |
+
"grad_norm": 0.15608827769756317,
|
| 107535 |
+
"learning_rate": 2.072058550308717e-06,
|
| 107536 |
+
"loss": 0.6884,
|
| 107537 |
+
"step": 15361
|
| 107538 |
+
},
|
| 107539 |
+
{
|
| 107540 |
+
"epoch": 17.496866096866096,
|
| 107541 |
+
"grad_norm": 0.2612965404987335,
|
| 107542 |
+
"learning_rate": 2.0702017215086495e-06,
|
| 107543 |
+
"loss": 0.6141,
|
| 107544 |
+
"step": 15362
|
| 107545 |
+
},
|
| 107546 |
+
{
|
| 107547 |
+
"epoch": 17.4980056980057,
|
| 107548 |
+
"grad_norm": 0.21490062773227692,
|
| 107549 |
+
"learning_rate": 2.0683456891235323e-06,
|
| 107550 |
+
"loss": 0.5766,
|
| 107551 |
+
"step": 15363
|
| 107552 |
+
},
|
| 107553 |
+
{
|
| 107554 |
+
"epoch": 17.499145299145297,
|
| 107555 |
+
"grad_norm": 0.20124967396259308,
|
| 107556 |
+
"learning_rate": 2.0664904532178285e-06,
|
| 107557 |
+
"loss": 0.6135,
|
| 107558 |
+
"step": 15364
|
| 107559 |
+
},
|
| 107560 |
+
{
|
| 107561 |
+
"epoch": 17.5002849002849,
|
| 107562 |
+
"grad_norm": 0.15642400085926056,
|
| 107563 |
+
"learning_rate": 2.0646360138559808e-06,
|
| 107564 |
+
"loss": 0.6718,
|
| 107565 |
+
"step": 15365
|
| 107566 |
+
},
|
| 107567 |
+
{
|
| 107568 |
+
"epoch": 17.501424501424502,
|
| 107569 |
+
"grad_norm": 0.20701226592063904,
|
| 107570 |
+
"learning_rate": 2.062782371102401e-06,
|
| 107571 |
+
"loss": 0.6231,
|
| 107572 |
+
"step": 15366
|
| 107573 |
+
},
|
| 107574 |
+
{
|
| 107575 |
+
"epoch": 17.5025641025641,
|
| 107576 |
+
"grad_norm": 0.16767756640911102,
|
| 107577 |
+
"learning_rate": 2.060929525021463e-06,
|
| 107578 |
+
"loss": 0.7096,
|
| 107579 |
+
"step": 15367
|
| 107580 |
+
},
|
| 107581 |
+
{
|
| 107582 |
+
"epoch": 17.503703703703703,
|
| 107583 |
+
"grad_norm": 0.18611721694469452,
|
| 107584 |
+
"learning_rate": 2.0590774756775235e-06,
|
| 107585 |
+
"loss": 0.642,
|
| 107586 |
+
"step": 15368
|
| 107587 |
+
},
|
| 107588 |
+
{
|
| 107589 |
+
"epoch": 17.504843304843305,
|
| 107590 |
+
"grad_norm": 0.20109455287456512,
|
| 107591 |
+
"learning_rate": 2.057226223134909e-06,
|
| 107592 |
+
"loss": 0.5996,
|
| 107593 |
+
"step": 15369
|
| 107594 |
+
},
|
| 107595 |
+
{
|
| 107596 |
+
"epoch": 17.505982905982904,
|
| 107597 |
+
"grad_norm": 0.19398508965969086,
|
| 107598 |
+
"learning_rate": 2.055375767457926e-06,
|
| 107599 |
+
"loss": 0.7829,
|
| 107600 |
+
"step": 15370
|
| 107601 |
+
},
|
| 107602 |
+
{
|
| 107603 |
+
"epoch": 17.507122507122507,
|
| 107604 |
+
"grad_norm": 0.19312256574630737,
|
| 107605 |
+
"learning_rate": 2.0535261087108404e-06,
|
| 107606 |
+
"loss": 0.5825,
|
| 107607 |
+
"step": 15371
|
| 107608 |
+
},
|
| 107609 |
+
{
|
| 107610 |
+
"epoch": 17.50826210826211,
|
| 107611 |
+
"grad_norm": 0.2314954400062561,
|
| 107612 |
+
"learning_rate": 2.0516772469578922e-06,
|
| 107613 |
+
"loss": 0.545,
|
| 107614 |
+
"step": 15372
|
| 107615 |
+
},
|
| 107616 |
+
{
|
| 107617 |
+
"epoch": 17.509401709401708,
|
| 107618 |
+
"grad_norm": 0.2093094289302826,
|
| 107619 |
+
"learning_rate": 2.049829182263305e-06,
|
| 107620 |
+
"loss": 0.5974,
|
| 107621 |
+
"step": 15373
|
| 107622 |
+
},
|
| 107623 |
+
{
|
| 107624 |
+
"epoch": 17.51054131054131,
|
| 107625 |
+
"grad_norm": 0.21281267702579498,
|
| 107626 |
+
"learning_rate": 2.047981914691266e-06,
|
| 107627 |
+
"loss": 0.5286,
|
| 107628 |
+
"step": 15374
|
| 107629 |
+
},
|
| 107630 |
+
{
|
| 107631 |
+
"epoch": 17.511680911680912,
|
| 107632 |
+
"grad_norm": 0.2425657957792282,
|
| 107633 |
+
"learning_rate": 2.046135444305927e-06,
|
| 107634 |
+
"loss": 0.6006,
|
| 107635 |
+
"step": 15375
|
| 107636 |
+
},
|
| 107637 |
+
{
|
| 107638 |
+
"epoch": 17.51282051282051,
|
| 107639 |
+
"grad_norm": 0.22368358075618744,
|
| 107640 |
+
"learning_rate": 2.0442897711714313e-06,
|
| 107641 |
+
"loss": 0.6731,
|
| 107642 |
+
"step": 15376
|
| 107643 |
+
},
|
| 107644 |
+
{
|
| 107645 |
+
"epoch": 17.513960113960113,
|
| 107646 |
+
"grad_norm": 0.17175684869289398,
|
| 107647 |
+
"learning_rate": 2.042444895351878e-06,
|
| 107648 |
+
"loss": 0.5703,
|
| 107649 |
+
"step": 15377
|
| 107650 |
+
},
|
| 107651 |
+
{
|
| 107652 |
+
"epoch": 17.515099715099716,
|
| 107653 |
+
"grad_norm": 0.2873460650444031,
|
| 107654 |
+
"learning_rate": 2.0406008169113507e-06,
|
| 107655 |
+
"loss": 0.3727,
|
| 107656 |
+
"step": 15378
|
| 107657 |
+
},
|
| 107658 |
+
{
|
| 107659 |
+
"epoch": 17.516239316239318,
|
| 107660 |
+
"grad_norm": 0.19145934283733368,
|
| 107661 |
+
"learning_rate": 2.038757535913893e-06,
|
| 107662 |
+
"loss": 0.5564,
|
| 107663 |
+
"step": 15379
|
| 107664 |
+
},
|
| 107665 |
+
{
|
| 107666 |
+
"epoch": 17.517378917378917,
|
| 107667 |
+
"grad_norm": 0.15687519311904907,
|
| 107668 |
+
"learning_rate": 2.0369150524235244e-06,
|
| 107669 |
+
"loss": 0.7346,
|
| 107670 |
+
"step": 15380
|
| 107671 |
+
},
|
| 107672 |
+
{
|
| 107673 |
+
"epoch": 17.51851851851852,
|
| 107674 |
+
"grad_norm": 0.21336393058300018,
|
| 107675 |
+
"learning_rate": 2.035073366504253e-06,
|
| 107676 |
+
"loss": 0.5313,
|
| 107677 |
+
"step": 15381
|
| 107678 |
+
},
|
| 107679 |
+
{
|
| 107680 |
+
"epoch": 17.51965811965812,
|
| 107681 |
+
"grad_norm": 0.17659983038902283,
|
| 107682 |
+
"learning_rate": 2.0332324782200347e-06,
|
| 107683 |
+
"loss": 0.7484,
|
| 107684 |
+
"step": 15382
|
| 107685 |
+
},
|
| 107686 |
+
{
|
| 107687 |
+
"epoch": 17.52079772079772,
|
| 107688 |
+
"grad_norm": 0.19936230778694153,
|
| 107689 |
+
"learning_rate": 2.031392387634809e-06,
|
| 107690 |
+
"loss": 0.7472,
|
| 107691 |
+
"step": 15383
|
| 107692 |
+
},
|
| 107693 |
+
{
|
| 107694 |
+
"epoch": 17.521937321937322,
|
| 107695 |
+
"grad_norm": 0.21555398404598236,
|
| 107696 |
+
"learning_rate": 2.0295530948124917e-06,
|
| 107697 |
+
"loss": 0.6158,
|
| 107698 |
+
"step": 15384
|
| 107699 |
+
},
|
| 107700 |
+
{
|
| 107701 |
+
"epoch": 17.523076923076925,
|
| 107702 |
+
"grad_norm": 0.47986331582069397,
|
| 107703 |
+
"learning_rate": 2.027714599816968e-06,
|
| 107704 |
+
"loss": 0.6851,
|
| 107705 |
+
"step": 15385
|
| 107706 |
+
},
|
| 107707 |
+
{
|
| 107708 |
+
"epoch": 17.524216524216524,
|
| 107709 |
+
"grad_norm": 0.21136222779750824,
|
| 107710 |
+
"learning_rate": 2.0258769027120873e-06,
|
| 107711 |
+
"loss": 0.6985,
|
| 107712 |
+
"step": 15386
|
| 107713 |
+
},
|
| 107714 |
+
{
|
| 107715 |
+
"epoch": 17.525356125356126,
|
| 107716 |
+
"grad_norm": 0.17603352665901184,
|
| 107717 |
+
"learning_rate": 2.024040003561678e-06,
|
| 107718 |
+
"loss": 0.7078,
|
| 107719 |
+
"step": 15387
|
| 107720 |
+
},
|
| 107721 |
+
{
|
| 107722 |
+
"epoch": 17.526495726495728,
|
| 107723 |
+
"grad_norm": 0.25389912724494934,
|
| 107724 |
+
"learning_rate": 2.022203902429545e-06,
|
| 107725 |
+
"loss": 0.5969,
|
| 107726 |
+
"step": 15388
|
| 107727 |
+
},
|
| 107728 |
+
{
|
| 107729 |
+
"epoch": 17.527635327635327,
|
| 107730 |
+
"grad_norm": 0.18012681603431702,
|
| 107731 |
+
"learning_rate": 2.020368599379466e-06,
|
| 107732 |
+
"loss": 0.7431,
|
| 107733 |
+
"step": 15389
|
| 107734 |
+
},
|
| 107735 |
+
{
|
| 107736 |
+
"epoch": 17.52877492877493,
|
| 107737 |
+
"grad_norm": 0.20365744829177856,
|
| 107738 |
+
"learning_rate": 2.018534094475172e-06,
|
| 107739 |
+
"loss": 0.7093,
|
| 107740 |
+
"step": 15390
|
| 107741 |
+
},
|
| 107742 |
+
{
|
| 107743 |
+
"epoch": 17.52991452991453,
|
| 107744 |
+
"grad_norm": 0.18745318055152893,
|
| 107745 |
+
"learning_rate": 2.016700387780385e-06,
|
| 107746 |
+
"loss": 0.3249,
|
| 107747 |
+
"step": 15391
|
| 107748 |
+
},
|
| 107749 |
+
{
|
| 107750 |
+
"epoch": 17.53105413105413,
|
| 107751 |
+
"grad_norm": 0.19238592684268951,
|
| 107752 |
+
"learning_rate": 2.0148674793588064e-06,
|
| 107753 |
+
"loss": 0.5874,
|
| 107754 |
+
"step": 15392
|
| 107755 |
+
},
|
| 107756 |
+
{
|
| 107757 |
+
"epoch": 17.532193732193733,
|
| 107758 |
+
"grad_norm": 0.1820547729730606,
|
| 107759 |
+
"learning_rate": 2.0130353692740854e-06,
|
| 107760 |
+
"loss": 0.8376,
|
| 107761 |
+
"step": 15393
|
| 107762 |
+
},
|
| 107763 |
+
{
|
| 107764 |
+
"epoch": 17.533333333333335,
|
| 107765 |
+
"grad_norm": 0.160737544298172,
|
| 107766 |
+
"learning_rate": 2.0112040575898607e-06,
|
| 107767 |
+
"loss": 0.7618,
|
| 107768 |
+
"step": 15394
|
| 107769 |
+
},
|
| 107770 |
+
{
|
| 107771 |
+
"epoch": 17.534472934472934,
|
| 107772 |
+
"grad_norm": 0.21993643045425415,
|
| 107773 |
+
"learning_rate": 2.0093735443697387e-06,
|
| 107774 |
+
"loss": 0.6073,
|
| 107775 |
+
"step": 15395
|
| 107776 |
+
},
|
| 107777 |
+
{
|
| 107778 |
+
"epoch": 17.535612535612536,
|
| 107779 |
+
"grad_norm": 0.20880837738513947,
|
| 107780 |
+
"learning_rate": 2.0075438296773023e-06,
|
| 107781 |
+
"loss": 0.3951,
|
| 107782 |
+
"step": 15396
|
| 107783 |
+
},
|
| 107784 |
+
{
|
| 107785 |
+
"epoch": 17.53675213675214,
|
| 107786 |
+
"grad_norm": 0.21602734923362732,
|
| 107787 |
+
"learning_rate": 2.0057149135760954e-06,
|
| 107788 |
+
"loss": 0.5836,
|
| 107789 |
+
"step": 15397
|
| 107790 |
+
},
|
| 107791 |
+
{
|
| 107792 |
+
"epoch": 17.537891737891737,
|
| 107793 |
+
"grad_norm": 0.1663624346256256,
|
| 107794 |
+
"learning_rate": 2.003886796129642e-06,
|
| 107795 |
+
"loss": 0.8863,
|
| 107796 |
+
"step": 15398
|
| 107797 |
+
},
|
| 107798 |
+
{
|
| 107799 |
+
"epoch": 17.53903133903134,
|
| 107800 |
+
"grad_norm": 0.21745102107524872,
|
| 107801 |
+
"learning_rate": 2.0020594774014383e-06,
|
| 107802 |
+
"loss": 0.794,
|
| 107803 |
+
"step": 15399
|
| 107804 |
+
},
|
| 107805 |
+
{
|
| 107806 |
+
"epoch": 17.540170940170942,
|
| 107807 |
+
"grad_norm": 0.22152374684810638,
|
| 107808 |
+
"learning_rate": 2.0002329574549617e-06,
|
| 107809 |
+
"loss": 0.6123,
|
| 107810 |
+
"step": 15400
|
| 107811 |
+
},
|
| 107812 |
+
{
|
| 107813 |
+
"epoch": 17.54131054131054,
|
| 107814 |
+
"grad_norm": 0.22281567752361298,
|
| 107815 |
+
"learning_rate": 1.998407236353636e-06,
|
| 107816 |
+
"loss": 0.647,
|
| 107817 |
+
"step": 15401
|
| 107818 |
+
},
|
| 107819 |
+
{
|
| 107820 |
+
"epoch": 17.542450142450143,
|
| 107821 |
+
"grad_norm": 0.17523208260536194,
|
| 107822 |
+
"learning_rate": 1.9965823141608804e-06,
|
| 107823 |
+
"loss": 0.637,
|
| 107824 |
+
"step": 15402
|
| 107825 |
+
},
|
| 107826 |
+
{
|
| 107827 |
+
"epoch": 17.543589743589745,
|
| 107828 |
+
"grad_norm": 0.21158164739608765,
|
| 107829 |
+
"learning_rate": 1.994758190940088e-06,
|
| 107830 |
+
"loss": 0.629,
|
| 107831 |
+
"step": 15403
|
| 107832 |
+
},
|
| 107833 |
+
{
|
| 107834 |
+
"epoch": 17.544729344729344,
|
| 107835 |
+
"grad_norm": 0.2418418675661087,
|
| 107836 |
+
"learning_rate": 1.992934866754603e-06,
|
| 107837 |
+
"loss": 0.5566,
|
| 107838 |
+
"step": 15404
|
| 107839 |
+
},
|
| 107840 |
+
{
|
| 107841 |
+
"epoch": 17.545868945868946,
|
| 107842 |
+
"grad_norm": 0.1990119069814682,
|
| 107843 |
+
"learning_rate": 1.9911123416677615e-06,
|
| 107844 |
+
"loss": 0.5334,
|
| 107845 |
+
"step": 15405
|
| 107846 |
+
},
|
| 107847 |
+
{
|
| 107848 |
+
"epoch": 17.54700854700855,
|
| 107849 |
+
"grad_norm": 0.19692152738571167,
|
| 107850 |
+
"learning_rate": 1.9892906157428616e-06,
|
| 107851 |
+
"loss": 0.6778,
|
| 107852 |
+
"step": 15406
|
| 107853 |
+
},
|
| 107854 |
+
{
|
| 107855 |
+
"epoch": 17.548148148148147,
|
| 107856 |
+
"grad_norm": 0.18656007945537567,
|
| 107857 |
+
"learning_rate": 1.987469689043184e-06,
|
| 107858 |
+
"loss": 0.5933,
|
| 107859 |
+
"step": 15407
|
| 107860 |
+
},
|
| 107861 |
+
{
|
| 107862 |
+
"epoch": 17.54928774928775,
|
| 107863 |
+
"grad_norm": 0.16103674471378326,
|
| 107864 |
+
"learning_rate": 1.985649561631961e-06,
|
| 107865 |
+
"loss": 0.825,
|
| 107866 |
+
"step": 15408
|
| 107867 |
+
},
|
| 107868 |
+
{
|
| 107869 |
+
"epoch": 17.550427350427352,
|
| 107870 |
+
"grad_norm": 0.17847539484500885,
|
| 107871 |
+
"learning_rate": 1.983830233572423e-06,
|
| 107872 |
+
"loss": 0.8594,
|
| 107873 |
+
"step": 15409
|
| 107874 |
+
},
|
| 107875 |
+
{
|
| 107876 |
+
"epoch": 17.55156695156695,
|
| 107877 |
+
"grad_norm": 0.16353803873062134,
|
| 107878 |
+
"learning_rate": 1.9820117049277526e-06,
|
| 107879 |
+
"loss": 0.5623,
|
| 107880 |
+
"step": 15410
|
| 107881 |
+
},
|
| 107882 |
+
{
|
| 107883 |
+
"epoch": 17.552706552706553,
|
| 107884 |
+
"grad_norm": 0.19815440475940704,
|
| 107885 |
+
"learning_rate": 1.9801939757611213e-06,
|
| 107886 |
+
"loss": 0.6037,
|
| 107887 |
+
"step": 15411
|
| 107888 |
+
},
|
| 107889 |
+
{
|
| 107890 |
+
"epoch": 17.553846153846155,
|
| 107891 |
+
"grad_norm": 0.18016420304775238,
|
| 107892 |
+
"learning_rate": 1.9783770461356544e-06,
|
| 107893 |
+
"loss": 0.4899,
|
| 107894 |
+
"step": 15412
|
| 107895 |
+
},
|
| 107896 |
+
{
|
| 107897 |
+
"epoch": 17.554985754985754,
|
| 107898 |
+
"grad_norm": 0.2547997534275055,
|
| 107899 |
+
"learning_rate": 1.976560916114456e-06,
|
| 107900 |
+
"loss": 0.8854,
|
| 107901 |
+
"step": 15413
|
| 107902 |
+
},
|
| 107903 |
+
{
|
| 107904 |
+
"epoch": 17.556125356125357,
|
| 107905 |
+
"grad_norm": 0.21617785096168518,
|
| 107906 |
+
"learning_rate": 1.974745585760618e-06,
|
| 107907 |
+
"loss": 0.613,
|
| 107908 |
+
"step": 15414
|
| 107909 |
+
},
|
| 107910 |
+
{
|
| 107911 |
+
"epoch": 17.55726495726496,
|
| 107912 |
+
"grad_norm": 0.21860112249851227,
|
| 107913 |
+
"learning_rate": 1.9729310551371866e-06,
|
| 107914 |
+
"loss": 0.548,
|
| 107915 |
+
"step": 15415
|
| 107916 |
+
},
|
| 107917 |
+
{
|
| 107918 |
+
"epoch": 17.558404558404558,
|
| 107919 |
+
"grad_norm": 0.19205626845359802,
|
| 107920 |
+
"learning_rate": 1.971117324307184e-06,
|
| 107921 |
+
"loss": 0.6271,
|
| 107922 |
+
"step": 15416
|
| 107923 |
+
},
|
| 107924 |
+
{
|
| 107925 |
+
"epoch": 17.55954415954416,
|
| 107926 |
+
"grad_norm": 0.21441836655139923,
|
| 107927 |
+
"learning_rate": 1.969304393333604e-06,
|
| 107928 |
+
"loss": 0.6238,
|
| 107929 |
+
"step": 15417
|
| 107930 |
+
},
|
| 107931 |
+
{
|
| 107932 |
+
"epoch": 17.560683760683762,
|
| 107933 |
+
"grad_norm": 0.19718217849731445,
|
| 107934 |
+
"learning_rate": 1.967492262279416e-06,
|
| 107935 |
+
"loss": 0.5062,
|
| 107936 |
+
"step": 15418
|
| 107937 |
+
},
|
| 107938 |
+
{
|
| 107939 |
+
"epoch": 17.56182336182336,
|
| 107940 |
+
"grad_norm": 0.21474851667881012,
|
| 107941 |
+
"learning_rate": 1.9656809312075697e-06,
|
| 107942 |
+
"loss": 0.7898,
|
| 107943 |
+
"step": 15419
|
| 107944 |
+
},
|
| 107945 |
+
{
|
| 107946 |
+
"epoch": 17.562962962962963,
|
| 107947 |
+
"grad_norm": 0.19963958859443665,
|
| 107948 |
+
"learning_rate": 1.9638704001809615e-06,
|
| 107949 |
+
"loss": 0.7924,
|
| 107950 |
+
"step": 15420
|
| 107951 |
+
},
|
| 107952 |
+
{
|
| 107953 |
+
"epoch": 17.564102564102566,
|
| 107954 |
+
"grad_norm": 0.19876530766487122,
|
| 107955 |
+
"learning_rate": 1.962060669262486e-06,
|
| 107956 |
+
"loss": 0.6621,
|
| 107957 |
+
"step": 15421
|
| 107958 |
+
},
|
| 107959 |
+
{
|
| 107960 |
+
"epoch": 17.565242165242164,
|
| 107961 |
+
"grad_norm": 0.15920500457286835,
|
| 107962 |
+
"learning_rate": 1.9602517385149953e-06,
|
| 107963 |
+
"loss": 0.6871,
|
| 107964 |
+
"step": 15422
|
| 107965 |
+
},
|
| 107966 |
+
{
|
| 107967 |
+
"epoch": 17.566381766381767,
|
| 107968 |
+
"grad_norm": 0.16280314326286316,
|
| 107969 |
+
"learning_rate": 1.9584436080013285e-06,
|
| 107970 |
+
"loss": 0.781,
|
| 107971 |
+
"step": 15423
|
| 107972 |
+
},
|
| 107973 |
+
{
|
| 107974 |
+
"epoch": 17.56752136752137,
|
| 107975 |
+
"grad_norm": 0.19113875925540924,
|
| 107976 |
+
"learning_rate": 1.9566362777842685e-06,
|
| 107977 |
+
"loss": 0.6086,
|
| 107978 |
+
"step": 15424
|
| 107979 |
+
},
|
| 107980 |
+
{
|
| 107981 |
+
"epoch": 17.568660968660968,
|
| 107982 |
+
"grad_norm": 0.1675650179386139,
|
| 107983 |
+
"learning_rate": 1.9548297479266065e-06,
|
| 107984 |
+
"loss": 0.7763,
|
| 107985 |
+
"step": 15425
|
| 107986 |
+
},
|
| 107987 |
+
{
|
| 107988 |
+
"epoch": 17.56980056980057,
|
| 107989 |
+
"grad_norm": 0.19173195958137512,
|
| 107990 |
+
"learning_rate": 1.953024018491087e-06,
|
| 107991 |
+
"loss": 0.937,
|
| 107992 |
+
"step": 15426
|
| 107993 |
+
},
|
| 107994 |
+
{
|
| 107995 |
+
"epoch": 17.570940170940172,
|
| 107996 |
+
"grad_norm": 0.1962994784116745,
|
| 107997 |
+
"learning_rate": 1.9512190895404152e-06,
|
| 107998 |
+
"loss": 0.7101,
|
| 107999 |
+
"step": 15427
|
| 108000 |
+
},
|
| 108001 |
+
{
|
| 108002 |
+
"epoch": 17.57207977207977,
|
| 108003 |
+
"grad_norm": 0.16905248165130615,
|
| 108004 |
+
"learning_rate": 1.949414961137291e-06,
|
| 108005 |
+
"loss": 0.7945,
|
| 108006 |
+
"step": 15428
|
| 108007 |
+
},
|
| 108008 |
+
{
|
| 108009 |
+
"epoch": 17.573219373219374,
|
| 108010 |
+
"grad_norm": 0.21983909606933594,
|
| 108011 |
+
"learning_rate": 1.9476116333443735e-06,
|
| 108012 |
+
"loss": 0.7035,
|
| 108013 |
+
"step": 15429
|
| 108014 |
+
},
|
| 108015 |
+
{
|
| 108016 |
+
"epoch": 17.574358974358976,
|
| 108017 |
+
"grad_norm": 0.23587347567081451,
|
| 108018 |
+
"learning_rate": 1.9458091062243027e-06,
|
| 108019 |
+
"loss": 0.8793,
|
| 108020 |
+
"step": 15430
|
| 108021 |
+
},
|
| 108022 |
+
{
|
| 108023 |
+
"epoch": 17.575498575498575,
|
| 108024 |
+
"grad_norm": 0.16407811641693115,
|
| 108025 |
+
"learning_rate": 1.944007379839677e-06,
|
| 108026 |
+
"loss": 0.6725,
|
| 108027 |
+
"step": 15431
|
| 108028 |
+
},
|
| 108029 |
+
{
|
| 108030 |
+
"epoch": 17.576638176638177,
|
| 108031 |
+
"grad_norm": 0.170736625790596,
|
| 108032 |
+
"learning_rate": 1.9422064542530817e-06,
|
| 108033 |
+
"loss": 0.673,
|
| 108034 |
+
"step": 15432
|
| 108035 |
+
},
|
| 108036 |
+
{
|
| 108037 |
+
"epoch": 17.57777777777778,
|
| 108038 |
+
"grad_norm": 0.2096918821334839,
|
| 108039 |
+
"learning_rate": 1.9404063295270612e-06,
|
| 108040 |
+
"loss": 0.7357,
|
| 108041 |
+
"step": 15433
|
| 108042 |
+
},
|
| 108043 |
+
{
|
| 108044 |
+
"epoch": 17.578917378917378,
|
| 108045 |
+
"grad_norm": 0.1520949751138687,
|
| 108046 |
+
"learning_rate": 1.9386070057241495e-06,
|
| 108047 |
+
"loss": 0.7768,
|
| 108048 |
+
"step": 15434
|
| 108049 |
+
},
|
| 108050 |
+
{
|
| 108051 |
+
"epoch": 17.58005698005698,
|
| 108052 |
+
"grad_norm": 0.17503471672534943,
|
| 108053 |
+
"learning_rate": 1.9368084829068266e-06,
|
| 108054 |
+
"loss": 0.7439,
|
| 108055 |
+
"step": 15435
|
| 108056 |
+
},
|
| 108057 |
+
{
|
| 108058 |
+
"epoch": 17.581196581196583,
|
| 108059 |
+
"grad_norm": 0.21332886815071106,
|
| 108060 |
+
"learning_rate": 1.9350107611375735e-06,
|
| 108061 |
+
"loss": 0.4735,
|
| 108062 |
+
"step": 15436
|
| 108063 |
+
},
|
| 108064 |
+
{
|
| 108065 |
+
"epoch": 17.58233618233618,
|
| 108066 |
+
"grad_norm": 0.17435243725776672,
|
| 108067 |
+
"learning_rate": 1.9332138404788288e-06,
|
| 108068 |
+
"loss": 0.7822,
|
| 108069 |
+
"step": 15437
|
| 108070 |
+
},
|
| 108071 |
+
{
|
| 108072 |
+
"epoch": 17.583475783475784,
|
| 108073 |
+
"grad_norm": 0.21956662833690643,
|
| 108074 |
+
"learning_rate": 1.931417720992998e-06,
|
| 108075 |
+
"loss": 0.628,
|
| 108076 |
+
"step": 15438
|
| 108077 |
+
},
|
| 108078 |
+
{
|
| 108079 |
+
"epoch": 17.584615384615386,
|
| 108080 |
+
"grad_norm": 0.17011047899723053,
|
| 108081 |
+
"learning_rate": 1.9296224027424704e-06,
|
| 108082 |
+
"loss": 0.7007,
|
| 108083 |
+
"step": 15439
|
| 108084 |
+
},
|
| 108085 |
+
{
|
| 108086 |
+
"epoch": 17.585754985754985,
|
| 108087 |
+
"grad_norm": 0.16453446447849274,
|
| 108088 |
+
"learning_rate": 1.927827885789599e-06,
|
| 108089 |
+
"loss": 0.6905,
|
| 108090 |
+
"step": 15440
|
| 108091 |
+
},
|
| 108092 |
+
{
|
| 108093 |
+
"epoch": 17.586894586894587,
|
| 108094 |
+
"grad_norm": 0.1648949235677719,
|
| 108095 |
+
"learning_rate": 1.9260341701967166e-06,
|
| 108096 |
+
"loss": 0.6129,
|
| 108097 |
+
"step": 15441
|
| 108098 |
+
},
|
| 108099 |
+
{
|
| 108100 |
+
"epoch": 17.58803418803419,
|
| 108101 |
+
"grad_norm": 0.18956655263900757,
|
| 108102 |
+
"learning_rate": 1.9242412560261186e-06,
|
| 108103 |
+
"loss": 0.6696,
|
| 108104 |
+
"step": 15442
|
| 108105 |
+
},
|
| 108106 |
+
{
|
| 108107 |
+
"epoch": 17.58917378917379,
|
| 108108 |
+
"grad_norm": 0.18419325351715088,
|
| 108109 |
+
"learning_rate": 1.92244914334008e-06,
|
| 108110 |
+
"loss": 0.8715,
|
| 108111 |
+
"step": 15443
|
| 108112 |
+
},
|
| 108113 |
+
{
|
| 108114 |
+
"epoch": 17.59031339031339,
|
| 108115 |
+
"grad_norm": 0.20067398250102997,
|
| 108116 |
+
"learning_rate": 1.920657832200845e-06,
|
| 108117 |
+
"loss": 0.6447,
|
| 108118 |
+
"step": 15444
|
| 108119 |
+
},
|
| 108120 |
+
{
|
| 108121 |
+
"epoch": 17.591452991452993,
|
| 108122 |
+
"grad_norm": 0.15579760074615479,
|
| 108123 |
+
"learning_rate": 1.9188673226706364e-06,
|
| 108124 |
+
"loss": 0.6549,
|
| 108125 |
+
"step": 15445
|
| 108126 |
+
},
|
| 108127 |
+
{
|
| 108128 |
+
"epoch": 17.59259259259259,
|
| 108129 |
+
"grad_norm": 0.22809506952762604,
|
| 108130 |
+
"learning_rate": 1.9170776148116326e-06,
|
| 108131 |
+
"loss": 0.8009,
|
| 108132 |
+
"step": 15446
|
| 108133 |
+
},
|
| 108134 |
+
{
|
| 108135 |
+
"epoch": 17.593732193732194,
|
| 108136 |
+
"grad_norm": 0.1710824966430664,
|
| 108137 |
+
"learning_rate": 1.9152887086860054e-06,
|
| 108138 |
+
"loss": 0.4306,
|
| 108139 |
+
"step": 15447
|
| 108140 |
+
},
|
| 108141 |
+
{
|
| 108142 |
+
"epoch": 17.594871794871796,
|
| 108143 |
+
"grad_norm": 0.19887270033359528,
|
| 108144 |
+
"learning_rate": 1.9135006043558835e-06,
|
| 108145 |
+
"loss": 0.7019,
|
| 108146 |
+
"step": 15448
|
| 108147 |
+
},
|
| 108148 |
+
{
|
| 108149 |
+
"epoch": 17.596011396011395,
|
| 108150 |
+
"grad_norm": 0.18668244779109955,
|
| 108151 |
+
"learning_rate": 1.9117133018833776e-06,
|
| 108152 |
+
"loss": 0.6674,
|
| 108153 |
+
"step": 15449
|
| 108154 |
+
},
|
| 108155 |
+
{
|
| 108156 |
+
"epoch": 17.597150997150997,
|
| 108157 |
+
"grad_norm": 0.2371102124452591,
|
| 108158 |
+
"learning_rate": 1.9099268013305583e-06,
|
| 108159 |
+
"loss": 0.7222,
|
| 108160 |
+
"step": 15450
|
| 108161 |
+
},
|
| 108162 |
+
{
|
| 108163 |
+
"epoch": 17.5982905982906,
|
| 108164 |
+
"grad_norm": 0.17918461561203003,
|
| 108165 |
+
"learning_rate": 1.908141102759478e-06,
|
| 108166 |
+
"loss": 0.5778,
|
| 108167 |
+
"step": 15451
|
| 108168 |
+
},
|
| 108169 |
+
{
|
| 108170 |
+
"epoch": 17.5994301994302,
|
| 108171 |
+
"grad_norm": 0.19559653103351593,
|
| 108172 |
+
"learning_rate": 1.9063562062321627e-06,
|
| 108173 |
+
"loss": 0.8688,
|
| 108174 |
+
"step": 15452
|
| 108175 |
+
},
|
| 108176 |
+
{
|
| 108177 |
+
"epoch": 17.6005698005698,
|
| 108178 |
+
"grad_norm": 0.18899329006671906,
|
| 108179 |
+
"learning_rate": 1.904572111810604e-06,
|
| 108180 |
+
"loss": 0.7753,
|
| 108181 |
+
"step": 15453
|
| 108182 |
+
},
|
| 108183 |
+
{
|
| 108184 |
+
"epoch": 17.601709401709403,
|
| 108185 |
+
"grad_norm": 0.18411612510681152,
|
| 108186 |
+
"learning_rate": 1.9027888195567694e-06,
|
| 108187 |
+
"loss": 0.8394,
|
| 108188 |
+
"step": 15454
|
| 108189 |
+
},
|
| 108190 |
+
{
|
| 108191 |
+
"epoch": 17.602849002849002,
|
| 108192 |
+
"grad_norm": 0.19840501248836517,
|
| 108193 |
+
"learning_rate": 1.9010063295325926e-06,
|
| 108194 |
+
"loss": 0.6398,
|
| 108195 |
+
"step": 15455
|
| 108196 |
+
},
|
| 108197 |
+
{
|
| 108198 |
+
"epoch": 17.603988603988604,
|
| 108199 |
+
"grad_norm": 0.21453194320201874,
|
| 108200 |
+
"learning_rate": 1.8992246417999903e-06,
|
| 108201 |
+
"loss": 0.5722,
|
| 108202 |
+
"step": 15456
|
| 108203 |
+
},
|
| 108204 |
+
{
|
| 108205 |
+
"epoch": 17.605128205128207,
|
| 108206 |
+
"grad_norm": 0.301025927066803,
|
| 108207 |
+
"learning_rate": 1.8974437564208442e-06,
|
| 108208 |
+
"loss": 0.5245,
|
| 108209 |
+
"step": 15457
|
| 108210 |
+
},
|
| 108211 |
+
{
|
| 108212 |
+
"epoch": 17.606267806267805,
|
| 108213 |
+
"grad_norm": 0.167201429605484,
|
| 108214 |
+
"learning_rate": 1.895663673457007e-06,
|
| 108215 |
+
"loss": 0.6582,
|
| 108216 |
+
"step": 15458
|
| 108217 |
+
},
|
| 108218 |
+
{
|
| 108219 |
+
"epoch": 17.607407407407408,
|
| 108220 |
+
"grad_norm": 0.17727838456630707,
|
| 108221 |
+
"learning_rate": 1.8938843929703077e-06,
|
| 108222 |
+
"loss": 0.6775,
|
| 108223 |
+
"step": 15459
|
| 108224 |
+
},
|
| 108225 |
+
{
|
| 108226 |
+
"epoch": 17.60854700854701,
|
| 108227 |
+
"grad_norm": 0.23858334124088287,
|
| 108228 |
+
"learning_rate": 1.8921059150225516e-06,
|
| 108229 |
+
"loss": 0.5558,
|
| 108230 |
+
"step": 15460
|
| 108231 |
+
},
|
| 108232 |
+
{
|
| 108233 |
+
"epoch": 17.60968660968661,
|
| 108234 |
+
"grad_norm": 0.2909860908985138,
|
| 108235 |
+
"learning_rate": 1.8903282396754984e-06,
|
| 108236 |
+
"loss": 0.3752,
|
| 108237 |
+
"step": 15461
|
| 108238 |
+
},
|
| 108239 |
+
{
|
| 108240 |
+
"epoch": 17.61082621082621,
|
| 108241 |
+
"grad_norm": 0.241691455245018,
|
| 108242 |
+
"learning_rate": 1.8885513669908983e-06,
|
| 108243 |
+
"loss": 0.7017,
|
| 108244 |
+
"step": 15462
|
| 108245 |
+
},
|
| 108246 |
+
{
|
| 108247 |
+
"epoch": 17.611965811965813,
|
| 108248 |
+
"grad_norm": 0.1912352293729782,
|
| 108249 |
+
"learning_rate": 1.886775297030463e-06,
|
| 108250 |
+
"loss": 0.7422,
|
| 108251 |
+
"step": 15463
|
| 108252 |
+
},
|
| 108253 |
+
{
|
| 108254 |
+
"epoch": 17.613105413105412,
|
| 108255 |
+
"grad_norm": 0.18150320649147034,
|
| 108256 |
+
"learning_rate": 1.8850000298558905e-06,
|
| 108257 |
+
"loss": 0.6309,
|
| 108258 |
+
"step": 15464
|
| 108259 |
+
},
|
| 108260 |
+
{
|
| 108261 |
+
"epoch": 17.614245014245014,
|
| 108262 |
+
"grad_norm": 0.19993852078914642,
|
| 108263 |
+
"learning_rate": 1.8832255655288288e-06,
|
| 108264 |
+
"loss": 0.4911,
|
| 108265 |
+
"step": 15465
|
| 108266 |
+
},
|
| 108267 |
+
{
|
| 108268 |
+
"epoch": 17.615384615384617,
|
| 108269 |
+
"grad_norm": 0.2244868129491806,
|
| 108270 |
+
"learning_rate": 1.8814519041109147e-06,
|
| 108271 |
+
"loss": 0.7259,
|
| 108272 |
+
"step": 15466
|
| 108273 |
+
},
|
| 108274 |
+
{
|
| 108275 |
+
"epoch": 17.616524216524216,
|
| 108276 |
+
"grad_norm": 0.1799953728914261,
|
| 108277 |
+
"learning_rate": 1.8796790456637514e-06,
|
| 108278 |
+
"loss": 0.7384,
|
| 108279 |
+
"step": 15467
|
| 108280 |
+
},
|
| 108281 |
+
{
|
| 108282 |
+
"epoch": 17.617663817663818,
|
| 108283 |
+
"grad_norm": 0.2096245288848877,
|
| 108284 |
+
"learning_rate": 1.8779069902489148e-06,
|
| 108285 |
+
"loss": 0.6046,
|
| 108286 |
+
"step": 15468
|
| 108287 |
+
},
|
| 108288 |
+
{
|
| 108289 |
+
"epoch": 17.61880341880342,
|
| 108290 |
+
"grad_norm": 0.20663857460021973,
|
| 108291 |
+
"learning_rate": 1.876135737927956e-06,
|
| 108292 |
+
"loss": 0.5447,
|
| 108293 |
+
"step": 15469
|
| 108294 |
+
},
|
| 108295 |
+
{
|
| 108296 |
+
"epoch": 17.61994301994302,
|
| 108297 |
+
"grad_norm": 0.18800821900367737,
|
| 108298 |
+
"learning_rate": 1.8743652887623947e-06,
|
| 108299 |
+
"loss": 0.6231,
|
| 108300 |
+
"step": 15470
|
| 108301 |
+
},
|
| 108302 |
+
{
|
| 108303 |
+
"epoch": 17.62108262108262,
|
| 108304 |
+
"grad_norm": 0.18880976736545563,
|
| 108305 |
+
"learning_rate": 1.8725956428137265e-06,
|
| 108306 |
+
"loss": 0.6914,
|
| 108307 |
+
"step": 15471
|
| 108308 |
+
},
|
| 108309 |
+
{
|
| 108310 |
+
"epoch": 17.622222222222224,
|
| 108311 |
+
"grad_norm": 0.19186227023601532,
|
| 108312 |
+
"learning_rate": 1.8708268001434075e-06,
|
| 108313 |
+
"loss": 0.6203,
|
| 108314 |
+
"step": 15472
|
| 108315 |
+
},
|
| 108316 |
+
{
|
| 108317 |
+
"epoch": 17.623361823361822,
|
| 108318 |
+
"grad_norm": 0.19230565428733826,
|
| 108319 |
+
"learning_rate": 1.869058760812878e-06,
|
| 108320 |
+
"loss": 0.6205,
|
| 108321 |
+
"step": 15473
|
| 108322 |
+
},
|
| 108323 |
+
{
|
| 108324 |
+
"epoch": 17.624501424501425,
|
| 108325 |
+
"grad_norm": 0.21398407220840454,
|
| 108326 |
+
"learning_rate": 1.8672915248835492e-06,
|
| 108327 |
+
"loss": 0.8591,
|
| 108328 |
+
"step": 15474
|
| 108329 |
+
},
|
| 108330 |
+
{
|
| 108331 |
+
"epoch": 17.625641025641027,
|
| 108332 |
+
"grad_norm": 0.3133992850780487,
|
| 108333 |
+
"learning_rate": 1.865525092416806e-06,
|
| 108334 |
+
"loss": 0.7318,
|
| 108335 |
+
"step": 15475
|
| 108336 |
+
},
|
| 108337 |
+
{
|
| 108338 |
+
"epoch": 17.626780626780626,
|
| 108339 |
+
"grad_norm": 0.21800687909126282,
|
| 108340 |
+
"learning_rate": 1.8637594634739908e-06,
|
| 108341 |
+
"loss": 0.7356,
|
| 108342 |
+
"step": 15476
|
| 108343 |
+
},
|
| 108344 |
+
{
|
| 108345 |
+
"epoch": 17.627920227920228,
|
| 108346 |
+
"grad_norm": 0.19687959551811218,
|
| 108347 |
+
"learning_rate": 1.861994638116435e-06,
|
| 108348 |
+
"loss": 0.3595,
|
| 108349 |
+
"step": 15477
|
| 108350 |
+
},
|
| 108351 |
+
{
|
| 108352 |
+
"epoch": 17.62905982905983,
|
| 108353 |
+
"grad_norm": 0.23806995153427124,
|
| 108354 |
+
"learning_rate": 1.8602306164054366e-06,
|
| 108355 |
+
"loss": 0.6662,
|
| 108356 |
+
"step": 15478
|
| 108357 |
+
},
|
| 108358 |
+
{
|
| 108359 |
+
"epoch": 17.63019943019943,
|
| 108360 |
+
"grad_norm": 0.2590540945529938,
|
| 108361 |
+
"learning_rate": 1.858467398402261e-06,
|
| 108362 |
+
"loss": 0.6737,
|
| 108363 |
+
"step": 15479
|
| 108364 |
+
},
|
| 108365 |
+
{
|
| 108366 |
+
"epoch": 17.63133903133903,
|
| 108367 |
+
"grad_norm": 0.24640680849552155,
|
| 108368 |
+
"learning_rate": 1.8567049841681532e-06,
|
| 108369 |
+
"loss": 0.4767,
|
| 108370 |
+
"step": 15480
|
| 108371 |
+
},
|
| 108372 |
+
{
|
| 108373 |
+
"epoch": 17.632478632478634,
|
| 108374 |
+
"grad_norm": 0.18963520228862762,
|
| 108375 |
+
"learning_rate": 1.8549433737643256e-06,
|
| 108376 |
+
"loss": 0.6836,
|
| 108377 |
+
"step": 15481
|
| 108378 |
+
},
|
| 108379 |
+
{
|
| 108380 |
+
"epoch": 17.633618233618233,
|
| 108381 |
+
"grad_norm": 0.1933334469795227,
|
| 108382 |
+
"learning_rate": 1.8531825672519682e-06,
|
| 108383 |
+
"loss": 0.6558,
|
| 108384 |
+
"step": 15482
|
| 108385 |
+
},
|
| 108386 |
+
{
|
| 108387 |
+
"epoch": 17.634757834757835,
|
| 108388 |
+
"grad_norm": 0.20356690883636475,
|
| 108389 |
+
"learning_rate": 1.8514225646922289e-06,
|
| 108390 |
+
"loss": 0.7608,
|
| 108391 |
+
"step": 15483
|
| 108392 |
+
},
|
| 108393 |
+
{
|
| 108394 |
+
"epoch": 17.635897435897437,
|
| 108395 |
+
"grad_norm": 0.2365938127040863,
|
| 108396 |
+
"learning_rate": 1.8496633661462453e-06,
|
| 108397 |
+
"loss": 0.8029,
|
| 108398 |
+
"step": 15484
|
| 108399 |
+
},
|
| 108400 |
+
{
|
| 108401 |
+
"epoch": 17.637037037037036,
|
| 108402 |
+
"grad_norm": 0.16450777649879456,
|
| 108403 |
+
"learning_rate": 1.8479049716751128e-06,
|
| 108404 |
+
"loss": 0.7396,
|
| 108405 |
+
"step": 15485
|
| 108406 |
+
},
|
| 108407 |
+
{
|
| 108408 |
+
"epoch": 17.63817663817664,
|
| 108409 |
+
"grad_norm": 0.23959478735923767,
|
| 108410 |
+
"learning_rate": 1.846147381339916e-06,
|
| 108411 |
+
"loss": 0.5964,
|
| 108412 |
+
"step": 15486
|
| 108413 |
+
},
|
| 108414 |
+
{
|
| 108415 |
+
"epoch": 17.63931623931624,
|
| 108416 |
+
"grad_norm": 0.2136934995651245,
|
| 108417 |
+
"learning_rate": 1.8443905952016893e-06,
|
| 108418 |
+
"loss": 0.4586,
|
| 108419 |
+
"step": 15487
|
| 108420 |
+
},
|
| 108421 |
+
{
|
| 108422 |
+
"epoch": 17.64045584045584,
|
| 108423 |
+
"grad_norm": 0.1984747350215912,
|
| 108424 |
+
"learning_rate": 1.8426346133214562e-06,
|
| 108425 |
+
"loss": 0.5861,
|
| 108426 |
+
"step": 15488
|
| 108427 |
+
},
|
| 108428 |
+
{
|
| 108429 |
+
"epoch": 17.64159544159544,
|
| 108430 |
+
"grad_norm": 0.18648794293403625,
|
| 108431 |
+
"learning_rate": 1.8408794357602039e-06,
|
| 108432 |
+
"loss": 0.7181,
|
| 108433 |
+
"step": 15489
|
| 108434 |
+
},
|
| 108435 |
+
{
|
| 108436 |
+
"epoch": 17.642735042735044,
|
| 108437 |
+
"grad_norm": 0.235711470246315,
|
| 108438 |
+
"learning_rate": 1.8391250625789002e-06,
|
| 108439 |
+
"loss": 0.8174,
|
| 108440 |
+
"step": 15490
|
| 108441 |
+
},
|
| 108442 |
+
{
|
| 108443 |
+
"epoch": 17.643874643874643,
|
| 108444 |
+
"grad_norm": 0.17229218780994415,
|
| 108445 |
+
"learning_rate": 1.8373714938384718e-06,
|
| 108446 |
+
"loss": 0.7393,
|
| 108447 |
+
"step": 15491
|
| 108448 |
+
},
|
| 108449 |
+
{
|
| 108450 |
+
"epoch": 17.645014245014245,
|
| 108451 |
+
"grad_norm": 0.20683033764362335,
|
| 108452 |
+
"learning_rate": 1.8356187295998305e-06,
|
| 108453 |
+
"loss": 0.7333,
|
| 108454 |
+
"step": 15492
|
| 108455 |
+
},
|
| 108456 |
+
{
|
| 108457 |
+
"epoch": 17.646153846153847,
|
| 108458 |
+
"grad_norm": 0.2287161648273468,
|
| 108459 |
+
"learning_rate": 1.8338667699238533e-06,
|
| 108460 |
+
"loss": 0.5783,
|
| 108461 |
+
"step": 15493
|
| 108462 |
+
},
|
| 108463 |
+
{
|
| 108464 |
+
"epoch": 17.647293447293446,
|
| 108465 |
+
"grad_norm": 0.19930461049079895,
|
| 108466 |
+
"learning_rate": 1.8321156148713935e-06,
|
| 108467 |
+
"loss": 0.5555,
|
| 108468 |
+
"step": 15494
|
| 108469 |
+
},
|
| 108470 |
+
{
|
| 108471 |
+
"epoch": 17.64843304843305,
|
| 108472 |
+
"grad_norm": 0.21522825956344604,
|
| 108473 |
+
"learning_rate": 1.830365264503267e-06,
|
| 108474 |
+
"loss": 0.694,
|
| 108475 |
+
"step": 15495
|
| 108476 |
+
},
|
| 108477 |
+
{
|
| 108478 |
+
"epoch": 17.64957264957265,
|
| 108479 |
+
"grad_norm": 0.25273633003234863,
|
| 108480 |
+
"learning_rate": 1.8286157188802722e-06,
|
| 108481 |
+
"loss": 0.5542,
|
| 108482 |
+
"step": 15496
|
| 108483 |
+
},
|
| 108484 |
+
{
|
| 108485 |
+
"epoch": 17.65071225071225,
|
| 108486 |
+
"grad_norm": 0.15592874586582184,
|
| 108487 |
+
"learning_rate": 1.8268669780631741e-06,
|
| 108488 |
+
"loss": 0.8216,
|
| 108489 |
+
"step": 15497
|
| 108490 |
+
},
|
| 108491 |
+
{
|
| 108492 |
+
"epoch": 17.651851851851852,
|
| 108493 |
+
"grad_norm": 0.22126252949237823,
|
| 108494 |
+
"learning_rate": 1.8251190421127163e-06,
|
| 108495 |
+
"loss": 0.6417,
|
| 108496 |
+
"step": 15498
|
| 108497 |
+
},
|
| 108498 |
+
{
|
| 108499 |
+
"epoch": 17.652991452991454,
|
| 108500 |
+
"grad_norm": 0.18743862211704254,
|
| 108501 |
+
"learning_rate": 1.8233719110896026e-06,
|
| 108502 |
+
"loss": 0.6671,
|
| 108503 |
+
"step": 15499
|
| 108504 |
+
},
|
| 108505 |
+
{
|
| 108506 |
+
"epoch": 17.654131054131053,
|
| 108507 |
+
"grad_norm": 0.17891569435596466,
|
| 108508 |
+
"learning_rate": 1.8216255850545178e-06,
|
| 108509 |
+
"loss": 0.633,
|
| 108510 |
+
"step": 15500
|
| 108511 |
+
},
|
| 108512 |
+
{
|
| 108513 |
+
"epoch": 17.655270655270655,
|
| 108514 |
+
"grad_norm": 0.23442067205905914,
|
| 108515 |
+
"learning_rate": 1.8198800640681163e-06,
|
| 108516 |
+
"loss": 0.4959,
|
| 108517 |
+
"step": 15501
|
| 108518 |
+
},
|
| 108519 |
+
{
|
| 108520 |
+
"epoch": 17.656410256410258,
|
| 108521 |
+
"grad_norm": 0.19340966641902924,
|
| 108522 |
+
"learning_rate": 1.8181353481910247e-06,
|
| 108523 |
+
"loss": 0.8506,
|
| 108524 |
+
"step": 15502
|
| 108525 |
+
},
|
| 108526 |
+
{
|
| 108527 |
+
"epoch": 17.657549857549856,
|
| 108528 |
+
"grad_norm": 0.19096554815769196,
|
| 108529 |
+
"learning_rate": 1.8163914374838441e-06,
|
| 108530 |
+
"loss": 0.6534,
|
| 108531 |
+
"step": 15503
|
| 108532 |
+
},
|
| 108533 |
+
{
|
| 108534 |
+
"epoch": 17.65868945868946,
|
| 108535 |
+
"grad_norm": 0.22529169917106628,
|
| 108536 |
+
"learning_rate": 1.8146483320071462e-06,
|
| 108537 |
+
"loss": 0.6786,
|
| 108538 |
+
"step": 15504
|
| 108539 |
+
},
|
| 108540 |
+
{
|
| 108541 |
+
"epoch": 17.65982905982906,
|
| 108542 |
+
"grad_norm": 0.18433967232704163,
|
| 108543 |
+
"learning_rate": 1.8129060318214735e-06,
|
| 108544 |
+
"loss": 0.5319,
|
| 108545 |
+
"step": 15505
|
| 108546 |
+
},
|
| 108547 |
+
{
|
| 108548 |
+
"epoch": 17.66096866096866,
|
| 108549 |
+
"grad_norm": 0.19485558569431305,
|
| 108550 |
+
"learning_rate": 1.8111645369873336e-06,
|
| 108551 |
+
"loss": 0.5592,
|
| 108552 |
+
"step": 15506
|
| 108553 |
+
},
|
| 108554 |
+
{
|
| 108555 |
+
"epoch": 17.662108262108262,
|
| 108556 |
+
"grad_norm": 0.1785300076007843,
|
| 108557 |
+
"learning_rate": 1.8094238475652225e-06,
|
| 108558 |
+
"loss": 0.6756,
|
| 108559 |
+
"step": 15507
|
| 108560 |
+
},
|
| 108561 |
+
{
|
| 108562 |
+
"epoch": 17.663247863247864,
|
| 108563 |
+
"grad_norm": 0.23500244319438934,
|
| 108564 |
+
"learning_rate": 1.8076839636155918e-06,
|
| 108565 |
+
"loss": 0.5598,
|
| 108566 |
+
"step": 15508
|
| 108567 |
+
},
|
| 108568 |
+
{
|
| 108569 |
+
"epoch": 17.664387464387463,
|
| 108570 |
+
"grad_norm": 0.16340969502925873,
|
| 108571 |
+
"learning_rate": 1.8059448851988819e-06,
|
| 108572 |
+
"loss": 0.9245,
|
| 108573 |
+
"step": 15509
|
| 108574 |
+
},
|
| 108575 |
+
{
|
| 108576 |
+
"epoch": 17.665527065527066,
|
| 108577 |
+
"grad_norm": 0.2364928424358368,
|
| 108578 |
+
"learning_rate": 1.8042066123754864e-06,
|
| 108579 |
+
"loss": 0.5783,
|
| 108580 |
+
"step": 15510
|
| 108581 |
+
},
|
| 108582 |
+
{
|
| 108583 |
+
"epoch": 17.666666666666668,
|
| 108584 |
+
"grad_norm": 0.19518473744392395,
|
| 108585 |
+
"learning_rate": 1.8024691452057846e-06,
|
| 108586 |
+
"loss": 0.5705,
|
| 108587 |
+
"step": 15511
|
| 108588 |
+
},
|
| 108589 |
+
{
|
| 108590 |
+
"epoch": 17.667806267806267,
|
| 108591 |
+
"grad_norm": 0.18936499953269958,
|
| 108592 |
+
"learning_rate": 1.8007324837501198e-06,
|
| 108593 |
+
"loss": 0.7235,
|
| 108594 |
+
"step": 15512
|
| 108595 |
+
},
|
| 108596 |
+
{
|
| 108597 |
+
"epoch": 17.66894586894587,
|
| 108598 |
+
"grad_norm": 0.22147583961486816,
|
| 108599 |
+
"learning_rate": 1.7989966280688165e-06,
|
| 108600 |
+
"loss": 0.6459,
|
| 108601 |
+
"step": 15513
|
| 108602 |
+
},
|
| 108603 |
+
{
|
| 108604 |
+
"epoch": 17.67008547008547,
|
| 108605 |
+
"grad_norm": 0.17571549117565155,
|
| 108606 |
+
"learning_rate": 1.7972615782221648e-06,
|
| 108607 |
+
"loss": 0.8446,
|
| 108608 |
+
"step": 15514
|
| 108609 |
+
},
|
| 108610 |
+
{
|
| 108611 |
+
"epoch": 17.67122507122507,
|
| 108612 |
+
"grad_norm": 0.26569825410842896,
|
| 108613 |
+
"learning_rate": 1.795527334270422e-06,
|
| 108614 |
+
"loss": 0.5387,
|
| 108615 |
+
"step": 15515
|
| 108616 |
+
},
|
| 108617 |
+
{
|
| 108618 |
+
"epoch": 17.672364672364672,
|
| 108619 |
+
"grad_norm": 0.2654166519641876,
|
| 108620 |
+
"learning_rate": 1.793793896273835e-06,
|
| 108621 |
+
"loss": 0.3612,
|
| 108622 |
+
"step": 15516
|
| 108623 |
+
},
|
| 108624 |
+
{
|
| 108625 |
+
"epoch": 17.673504273504275,
|
| 108626 |
+
"grad_norm": 0.22360172867774963,
|
| 108627 |
+
"learning_rate": 1.7920612642925993e-06,
|
| 108628 |
+
"loss": 0.6004,
|
| 108629 |
+
"step": 15517
|
| 108630 |
+
},
|
| 108631 |
+
{
|
| 108632 |
+
"epoch": 17.674643874643873,
|
| 108633 |
+
"grad_norm": 0.15204519033432007,
|
| 108634 |
+
"learning_rate": 1.790329438386895e-06,
|
| 108635 |
+
"loss": 0.6687,
|
| 108636 |
+
"step": 15518
|
| 108637 |
+
},
|
| 108638 |
+
{
|
| 108639 |
+
"epoch": 17.675783475783476,
|
| 108640 |
+
"grad_norm": 0.20888058841228485,
|
| 108641 |
+
"learning_rate": 1.7885984186168798e-06,
|
| 108642 |
+
"loss": 0.6181,
|
| 108643 |
+
"step": 15519
|
| 108644 |
+
},
|
| 108645 |
+
{
|
| 108646 |
+
"epoch": 17.676923076923078,
|
| 108647 |
+
"grad_norm": 0.19444310665130615,
|
| 108648 |
+
"learning_rate": 1.7868682050426743e-06,
|
| 108649 |
+
"loss": 0.7009,
|
| 108650 |
+
"step": 15520
|
| 108651 |
+
},
|
| 108652 |
+
{
|
| 108653 |
+
"epoch": 17.678062678062677,
|
| 108654 |
+
"grad_norm": 0.2132650762796402,
|
| 108655 |
+
"learning_rate": 1.78513879772437e-06,
|
| 108656 |
+
"loss": 0.7071,
|
| 108657 |
+
"step": 15521
|
| 108658 |
+
},
|
| 108659 |
+
{
|
| 108660 |
+
"epoch": 17.67920227920228,
|
| 108661 |
+
"grad_norm": 0.1768898367881775,
|
| 108662 |
+
"learning_rate": 1.7834101967220351e-06,
|
| 108663 |
+
"loss": 0.64,
|
| 108664 |
+
"step": 15522
|
| 108665 |
+
},
|
| 108666 |
+
{
|
| 108667 |
+
"epoch": 17.68034188034188,
|
| 108668 |
+
"grad_norm": 0.1777723878622055,
|
| 108669 |
+
"learning_rate": 1.7816824020957106e-06,
|
| 108670 |
+
"loss": 0.6448,
|
| 108671 |
+
"step": 15523
|
| 108672 |
+
},
|
| 108673 |
+
{
|
| 108674 |
+
"epoch": 17.68148148148148,
|
| 108675 |
+
"grad_norm": 0.24505949020385742,
|
| 108676 |
+
"learning_rate": 1.779955413905407e-06,
|
| 108677 |
+
"loss": 0.5736,
|
| 108678 |
+
"step": 15524
|
| 108679 |
+
},
|
| 108680 |
+
{
|
| 108681 |
+
"epoch": 17.682621082621083,
|
| 108682 |
+
"grad_norm": 0.18339896202087402,
|
| 108683 |
+
"learning_rate": 1.7782292322111066e-06,
|
| 108684 |
+
"loss": 0.7177,
|
| 108685 |
+
"step": 15525
|
| 108686 |
+
},
|
| 108687 |
+
{
|
| 108688 |
+
"epoch": 17.683760683760685,
|
| 108689 |
+
"grad_norm": 0.17547276616096497,
|
| 108690 |
+
"learning_rate": 1.7765038570727643e-06,
|
| 108691 |
+
"loss": 0.791,
|
| 108692 |
+
"step": 15526
|
| 108693 |
+
},
|
| 108694 |
+
{
|
| 108695 |
+
"epoch": 17.684900284900284,
|
| 108696 |
+
"grad_norm": 0.23864220082759857,
|
| 108697 |
+
"learning_rate": 1.77477928855031e-06,
|
| 108698 |
+
"loss": 0.4275,
|
| 108699 |
+
"step": 15527
|
| 108700 |
+
},
|
| 108701 |
+
{
|
| 108702 |
+
"epoch": 17.686039886039886,
|
| 108703 |
+
"grad_norm": 0.20151357352733612,
|
| 108704 |
+
"learning_rate": 1.773055526703643e-06,
|
| 108705 |
+
"loss": 0.6343,
|
| 108706 |
+
"step": 15528
|
| 108707 |
+
},
|
| 108708 |
+
{
|
| 108709 |
+
"epoch": 17.68717948717949,
|
| 108710 |
+
"grad_norm": 0.18393632769584656,
|
| 108711 |
+
"learning_rate": 1.7713325715926293e-06,
|
| 108712 |
+
"loss": 0.5443,
|
| 108713 |
+
"step": 15529
|
| 108714 |
+
},
|
| 108715 |
+
{
|
| 108716 |
+
"epoch": 17.688319088319087,
|
| 108717 |
+
"grad_norm": 0.2020386904478073,
|
| 108718 |
+
"learning_rate": 1.7696104232771155e-06,
|
| 108719 |
+
"loss": 0.6255,
|
| 108720 |
+
"step": 15530
|
| 108721 |
+
},
|
| 108722 |
+
{
|
| 108723 |
+
"epoch": 17.68945868945869,
|
| 108724 |
+
"grad_norm": 0.21631336212158203,
|
| 108725 |
+
"learning_rate": 1.767889081816912e-06,
|
| 108726 |
+
"loss": 0.6578,
|
| 108727 |
+
"step": 15531
|
| 108728 |
+
},
|
| 108729 |
+
{
|
| 108730 |
+
"epoch": 17.69059829059829,
|
| 108731 |
+
"grad_norm": 0.2140851616859436,
|
| 108732 |
+
"learning_rate": 1.7661685472718153e-06,
|
| 108733 |
+
"loss": 0.4453,
|
| 108734 |
+
"step": 15532
|
| 108735 |
+
},
|
| 108736 |
+
{
|
| 108737 |
+
"epoch": 17.69173789173789,
|
| 108738 |
+
"grad_norm": 0.17820830643177032,
|
| 108739 |
+
"learning_rate": 1.7644488197015751e-06,
|
| 108740 |
+
"loss": 0.3786,
|
| 108741 |
+
"step": 15533
|
| 108742 |
+
},
|
| 108743 |
+
{
|
| 108744 |
+
"epoch": 17.692877492877493,
|
| 108745 |
+
"grad_norm": 0.1866726577281952,
|
| 108746 |
+
"learning_rate": 1.7627298991659185e-06,
|
| 108747 |
+
"loss": 0.5997,
|
| 108748 |
+
"step": 15534
|
| 108749 |
+
},
|
| 108750 |
+
{
|
| 108751 |
+
"epoch": 17.694017094017095,
|
| 108752 |
+
"grad_norm": 0.18119822442531586,
|
| 108753 |
+
"learning_rate": 1.7610117857245673e-06,
|
| 108754 |
+
"loss": 0.4361,
|
| 108755 |
+
"step": 15535
|
| 108756 |
+
},
|
| 108757 |
+
{
|
| 108758 |
+
"epoch": 17.695156695156694,
|
| 108759 |
+
"grad_norm": 0.22917959094047546,
|
| 108760 |
+
"learning_rate": 1.7592944794371764e-06,
|
| 108761 |
+
"loss": 0.4815,
|
| 108762 |
+
"step": 15536
|
| 108763 |
+
},
|
| 108764 |
+
{
|
| 108765 |
+
"epoch": 17.696296296296296,
|
| 108766 |
+
"grad_norm": 0.1893644481897354,
|
| 108767 |
+
"learning_rate": 1.7575779803634035e-06,
|
| 108768 |
+
"loss": 0.6566,
|
| 108769 |
+
"step": 15537
|
| 108770 |
+
},
|
| 108771 |
+
{
|
| 108772 |
+
"epoch": 17.6974358974359,
|
| 108773 |
+
"grad_norm": 0.2197505682706833,
|
| 108774 |
+
"learning_rate": 1.7558622885628623e-06,
|
| 108775 |
+
"loss": 0.6209,
|
| 108776 |
+
"step": 15538
|
| 108777 |
+
},
|
| 108778 |
+
{
|
| 108779 |
+
"epoch": 17.698575498575497,
|
| 108780 |
+
"grad_norm": 0.292765736579895,
|
| 108781 |
+
"learning_rate": 1.7541474040951494e-06,
|
| 108782 |
+
"loss": 0.5363,
|
| 108783 |
+
"step": 15539
|
| 108784 |
+
},
|
| 108785 |
+
{
|
| 108786 |
+
"epoch": 17.6997150997151,
|
| 108787 |
+
"grad_norm": 0.17893919348716736,
|
| 108788 |
+
"learning_rate": 1.7524333270198202e-06,
|
| 108789 |
+
"loss": 0.4011,
|
| 108790 |
+
"step": 15540
|
| 108791 |
+
},
|
| 108792 |
+
{
|
| 108793 |
+
"epoch": 17.700854700854702,
|
| 108794 |
+
"grad_norm": 0.2205260545015335,
|
| 108795 |
+
"learning_rate": 1.7507200573964127e-06,
|
| 108796 |
+
"loss": 0.7413,
|
| 108797 |
+
"step": 15541
|
| 108798 |
+
},
|
| 108799 |
+
{
|
| 108800 |
+
"epoch": 17.7019943019943,
|
| 108801 |
+
"grad_norm": 0.2102389633655548,
|
| 108802 |
+
"learning_rate": 1.7490075952844326e-06,
|
| 108803 |
+
"loss": 0.8024,
|
| 108804 |
+
"step": 15542
|
| 108805 |
+
},
|
| 108806 |
+
{
|
| 108807 |
+
"epoch": 17.703133903133903,
|
| 108808 |
+
"grad_norm": 0.2010532170534134,
|
| 108809 |
+
"learning_rate": 1.7472959407433653e-06,
|
| 108810 |
+
"loss": 0.6227,
|
| 108811 |
+
"step": 15543
|
| 108812 |
+
},
|
| 108813 |
+
{
|
| 108814 |
+
"epoch": 17.704273504273505,
|
| 108815 |
+
"grad_norm": 0.19559378921985626,
|
| 108816 |
+
"learning_rate": 1.7455850938326496e-06,
|
| 108817 |
+
"loss": 0.7316,
|
| 108818 |
+
"step": 15544
|
| 108819 |
+
},
|
| 108820 |
+
{
|
| 108821 |
+
"epoch": 17.705413105413104,
|
| 108822 |
+
"grad_norm": 0.19908377528190613,
|
| 108823 |
+
"learning_rate": 1.7438750546117127e-06,
|
| 108824 |
+
"loss": 0.6025,
|
| 108825 |
+
"step": 15545
|
| 108826 |
+
},
|
| 108827 |
+
{
|
| 108828 |
+
"epoch": 17.706552706552706,
|
| 108829 |
+
"grad_norm": 0.21301917731761932,
|
| 108830 |
+
"learning_rate": 1.742165823139949e-06,
|
| 108831 |
+
"loss": 0.5778,
|
| 108832 |
+
"step": 15546
|
| 108833 |
+
},
|
| 108834 |
+
{
|
| 108835 |
+
"epoch": 17.70769230769231,
|
| 108836 |
+
"grad_norm": 0.21259737014770508,
|
| 108837 |
+
"learning_rate": 1.7404573994767276e-06,
|
| 108838 |
+
"loss": 0.4874,
|
| 108839 |
+
"step": 15547
|
| 108840 |
+
},
|
| 108841 |
+
{
|
| 108842 |
+
"epoch": 17.708831908831907,
|
| 108843 |
+
"grad_norm": 0.25575268268585205,
|
| 108844 |
+
"learning_rate": 1.7387497836813843e-06,
|
| 108845 |
+
"loss": 0.5198,
|
| 108846 |
+
"step": 15548
|
| 108847 |
+
},
|
| 108848 |
+
{
|
| 108849 |
+
"epoch": 17.70997150997151,
|
| 108850 |
+
"grad_norm": 0.179165318608284,
|
| 108851 |
+
"learning_rate": 1.73704297581323e-06,
|
| 108852 |
+
"loss": 0.5788,
|
| 108853 |
+
"step": 15549
|
| 108854 |
+
},
|
| 108855 |
+
{
|
| 108856 |
+
"epoch": 17.711111111111112,
|
| 108857 |
+
"grad_norm": 0.22417140007019043,
|
| 108858 |
+
"learning_rate": 1.7353369759315509e-06,
|
| 108859 |
+
"loss": 0.5924,
|
| 108860 |
+
"step": 15550
|
| 108861 |
+
},
|
| 108862 |
+
{
|
| 108863 |
+
"epoch": 17.71225071225071,
|
| 108864 |
+
"grad_norm": 0.18318435549736023,
|
| 108865 |
+
"learning_rate": 1.7336317840955907e-06,
|
| 108866 |
+
"loss": 0.5719,
|
| 108867 |
+
"step": 15551
|
| 108868 |
+
},
|
| 108869 |
+
{
|
| 108870 |
+
"epoch": 17.713390313390313,
|
| 108871 |
+
"grad_norm": 0.21470986306667328,
|
| 108872 |
+
"learning_rate": 1.731927400364583e-06,
|
| 108873 |
+
"loss": 0.6964,
|
| 108874 |
+
"step": 15552
|
| 108875 |
+
},
|
| 108876 |
+
{
|
| 108877 |
+
"epoch": 17.714529914529916,
|
| 108878 |
+
"grad_norm": 0.1701890379190445,
|
| 108879 |
+
"learning_rate": 1.7302238247977248e-06,
|
| 108880 |
+
"loss": 0.6671,
|
| 108881 |
+
"step": 15553
|
| 108882 |
+
},
|
| 108883 |
+
{
|
| 108884 |
+
"epoch": 17.715669515669514,
|
| 108885 |
+
"grad_norm": 0.20296506583690643,
|
| 108886 |
+
"learning_rate": 1.7285210574541883e-06,
|
| 108887 |
+
"loss": 0.61,
|
| 108888 |
+
"step": 15554
|
| 108889 |
+
},
|
| 108890 |
+
{
|
| 108891 |
+
"epoch": 17.716809116809117,
|
| 108892 |
+
"grad_norm": 0.1775161325931549,
|
| 108893 |
+
"learning_rate": 1.7268190983931065e-06,
|
| 108894 |
+
"loss": 0.793,
|
| 108895 |
+
"step": 15555
|
| 108896 |
+
},
|
| 108897 |
+
{
|
| 108898 |
+
"epoch": 17.71794871794872,
|
| 108899 |
+
"grad_norm": 0.1933393031358719,
|
| 108900 |
+
"learning_rate": 1.7251179476736019e-06,
|
| 108901 |
+
"loss": 0.5851,
|
| 108902 |
+
"step": 15556
|
| 108903 |
+
},
|
| 108904 |
+
{
|
| 108905 |
+
"epoch": 17.719088319088318,
|
| 108906 |
+
"grad_norm": 0.2070257067680359,
|
| 108907 |
+
"learning_rate": 1.7234176053547547e-06,
|
| 108908 |
+
"loss": 0.4786,
|
| 108909 |
+
"step": 15557
|
| 108910 |
+
},
|
| 108911 |
+
{
|
| 108912 |
+
"epoch": 17.72022792022792,
|
| 108913 |
+
"grad_norm": 0.1920633316040039,
|
| 108914 |
+
"learning_rate": 1.721718071495626e-06,
|
| 108915 |
+
"loss": 0.6262,
|
| 108916 |
+
"step": 15558
|
| 108917 |
+
},
|
| 108918 |
+
{
|
| 108919 |
+
"epoch": 17.721367521367522,
|
| 108920 |
+
"grad_norm": 0.1719004213809967,
|
| 108921 |
+
"learning_rate": 1.720019346155244e-06,
|
| 108922 |
+
"loss": 0.6826,
|
| 108923 |
+
"step": 15559
|
| 108924 |
+
},
|
| 108925 |
+
{
|
| 108926 |
+
"epoch": 17.72250712250712,
|
| 108927 |
+
"grad_norm": 0.20648297667503357,
|
| 108928 |
+
"learning_rate": 1.7183214293926108e-06,
|
| 108929 |
+
"loss": 0.5219,
|
| 108930 |
+
"step": 15560
|
| 108931 |
+
},
|
| 108932 |
+
{
|
| 108933 |
+
"epoch": 17.723646723646723,
|
| 108934 |
+
"grad_norm": 0.16962285339832306,
|
| 108935 |
+
"learning_rate": 1.7166243212667049e-06,
|
| 108936 |
+
"loss": 0.8749,
|
| 108937 |
+
"step": 15561
|
| 108938 |
+
},
|
| 108939 |
+
{
|
| 108940 |
+
"epoch": 17.724786324786326,
|
| 108941 |
+
"grad_norm": 0.2371315211057663,
|
| 108942 |
+
"learning_rate": 1.7149280218364594e-06,
|
| 108943 |
+
"loss": 0.478,
|
| 108944 |
+
"step": 15562
|
| 108945 |
+
},
|
| 108946 |
+
{
|
| 108947 |
+
"epoch": 17.725925925925925,
|
| 108948 |
+
"grad_norm": 0.1887923628091812,
|
| 108949 |
+
"learning_rate": 1.7132325311607966e-06,
|
| 108950 |
+
"loss": 0.7168,
|
| 108951 |
+
"step": 15563
|
| 108952 |
+
},
|
| 108953 |
+
{
|
| 108954 |
+
"epoch": 17.727065527065527,
|
| 108955 |
+
"grad_norm": 0.17830583453178406,
|
| 108956 |
+
"learning_rate": 1.7115378492986084e-06,
|
| 108957 |
+
"loss": 0.7262,
|
| 108958 |
+
"step": 15564
|
| 108959 |
+
},
|
| 108960 |
+
{
|
| 108961 |
+
"epoch": 17.72820512820513,
|
| 108962 |
+
"grad_norm": 0.2064090520143509,
|
| 108963 |
+
"learning_rate": 1.7098439763087587e-06,
|
| 108964 |
+
"loss": 0.5849,
|
| 108965 |
+
"step": 15565
|
| 108966 |
+
},
|
| 108967 |
+
{
|
| 108968 |
+
"epoch": 17.729344729344728,
|
| 108969 |
+
"grad_norm": 0.23627474904060364,
|
| 108970 |
+
"learning_rate": 1.7081509122500727e-06,
|
| 108971 |
+
"loss": 0.5145,
|
| 108972 |
+
"step": 15566
|
| 108973 |
+
},
|
| 108974 |
+
{
|
| 108975 |
+
"epoch": 17.73048433048433,
|
| 108976 |
+
"grad_norm": 0.24249598383903503,
|
| 108977 |
+
"learning_rate": 1.7064586571813563e-06,
|
| 108978 |
+
"loss": 0.7986,
|
| 108979 |
+
"step": 15567
|
| 108980 |
+
},
|
| 108981 |
+
{
|
| 108982 |
+
"epoch": 17.731623931623933,
|
| 108983 |
+
"grad_norm": 0.2103555053472519,
|
| 108984 |
+
"learning_rate": 1.7047672111613872e-06,
|
| 108985 |
+
"loss": 0.674,
|
| 108986 |
+
"step": 15568
|
| 108987 |
+
},
|
| 108988 |
+
{
|
| 108989 |
+
"epoch": 17.73276353276353,
|
| 108990 |
+
"grad_norm": 0.19475817680358887,
|
| 108991 |
+
"learning_rate": 1.7030765742489214e-06,
|
| 108992 |
+
"loss": 0.7642,
|
| 108993 |
+
"step": 15569
|
| 108994 |
+
},
|
| 108995 |
+
{
|
| 108996 |
+
"epoch": 17.733903133903134,
|
| 108997 |
+
"grad_norm": 0.2143593579530716,
|
| 108998 |
+
"learning_rate": 1.7013867465026672e-06,
|
| 108999 |
+
"loss": 0.7425,
|
| 109000 |
+
"step": 15570
|
| 109001 |
+
},
|
| 109002 |
+
{
|
| 109003 |
+
"epoch": 17.735042735042736,
|
| 109004 |
+
"grad_norm": 0.19551685452461243,
|
| 109005 |
+
"learning_rate": 1.6996977279813253e-06,
|
| 109006 |
+
"loss": 0.5916,
|
| 109007 |
+
"step": 15571
|
| 109008 |
+
},
|
| 109009 |
+
{
|
| 109010 |
+
"epoch": 17.736182336182335,
|
| 109011 |
+
"grad_norm": 0.1768016666173935,
|
| 109012 |
+
"learning_rate": 1.698009518743554e-06,
|
| 109013 |
+
"loss": 0.6166,
|
| 109014 |
+
"step": 15572
|
| 109015 |
+
},
|
| 109016 |
+
{
|
| 109017 |
+
"epoch": 17.737321937321937,
|
| 109018 |
+
"grad_norm": 0.19153043627738953,
|
| 109019 |
+
"learning_rate": 1.696322118848001e-06,
|
| 109020 |
+
"loss": 0.7695,
|
| 109021 |
+
"step": 15573
|
| 109022 |
+
},
|
| 109023 |
+
{
|
| 109024 |
+
"epoch": 17.73846153846154,
|
| 109025 |
+
"grad_norm": 0.2019730657339096,
|
| 109026 |
+
"learning_rate": 1.6946355283532584e-06,
|
| 109027 |
+
"loss": 0.6971,
|
| 109028 |
+
"step": 15574
|
| 109029 |
+
},
|
| 109030 |
+
{
|
| 109031 |
+
"epoch": 17.739601139601138,
|
| 109032 |
+
"grad_norm": 0.17428846657276154,
|
| 109033 |
+
"learning_rate": 1.6929497473179178e-06,
|
| 109034 |
+
"loss": 0.4945,
|
| 109035 |
+
"step": 15575
|
| 109036 |
+
},
|
| 109037 |
+
{
|
| 109038 |
+
"epoch": 17.74074074074074,
|
| 109039 |
+
"grad_norm": 0.2166702300310135,
|
| 109040 |
+
"learning_rate": 1.6912647758005245e-06,
|
| 109041 |
+
"loss": 0.502,
|
| 109042 |
+
"step": 15576
|
| 109043 |
+
},
|
| 109044 |
+
{
|
| 109045 |
+
"epoch": 17.741880341880343,
|
| 109046 |
+
"grad_norm": 0.19433961808681488,
|
| 109047 |
+
"learning_rate": 1.6895806138596092e-06,
|
| 109048 |
+
"loss": 0.6245,
|
| 109049 |
+
"step": 15577
|
| 109050 |
+
},
|
| 109051 |
+
{
|
| 109052 |
+
"epoch": 17.74301994301994,
|
| 109053 |
+
"grad_norm": 0.1903829723596573,
|
| 109054 |
+
"learning_rate": 1.6878972615536587e-06,
|
| 109055 |
+
"loss": 0.7285,
|
| 109056 |
+
"step": 15578
|
| 109057 |
+
},
|
| 109058 |
+
{
|
| 109059 |
+
"epoch": 17.744159544159544,
|
| 109060 |
+
"grad_norm": 0.17509286105632782,
|
| 109061 |
+
"learning_rate": 1.6862147189411426e-06,
|
| 109062 |
+
"loss": 0.6913,
|
| 109063 |
+
"step": 15579
|
| 109064 |
+
},
|
| 109065 |
+
{
|
| 109066 |
+
"epoch": 17.745299145299146,
|
| 109067 |
+
"grad_norm": 0.1851557195186615,
|
| 109068 |
+
"learning_rate": 1.6845329860805087e-06,
|
| 109069 |
+
"loss": 0.705,
|
| 109070 |
+
"step": 15580
|
| 109071 |
+
},
|
| 109072 |
+
{
|
| 109073 |
+
"epoch": 17.746438746438745,
|
| 109074 |
+
"grad_norm": 0.17817610502243042,
|
| 109075 |
+
"learning_rate": 1.6828520630301574e-06,
|
| 109076 |
+
"loss": 0.5326,
|
| 109077 |
+
"step": 15581
|
| 109078 |
+
},
|
| 109079 |
+
{
|
| 109080 |
+
"epoch": 17.747578347578347,
|
| 109081 |
+
"grad_norm": 0.18409055471420288,
|
| 109082 |
+
"learning_rate": 1.6811719498484785e-06,
|
| 109083 |
+
"loss": 0.7783,
|
| 109084 |
+
"step": 15582
|
| 109085 |
+
},
|
| 109086 |
+
{
|
| 109087 |
+
"epoch": 17.74871794871795,
|
| 109088 |
+
"grad_norm": 0.26104357838630676,
|
| 109089 |
+
"learning_rate": 1.679492646593825e-06,
|
| 109090 |
+
"loss": 0.5521,
|
| 109091 |
+
"step": 15583
|
| 109092 |
+
},
|
| 109093 |
+
{
|
| 109094 |
+
"epoch": 17.74985754985755,
|
| 109095 |
+
"grad_norm": 0.24983125925064087,
|
| 109096 |
+
"learning_rate": 1.677814153324525e-06,
|
| 109097 |
+
"loss": 0.5487,
|
| 109098 |
+
"step": 15584
|
| 109099 |
+
},
|
| 109100 |
+
{
|
| 109101 |
+
"epoch": 17.75099715099715,
|
| 109102 |
+
"grad_norm": 0.19891224801540375,
|
| 109103 |
+
"learning_rate": 1.676136470098874e-06,
|
| 109104 |
+
"loss": 0.5938,
|
| 109105 |
+
"step": 15585
|
| 109106 |
+
},
|
| 109107 |
+
{
|
| 109108 |
+
"epoch": 17.752136752136753,
|
| 109109 |
+
"grad_norm": 0.16363197565078735,
|
| 109110 |
+
"learning_rate": 1.674459596975142e-06,
|
| 109111 |
+
"loss": 0.5812,
|
| 109112 |
+
"step": 15586
|
| 109113 |
+
},
|
| 109114 |
+
{
|
| 109115 |
+
"epoch": 17.753276353276352,
|
| 109116 |
+
"grad_norm": 0.1892014592885971,
|
| 109117 |
+
"learning_rate": 1.6727835340115737e-06,
|
| 109118 |
+
"loss": 0.7943,
|
| 109119 |
+
"step": 15587
|
| 109120 |
+
},
|
| 109121 |
+
{
|
| 109122 |
+
"epoch": 17.754415954415954,
|
| 109123 |
+
"grad_norm": 0.18088212609291077,
|
| 109124 |
+
"learning_rate": 1.6711082812663898e-06,
|
| 109125 |
+
"loss": 0.684,
|
| 109126 |
+
"step": 15588
|
| 109127 |
+
},
|
| 109128 |
+
{
|
| 109129 |
+
"epoch": 17.755555555555556,
|
| 109130 |
+
"grad_norm": 0.2498263418674469,
|
| 109131 |
+
"learning_rate": 1.6694338387977655e-06,
|
| 109132 |
+
"loss": 0.5798,
|
| 109133 |
+
"step": 15589
|
| 109134 |
+
},
|
| 109135 |
+
{
|
| 109136 |
+
"epoch": 17.756695156695155,
|
| 109137 |
+
"grad_norm": 0.19375599920749664,
|
| 109138 |
+
"learning_rate": 1.667760206663857e-06,
|
| 109139 |
+
"loss": 0.5276,
|
| 109140 |
+
"step": 15590
|
| 109141 |
+
},
|
| 109142 |
+
{
|
| 109143 |
+
"epoch": 17.757834757834758,
|
| 109144 |
+
"grad_norm": 0.1859721541404724,
|
| 109145 |
+
"learning_rate": 1.6660873849228125e-06,
|
| 109146 |
+
"loss": 0.8522,
|
| 109147 |
+
"step": 15591
|
| 109148 |
+
},
|
| 109149 |
+
{
|
| 109150 |
+
"epoch": 17.75897435897436,
|
| 109151 |
+
"grad_norm": 0.21408973634243011,
|
| 109152 |
+
"learning_rate": 1.6644153736327134e-06,
|
| 109153 |
+
"loss": 0.5591,
|
| 109154 |
+
"step": 15592
|
| 109155 |
+
},
|
| 109156 |
+
{
|
| 109157 |
+
"epoch": 17.76011396011396,
|
| 109158 |
+
"grad_norm": 0.1931608021259308,
|
| 109159 |
+
"learning_rate": 1.6627441728516435e-06,
|
| 109160 |
+
"loss": 0.8221,
|
| 109161 |
+
"step": 15593
|
| 109162 |
+
},
|
| 109163 |
+
{
|
| 109164 |
+
"epoch": 17.76125356125356,
|
| 109165 |
+
"grad_norm": 0.16885171830654144,
|
| 109166 |
+
"learning_rate": 1.6610737826376454e-06,
|
| 109167 |
+
"loss": 0.7237,
|
| 109168 |
+
"step": 15594
|
| 109169 |
+
},
|
| 109170 |
+
{
|
| 109171 |
+
"epoch": 17.762393162393163,
|
| 109172 |
+
"grad_norm": 0.19227465987205505,
|
| 109173 |
+
"learning_rate": 1.6594042030487421e-06,
|
| 109174 |
+
"loss": 0.4849,
|
| 109175 |
+
"step": 15595
|
| 109176 |
+
},
|
| 109177 |
+
{
|
| 109178 |
+
"epoch": 17.763532763532762,
|
| 109179 |
+
"grad_norm": 0.2134411633014679,
|
| 109180 |
+
"learning_rate": 1.6577354341429125e-06,
|
| 109181 |
+
"loss": 0.4955,
|
| 109182 |
+
"step": 15596
|
| 109183 |
+
},
|
| 109184 |
+
{
|
| 109185 |
+
"epoch": 17.764672364672364,
|
| 109186 |
+
"grad_norm": 0.22480376064777374,
|
| 109187 |
+
"learning_rate": 1.6560674759781236e-06,
|
| 109188 |
+
"loss": 0.8602,
|
| 109189 |
+
"step": 15597
|
| 109190 |
+
},
|
| 109191 |
+
{
|
| 109192 |
+
"epoch": 17.765811965811967,
|
| 109193 |
+
"grad_norm": 0.19876371324062347,
|
| 109194 |
+
"learning_rate": 1.6544003286123071e-06,
|
| 109195 |
+
"loss": 0.6761,
|
| 109196 |
+
"step": 15598
|
| 109197 |
+
},
|
| 109198 |
+
{
|
| 109199 |
+
"epoch": 17.766951566951565,
|
| 109200 |
+
"grad_norm": 0.1717088222503662,
|
| 109201 |
+
"learning_rate": 1.6527339921033725e-06,
|
| 109202 |
+
"loss": 0.7948,
|
| 109203 |
+
"step": 15599
|
| 109204 |
+
},
|
| 109205 |
+
{
|
| 109206 |
+
"epoch": 17.768091168091168,
|
| 109207 |
+
"grad_norm": 0.24230657517910004,
|
| 109208 |
+
"learning_rate": 1.651068466509187e-06,
|
| 109209 |
+
"loss": 0.5212,
|
| 109210 |
+
"step": 15600
|
| 109211 |
}
|
| 109212 |
],
|
| 109213 |
"logging_steps": 1,
|
|
|
|
| 109227 |
"attributes": {}
|
| 109228 |
}
|
| 109229 |
},
|
| 109230 |
+
"total_flos": 8.722396200817558e+19,
|
| 109231 |
"train_batch_size": 8,
|
| 109232 |
"trial_name": null,
|
| 109233 |
"trial_params": null
|