Training in progress, step 14700, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b20328662d6bf4a0f20bed1390706c5d0e2cadb6afbc9571832839820dea8be
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6392f60b9dee4f829097497c825e77bdee27a3c6046f89c0f668728cc9ee157f
|
| 3 |
size 173247691
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d0ccb50a47098887116f687715d542ec70106f4bb2dae425b33a790943b1be8
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 16.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -100808,6 +100808,2106 @@
|
|
| 100808 |
"learning_rate": 4.214068634672344e-06,
|
| 100809 |
"loss": 0.4891,
|
| 100810 |
"step": 14400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100811 |
}
|
| 100812 |
],
|
| 100813 |
"logging_steps": 1,
|
|
@@ -100827,7 +102927,7 @@
|
|
| 100827 |
"attributes": {}
|
| 100828 |
}
|
| 100829 |
},
|
| 100830 |
-
"total_flos": 8.
|
| 100831 |
"train_batch_size": 8,
|
| 100832 |
"trial_name": null,
|
| 100833 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 16.74301994301994,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 14700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 100808 |
"learning_rate": 4.214068634672344e-06,
|
| 100809 |
"loss": 0.4891,
|
| 100810 |
"step": 14400
|
| 100811 |
+
},
|
| 100812 |
+
{
|
| 100813 |
+
"epoch": 16.402279202279203,
|
| 100814 |
+
"grad_norm": 0.21518702805042267,
|
| 100815 |
+
"learning_rate": 4.2114802677019395e-06,
|
| 100816 |
+
"loss": 0.6103,
|
| 100817 |
+
"step": 14401
|
| 100818 |
+
},
|
| 100819 |
+
{
|
| 100820 |
+
"epoch": 16.403418803418802,
|
| 100821 |
+
"grad_norm": 0.17440588772296906,
|
| 100822 |
+
"learning_rate": 4.208892622773988e-06,
|
| 100823 |
+
"loss": 0.7396,
|
| 100824 |
+
"step": 14402
|
| 100825 |
+
},
|
| 100826 |
+
{
|
| 100827 |
+
"epoch": 16.404558404558404,
|
| 100828 |
+
"grad_norm": 0.22613508999347687,
|
| 100829 |
+
"learning_rate": 4.206305699978374e-06,
|
| 100830 |
+
"loss": 0.6477,
|
| 100831 |
+
"step": 14403
|
| 100832 |
+
},
|
| 100833 |
+
{
|
| 100834 |
+
"epoch": 16.405698005698007,
|
| 100835 |
+
"grad_norm": 0.1704530566930771,
|
| 100836 |
+
"learning_rate": 4.203719499404946e-06,
|
| 100837 |
+
"loss": 0.754,
|
| 100838 |
+
"step": 14404
|
| 100839 |
+
},
|
| 100840 |
+
{
|
| 100841 |
+
"epoch": 16.406837606837605,
|
| 100842 |
+
"grad_norm": 0.17848657071590424,
|
| 100843 |
+
"learning_rate": 4.201134021143535e-06,
|
| 100844 |
+
"loss": 0.8487,
|
| 100845 |
+
"step": 14405
|
| 100846 |
+
},
|
| 100847 |
+
{
|
| 100848 |
+
"epoch": 16.407977207977208,
|
| 100849 |
+
"grad_norm": 0.19880610704421997,
|
| 100850 |
+
"learning_rate": 4.19854926528393e-06,
|
| 100851 |
+
"loss": 0.619,
|
| 100852 |
+
"step": 14406
|
| 100853 |
+
},
|
| 100854 |
+
{
|
| 100855 |
+
"epoch": 16.40911680911681,
|
| 100856 |
+
"grad_norm": 0.19024336338043213,
|
| 100857 |
+
"learning_rate": 4.195965231915914e-06,
|
| 100858 |
+
"loss": 0.6118,
|
| 100859 |
+
"step": 14407
|
| 100860 |
+
},
|
| 100861 |
+
{
|
| 100862 |
+
"epoch": 16.41025641025641,
|
| 100863 |
+
"grad_norm": 0.1932651847600937,
|
| 100864 |
+
"learning_rate": 4.1933819211292355e-06,
|
| 100865 |
+
"loss": 0.543,
|
| 100866 |
+
"step": 14408
|
| 100867 |
+
},
|
| 100868 |
+
{
|
| 100869 |
+
"epoch": 16.41139601139601,
|
| 100870 |
+
"grad_norm": 0.17125093936920166,
|
| 100871 |
+
"learning_rate": 4.19079933301362e-06,
|
| 100872 |
+
"loss": 0.8024,
|
| 100873 |
+
"step": 14409
|
| 100874 |
+
},
|
| 100875 |
+
{
|
| 100876 |
+
"epoch": 16.412535612535613,
|
| 100877 |
+
"grad_norm": 0.20307296514511108,
|
| 100878 |
+
"learning_rate": 4.188217467658773e-06,
|
| 100879 |
+
"loss": 0.848,
|
| 100880 |
+
"step": 14410
|
| 100881 |
+
},
|
| 100882 |
+
{
|
| 100883 |
+
"epoch": 16.413675213675212,
|
| 100884 |
+
"grad_norm": 0.20626360177993774,
|
| 100885 |
+
"learning_rate": 4.185636325154363e-06,
|
| 100886 |
+
"loss": 0.677,
|
| 100887 |
+
"step": 14411
|
| 100888 |
+
},
|
| 100889 |
+
{
|
| 100890 |
+
"epoch": 16.414814814814815,
|
| 100891 |
+
"grad_norm": 0.2043163925409317,
|
| 100892 |
+
"learning_rate": 4.18305590559005e-06,
|
| 100893 |
+
"loss": 0.6448,
|
| 100894 |
+
"step": 14412
|
| 100895 |
+
},
|
| 100896 |
+
{
|
| 100897 |
+
"epoch": 16.415954415954417,
|
| 100898 |
+
"grad_norm": 0.1820337325334549,
|
| 100899 |
+
"learning_rate": 4.180476209055448e-06,
|
| 100900 |
+
"loss": 0.8245,
|
| 100901 |
+
"step": 14413
|
| 100902 |
+
},
|
| 100903 |
+
{
|
| 100904 |
+
"epoch": 16.417094017094016,
|
| 100905 |
+
"grad_norm": 0.22119086980819702,
|
| 100906 |
+
"learning_rate": 4.1778972356401575e-06,
|
| 100907 |
+
"loss": 0.6379,
|
| 100908 |
+
"step": 14414
|
| 100909 |
+
},
|
| 100910 |
+
{
|
| 100911 |
+
"epoch": 16.418233618233618,
|
| 100912 |
+
"grad_norm": 0.2301556020975113,
|
| 100913 |
+
"learning_rate": 4.17531898543376e-06,
|
| 100914 |
+
"loss": 0.7971,
|
| 100915 |
+
"step": 14415
|
| 100916 |
+
},
|
| 100917 |
+
{
|
| 100918 |
+
"epoch": 16.41937321937322,
|
| 100919 |
+
"grad_norm": 0.1856745034456253,
|
| 100920 |
+
"learning_rate": 4.1727414585258085e-06,
|
| 100921 |
+
"loss": 0.6311,
|
| 100922 |
+
"step": 14416
|
| 100923 |
+
},
|
| 100924 |
+
{
|
| 100925 |
+
"epoch": 16.42051282051282,
|
| 100926 |
+
"grad_norm": 0.16893912851810455,
|
| 100927 |
+
"learning_rate": 4.170164655005812e-06,
|
| 100928 |
+
"loss": 0.7953,
|
| 100929 |
+
"step": 14417
|
| 100930 |
+
},
|
| 100931 |
+
{
|
| 100932 |
+
"epoch": 16.42165242165242,
|
| 100933 |
+
"grad_norm": 0.18980665504932404,
|
| 100934 |
+
"learning_rate": 4.167588574963282e-06,
|
| 100935 |
+
"loss": 0.5062,
|
| 100936 |
+
"step": 14418
|
| 100937 |
+
},
|
| 100938 |
+
{
|
| 100939 |
+
"epoch": 16.422792022792024,
|
| 100940 |
+
"grad_norm": 0.24632985889911652,
|
| 100941 |
+
"learning_rate": 4.16501321848769e-06,
|
| 100942 |
+
"loss": 0.6022,
|
| 100943 |
+
"step": 14419
|
| 100944 |
+
},
|
| 100945 |
+
{
|
| 100946 |
+
"epoch": 16.423931623931622,
|
| 100947 |
+
"grad_norm": 0.18609194457530975,
|
| 100948 |
+
"learning_rate": 4.162438585668485e-06,
|
| 100949 |
+
"loss": 0.6492,
|
| 100950 |
+
"step": 14420
|
| 100951 |
+
},
|
| 100952 |
+
{
|
| 100953 |
+
"epoch": 16.425071225071225,
|
| 100954 |
+
"grad_norm": 0.1913001388311386,
|
| 100955 |
+
"learning_rate": 4.159864676595093e-06,
|
| 100956 |
+
"loss": 0.6434,
|
| 100957 |
+
"step": 14421
|
| 100958 |
+
},
|
| 100959 |
+
{
|
| 100960 |
+
"epoch": 16.426210826210827,
|
| 100961 |
+
"grad_norm": 0.17134885489940643,
|
| 100962 |
+
"learning_rate": 4.15729149135691e-06,
|
| 100963 |
+
"loss": 0.8395,
|
| 100964 |
+
"step": 14422
|
| 100965 |
+
},
|
| 100966 |
+
{
|
| 100967 |
+
"epoch": 16.427350427350426,
|
| 100968 |
+
"grad_norm": 0.18671829998493195,
|
| 100969 |
+
"learning_rate": 4.154719030043316e-06,
|
| 100970 |
+
"loss": 0.6149,
|
| 100971 |
+
"step": 14423
|
| 100972 |
+
},
|
| 100973 |
+
{
|
| 100974 |
+
"epoch": 16.428490028490028,
|
| 100975 |
+
"grad_norm": 0.19499360024929047,
|
| 100976 |
+
"learning_rate": 4.15214729274365e-06,
|
| 100977 |
+
"loss": 0.6216,
|
| 100978 |
+
"step": 14424
|
| 100979 |
+
},
|
| 100980 |
+
{
|
| 100981 |
+
"epoch": 16.42962962962963,
|
| 100982 |
+
"grad_norm": 0.21025635302066803,
|
| 100983 |
+
"learning_rate": 4.14957627954724e-06,
|
| 100984 |
+
"loss": 0.6376,
|
| 100985 |
+
"step": 14425
|
| 100986 |
+
},
|
| 100987 |
+
{
|
| 100988 |
+
"epoch": 16.43076923076923,
|
| 100989 |
+
"grad_norm": 0.20964206755161285,
|
| 100990 |
+
"learning_rate": 4.1470059905433845e-06,
|
| 100991 |
+
"loss": 0.6179,
|
| 100992 |
+
"step": 14426
|
| 100993 |
+
},
|
| 100994 |
+
{
|
| 100995 |
+
"epoch": 16.43190883190883,
|
| 100996 |
+
"grad_norm": 0.19520244002342224,
|
| 100997 |
+
"learning_rate": 4.144436425821363e-06,
|
| 100998 |
+
"loss": 0.543,
|
| 100999 |
+
"step": 14427
|
| 101000 |
+
},
|
| 101001 |
+
{
|
| 101002 |
+
"epoch": 16.433048433048434,
|
| 101003 |
+
"grad_norm": 0.2406681627035141,
|
| 101004 |
+
"learning_rate": 4.141867585470413e-06,
|
| 101005 |
+
"loss": 0.6697,
|
| 101006 |
+
"step": 14428
|
| 101007 |
+
},
|
| 101008 |
+
{
|
| 101009 |
+
"epoch": 16.434188034188033,
|
| 101010 |
+
"grad_norm": 0.15467698872089386,
|
| 101011 |
+
"learning_rate": 4.13929946957976e-06,
|
| 101012 |
+
"loss": 0.9953,
|
| 101013 |
+
"step": 14429
|
| 101014 |
+
},
|
| 101015 |
+
{
|
| 101016 |
+
"epoch": 16.435327635327635,
|
| 101017 |
+
"grad_norm": 0.17723286151885986,
|
| 101018 |
+
"learning_rate": 4.1367320782385976e-06,
|
| 101019 |
+
"loss": 0.755,
|
| 101020 |
+
"step": 14430
|
| 101021 |
+
},
|
| 101022 |
+
{
|
| 101023 |
+
"epoch": 16.436467236467237,
|
| 101024 |
+
"grad_norm": 0.19826023280620575,
|
| 101025 |
+
"learning_rate": 4.134165411536117e-06,
|
| 101026 |
+
"loss": 0.605,
|
| 101027 |
+
"step": 14431
|
| 101028 |
+
},
|
| 101029 |
+
{
|
| 101030 |
+
"epoch": 16.437606837606836,
|
| 101031 |
+
"grad_norm": 0.2520255446434021,
|
| 101032 |
+
"learning_rate": 4.131599469561448e-06,
|
| 101033 |
+
"loss": 0.6394,
|
| 101034 |
+
"step": 14432
|
| 101035 |
+
},
|
| 101036 |
+
{
|
| 101037 |
+
"epoch": 16.43874643874644,
|
| 101038 |
+
"grad_norm": 0.23350045084953308,
|
| 101039 |
+
"learning_rate": 4.129034252403715e-06,
|
| 101040 |
+
"loss": 0.818,
|
| 101041 |
+
"step": 14433
|
| 101042 |
+
},
|
| 101043 |
+
{
|
| 101044 |
+
"epoch": 16.43988603988604,
|
| 101045 |
+
"grad_norm": 0.220793679356575,
|
| 101046 |
+
"learning_rate": 4.126469760152021e-06,
|
| 101047 |
+
"loss": 0.818,
|
| 101048 |
+
"step": 14434
|
| 101049 |
+
},
|
| 101050 |
+
{
|
| 101051 |
+
"epoch": 16.44102564102564,
|
| 101052 |
+
"grad_norm": 0.22970087826251984,
|
| 101053 |
+
"learning_rate": 4.1239059928954385e-06,
|
| 101054 |
+
"loss": 0.7733,
|
| 101055 |
+
"step": 14435
|
| 101056 |
+
},
|
| 101057 |
+
{
|
| 101058 |
+
"epoch": 16.442165242165242,
|
| 101059 |
+
"grad_norm": 0.18339882791042328,
|
| 101060 |
+
"learning_rate": 4.121342950723004e-06,
|
| 101061 |
+
"loss": 0.7766,
|
| 101062 |
+
"step": 14436
|
| 101063 |
+
},
|
| 101064 |
+
{
|
| 101065 |
+
"epoch": 16.443304843304844,
|
| 101066 |
+
"grad_norm": 0.19485601782798767,
|
| 101067 |
+
"learning_rate": 4.118780633723745e-06,
|
| 101068 |
+
"loss": 0.6469,
|
| 101069 |
+
"step": 14437
|
| 101070 |
+
},
|
| 101071 |
+
{
|
| 101072 |
+
"epoch": 16.444444444444443,
|
| 101073 |
+
"grad_norm": 0.2181605100631714,
|
| 101074 |
+
"learning_rate": 4.11621904198666e-06,
|
| 101075 |
+
"loss": 0.6113,
|
| 101076 |
+
"step": 14438
|
| 101077 |
+
},
|
| 101078 |
+
{
|
| 101079 |
+
"epoch": 16.445584045584045,
|
| 101080 |
+
"grad_norm": 0.2137926071882248,
|
| 101081 |
+
"learning_rate": 4.113658175600724e-06,
|
| 101082 |
+
"loss": 0.5545,
|
| 101083 |
+
"step": 14439
|
| 101084 |
+
},
|
| 101085 |
+
{
|
| 101086 |
+
"epoch": 16.446723646723648,
|
| 101087 |
+
"grad_norm": 0.19882921874523163,
|
| 101088 |
+
"learning_rate": 4.111098034654873e-06,
|
| 101089 |
+
"loss": 0.8499,
|
| 101090 |
+
"step": 14440
|
| 101091 |
+
},
|
| 101092 |
+
{
|
| 101093 |
+
"epoch": 16.447863247863246,
|
| 101094 |
+
"grad_norm": 0.19488947093486786,
|
| 101095 |
+
"learning_rate": 4.108538619238022e-06,
|
| 101096 |
+
"loss": 0.7716,
|
| 101097 |
+
"step": 14441
|
| 101098 |
+
},
|
| 101099 |
+
{
|
| 101100 |
+
"epoch": 16.44900284900285,
|
| 101101 |
+
"grad_norm": 0.2073955237865448,
|
| 101102 |
+
"learning_rate": 4.105979929439091e-06,
|
| 101103 |
+
"loss": 0.7342,
|
| 101104 |
+
"step": 14442
|
| 101105 |
+
},
|
| 101106 |
+
{
|
| 101107 |
+
"epoch": 16.45014245014245,
|
| 101108 |
+
"grad_norm": 0.21484479308128357,
|
| 101109 |
+
"learning_rate": 4.103421965346929e-06,
|
| 101110 |
+
"loss": 0.7877,
|
| 101111 |
+
"step": 14443
|
| 101112 |
+
},
|
| 101113 |
+
{
|
| 101114 |
+
"epoch": 16.45128205128205,
|
| 101115 |
+
"grad_norm": 0.2445450872182846,
|
| 101116 |
+
"learning_rate": 4.100864727050388e-06,
|
| 101117 |
+
"loss": 0.4911,
|
| 101118 |
+
"step": 14444
|
| 101119 |
+
},
|
| 101120 |
+
{
|
| 101121 |
+
"epoch": 16.452421652421652,
|
| 101122 |
+
"grad_norm": 0.17344039678573608,
|
| 101123 |
+
"learning_rate": 4.098308214638288e-06,
|
| 101124 |
+
"loss": 0.6512,
|
| 101125 |
+
"step": 14445
|
| 101126 |
+
},
|
| 101127 |
+
{
|
| 101128 |
+
"epoch": 16.453561253561254,
|
| 101129 |
+
"grad_norm": 0.16234394907951355,
|
| 101130 |
+
"learning_rate": 4.0957524281994284e-06,
|
| 101131 |
+
"loss": 0.8383,
|
| 101132 |
+
"step": 14446
|
| 101133 |
+
},
|
| 101134 |
+
{
|
| 101135 |
+
"epoch": 16.454700854700853,
|
| 101136 |
+
"grad_norm": 0.22016826272010803,
|
| 101137 |
+
"learning_rate": 4.093197367822571e-06,
|
| 101138 |
+
"loss": 0.7096,
|
| 101139 |
+
"step": 14447
|
| 101140 |
+
},
|
| 101141 |
+
{
|
| 101142 |
+
"epoch": 16.455840455840455,
|
| 101143 |
+
"grad_norm": 0.26369261741638184,
|
| 101144 |
+
"learning_rate": 4.090643033596461e-06,
|
| 101145 |
+
"loss": 0.5644,
|
| 101146 |
+
"step": 14448
|
| 101147 |
+
},
|
| 101148 |
+
{
|
| 101149 |
+
"epoch": 16.456980056980058,
|
| 101150 |
+
"grad_norm": 0.23340226709842682,
|
| 101151 |
+
"learning_rate": 4.088089425609817e-06,
|
| 101152 |
+
"loss": 0.6914,
|
| 101153 |
+
"step": 14449
|
| 101154 |
+
},
|
| 101155 |
+
{
|
| 101156 |
+
"epoch": 16.458119658119656,
|
| 101157 |
+
"grad_norm": 0.21980200707912445,
|
| 101158 |
+
"learning_rate": 4.085536543951346e-06,
|
| 101159 |
+
"loss": 0.6858,
|
| 101160 |
+
"step": 14450
|
| 101161 |
+
},
|
| 101162 |
+
{
|
| 101163 |
+
"epoch": 16.45925925925926,
|
| 101164 |
+
"grad_norm": 0.20083510875701904,
|
| 101165 |
+
"learning_rate": 4.082984388709696e-06,
|
| 101166 |
+
"loss": 0.6296,
|
| 101167 |
+
"step": 14451
|
| 101168 |
+
},
|
| 101169 |
+
{
|
| 101170 |
+
"epoch": 16.46039886039886,
|
| 101171 |
+
"grad_norm": 0.21656829118728638,
|
| 101172 |
+
"learning_rate": 4.080432959973515e-06,
|
| 101173 |
+
"loss": 0.6918,
|
| 101174 |
+
"step": 14452
|
| 101175 |
+
},
|
| 101176 |
+
{
|
| 101177 |
+
"epoch": 16.46153846153846,
|
| 101178 |
+
"grad_norm": 0.21565794944763184,
|
| 101179 |
+
"learning_rate": 4.077882257831439e-06,
|
| 101180 |
+
"loss": 0.5942,
|
| 101181 |
+
"step": 14453
|
| 101182 |
+
},
|
| 101183 |
+
{
|
| 101184 |
+
"epoch": 16.462678062678062,
|
| 101185 |
+
"grad_norm": 0.18151481449604034,
|
| 101186 |
+
"learning_rate": 4.075332282372044e-06,
|
| 101187 |
+
"loss": 0.6738,
|
| 101188 |
+
"step": 14454
|
| 101189 |
+
},
|
| 101190 |
+
{
|
| 101191 |
+
"epoch": 16.463817663817665,
|
| 101192 |
+
"grad_norm": 0.1792241334915161,
|
| 101193 |
+
"learning_rate": 4.0727830336838994e-06,
|
| 101194 |
+
"loss": 0.7729,
|
| 101195 |
+
"step": 14455
|
| 101196 |
+
},
|
| 101197 |
+
{
|
| 101198 |
+
"epoch": 16.464957264957263,
|
| 101199 |
+
"grad_norm": 0.20354300737380981,
|
| 101200 |
+
"learning_rate": 4.07023451185555e-06,
|
| 101201 |
+
"loss": 0.6296,
|
| 101202 |
+
"step": 14456
|
| 101203 |
+
},
|
| 101204 |
+
{
|
| 101205 |
+
"epoch": 16.466096866096866,
|
| 101206 |
+
"grad_norm": 0.2067917436361313,
|
| 101207 |
+
"learning_rate": 4.067686716975522e-06,
|
| 101208 |
+
"loss": 0.512,
|
| 101209 |
+
"step": 14457
|
| 101210 |
+
},
|
| 101211 |
+
{
|
| 101212 |
+
"epoch": 16.467236467236468,
|
| 101213 |
+
"grad_norm": 0.1799246221780777,
|
| 101214 |
+
"learning_rate": 4.065139649132288e-06,
|
| 101215 |
+
"loss": 0.4658,
|
| 101216 |
+
"step": 14458
|
| 101217 |
+
},
|
| 101218 |
+
{
|
| 101219 |
+
"epoch": 16.468376068376067,
|
| 101220 |
+
"grad_norm": 0.21179606020450592,
|
| 101221 |
+
"learning_rate": 4.0625933084143284e-06,
|
| 101222 |
+
"loss": 0.6547,
|
| 101223 |
+
"step": 14459
|
| 101224 |
+
},
|
| 101225 |
+
{
|
| 101226 |
+
"epoch": 16.46951566951567,
|
| 101227 |
+
"grad_norm": 0.22315360605716705,
|
| 101228 |
+
"learning_rate": 4.06004769491008e-06,
|
| 101229 |
+
"loss": 0.6729,
|
| 101230 |
+
"step": 14460
|
| 101231 |
+
},
|
| 101232 |
+
{
|
| 101233 |
+
"epoch": 16.47065527065527,
|
| 101234 |
+
"grad_norm": 0.179313063621521,
|
| 101235 |
+
"learning_rate": 4.057502808707967e-06,
|
| 101236 |
+
"loss": 0.7105,
|
| 101237 |
+
"step": 14461
|
| 101238 |
+
},
|
| 101239 |
+
{
|
| 101240 |
+
"epoch": 16.47179487179487,
|
| 101241 |
+
"grad_norm": 0.18379870057106018,
|
| 101242 |
+
"learning_rate": 4.054958649896368e-06,
|
| 101243 |
+
"loss": 0.4343,
|
| 101244 |
+
"step": 14462
|
| 101245 |
+
},
|
| 101246 |
+
{
|
| 101247 |
+
"epoch": 16.472934472934472,
|
| 101248 |
+
"grad_norm": 0.2692694664001465,
|
| 101249 |
+
"learning_rate": 4.052415218563646e-06,
|
| 101250 |
+
"loss": 0.5529,
|
| 101251 |
+
"step": 14463
|
| 101252 |
+
},
|
| 101253 |
+
{
|
| 101254 |
+
"epoch": 16.474074074074075,
|
| 101255 |
+
"grad_norm": 0.1646180897951126,
|
| 101256 |
+
"learning_rate": 4.049872514798162e-06,
|
| 101257 |
+
"loss": 0.6413,
|
| 101258 |
+
"step": 14464
|
| 101259 |
+
},
|
| 101260 |
+
{
|
| 101261 |
+
"epoch": 16.475213675213674,
|
| 101262 |
+
"grad_norm": 0.1998300552368164,
|
| 101263 |
+
"learning_rate": 4.047330538688212e-06,
|
| 101264 |
+
"loss": 0.4966,
|
| 101265 |
+
"step": 14465
|
| 101266 |
+
},
|
| 101267 |
+
{
|
| 101268 |
+
"epoch": 16.476353276353276,
|
| 101269 |
+
"grad_norm": 0.15743793547153473,
|
| 101270 |
+
"learning_rate": 4.044789290322096e-06,
|
| 101271 |
+
"loss": 0.7123,
|
| 101272 |
+
"step": 14466
|
| 101273 |
+
},
|
| 101274 |
+
{
|
| 101275 |
+
"epoch": 16.477492877492878,
|
| 101276 |
+
"grad_norm": 0.2263445407152176,
|
| 101277 |
+
"learning_rate": 4.042248769788074e-06,
|
| 101278 |
+
"loss": 0.6525,
|
| 101279 |
+
"step": 14467
|
| 101280 |
+
},
|
| 101281 |
+
{
|
| 101282 |
+
"epoch": 16.478632478632477,
|
| 101283 |
+
"grad_norm": 0.1588156670331955,
|
| 101284 |
+
"learning_rate": 4.039708977174389e-06,
|
| 101285 |
+
"loss": 0.6672,
|
| 101286 |
+
"step": 14468
|
| 101287 |
+
},
|
| 101288 |
+
{
|
| 101289 |
+
"epoch": 16.47977207977208,
|
| 101290 |
+
"grad_norm": 0.17254389822483063,
|
| 101291 |
+
"learning_rate": 4.0371699125692495e-06,
|
| 101292 |
+
"loss": 0.5763,
|
| 101293 |
+
"step": 14469
|
| 101294 |
+
},
|
| 101295 |
+
{
|
| 101296 |
+
"epoch": 16.48091168091168,
|
| 101297 |
+
"grad_norm": 0.1884051412343979,
|
| 101298 |
+
"learning_rate": 4.034631576060846e-06,
|
| 101299 |
+
"loss": 0.7396,
|
| 101300 |
+
"step": 14470
|
| 101301 |
+
},
|
| 101302 |
+
{
|
| 101303 |
+
"epoch": 16.48205128205128,
|
| 101304 |
+
"grad_norm": 0.18718352913856506,
|
| 101305 |
+
"learning_rate": 4.032093967737341e-06,
|
| 101306 |
+
"loss": 0.7101,
|
| 101307 |
+
"step": 14471
|
| 101308 |
+
},
|
| 101309 |
+
{
|
| 101310 |
+
"epoch": 16.483190883190883,
|
| 101311 |
+
"grad_norm": 0.25985613465309143,
|
| 101312 |
+
"learning_rate": 4.029557087686883e-06,
|
| 101313 |
+
"loss": 0.5803,
|
| 101314 |
+
"step": 14472
|
| 101315 |
+
},
|
| 101316 |
+
{
|
| 101317 |
+
"epoch": 16.484330484330485,
|
| 101318 |
+
"grad_norm": 0.19891129434108734,
|
| 101319 |
+
"learning_rate": 4.027020935997569e-06,
|
| 101320 |
+
"loss": 0.6691,
|
| 101321 |
+
"step": 14473
|
| 101322 |
+
},
|
| 101323 |
+
{
|
| 101324 |
+
"epoch": 16.485470085470084,
|
| 101325 |
+
"grad_norm": 0.23379211127758026,
|
| 101326 |
+
"learning_rate": 4.024485512757489e-06,
|
| 101327 |
+
"loss": 0.4533,
|
| 101328 |
+
"step": 14474
|
| 101329 |
+
},
|
| 101330 |
+
{
|
| 101331 |
+
"epoch": 16.486609686609686,
|
| 101332 |
+
"grad_norm": 0.19329865276813507,
|
| 101333 |
+
"learning_rate": 4.021950818054715e-06,
|
| 101334 |
+
"loss": 0.6181,
|
| 101335 |
+
"step": 14475
|
| 101336 |
+
},
|
| 101337 |
+
{
|
| 101338 |
+
"epoch": 16.48774928774929,
|
| 101339 |
+
"grad_norm": 0.15092094242572784,
|
| 101340 |
+
"learning_rate": 4.019416851977284e-06,
|
| 101341 |
+
"loss": 0.75,
|
| 101342 |
+
"step": 14476
|
| 101343 |
+
},
|
| 101344 |
+
{
|
| 101345 |
+
"epoch": 16.488888888888887,
|
| 101346 |
+
"grad_norm": 0.16361835598945618,
|
| 101347 |
+
"learning_rate": 4.016883614613198e-06,
|
| 101348 |
+
"loss": 0.5619,
|
| 101349 |
+
"step": 14477
|
| 101350 |
+
},
|
| 101351 |
+
{
|
| 101352 |
+
"epoch": 16.49002849002849,
|
| 101353 |
+
"grad_norm": 0.2616507112979889,
|
| 101354 |
+
"learning_rate": 4.014351106050449e-06,
|
| 101355 |
+
"loss": 0.4272,
|
| 101356 |
+
"step": 14478
|
| 101357 |
+
},
|
| 101358 |
+
{
|
| 101359 |
+
"epoch": 16.491168091168092,
|
| 101360 |
+
"grad_norm": 0.16996940970420837,
|
| 101361 |
+
"learning_rate": 4.011819326376995e-06,
|
| 101362 |
+
"loss": 0.7497,
|
| 101363 |
+
"step": 14479
|
| 101364 |
+
},
|
| 101365 |
+
{
|
| 101366 |
+
"epoch": 16.49230769230769,
|
| 101367 |
+
"grad_norm": 0.21917283535003662,
|
| 101368 |
+
"learning_rate": 4.0092882756807805e-06,
|
| 101369 |
+
"loss": 0.4194,
|
| 101370 |
+
"step": 14480
|
| 101371 |
+
},
|
| 101372 |
+
{
|
| 101373 |
+
"epoch": 16.493447293447293,
|
| 101374 |
+
"grad_norm": 0.23285363614559174,
|
| 101375 |
+
"learning_rate": 4.0067579540497006e-06,
|
| 101376 |
+
"loss": 0.3855,
|
| 101377 |
+
"step": 14481
|
| 101378 |
+
},
|
| 101379 |
+
{
|
| 101380 |
+
"epoch": 16.494586894586895,
|
| 101381 |
+
"grad_norm": 0.20673397183418274,
|
| 101382 |
+
"learning_rate": 4.004228361571652e-06,
|
| 101383 |
+
"loss": 0.5267,
|
| 101384 |
+
"step": 14482
|
| 101385 |
+
},
|
| 101386 |
+
{
|
| 101387 |
+
"epoch": 16.495726495726494,
|
| 101388 |
+
"grad_norm": 0.16720455884933472,
|
| 101389 |
+
"learning_rate": 4.001699498334488e-06,
|
| 101390 |
+
"loss": 0.8746,
|
| 101391 |
+
"step": 14483
|
| 101392 |
+
},
|
| 101393 |
+
{
|
| 101394 |
+
"epoch": 16.496866096866096,
|
| 101395 |
+
"grad_norm": 0.17041461169719696,
|
| 101396 |
+
"learning_rate": 3.999171364426055e-06,
|
| 101397 |
+
"loss": 0.7391,
|
| 101398 |
+
"step": 14484
|
| 101399 |
+
},
|
| 101400 |
+
{
|
| 101401 |
+
"epoch": 16.4980056980057,
|
| 101402 |
+
"grad_norm": 0.20871534943580627,
|
| 101403 |
+
"learning_rate": 3.9966439599341375e-06,
|
| 101404 |
+
"loss": 0.6219,
|
| 101405 |
+
"step": 14485
|
| 101406 |
+
},
|
| 101407 |
+
{
|
| 101408 |
+
"epoch": 16.499145299145297,
|
| 101409 |
+
"grad_norm": 0.26762112975120544,
|
| 101410 |
+
"learning_rate": 3.994117284946544e-06,
|
| 101411 |
+
"loss": 0.5247,
|
| 101412 |
+
"step": 14486
|
| 101413 |
+
},
|
| 101414 |
+
{
|
| 101415 |
+
"epoch": 16.5002849002849,
|
| 101416 |
+
"grad_norm": 0.3475840091705322,
|
| 101417 |
+
"learning_rate": 3.991591339551026e-06,
|
| 101418 |
+
"loss": 0.5787,
|
| 101419 |
+
"step": 14487
|
| 101420 |
+
},
|
| 101421 |
+
{
|
| 101422 |
+
"epoch": 16.501424501424502,
|
| 101423 |
+
"grad_norm": 0.19313666224479675,
|
| 101424 |
+
"learning_rate": 3.989066123835311e-06,
|
| 101425 |
+
"loss": 0.7083,
|
| 101426 |
+
"step": 14488
|
| 101427 |
+
},
|
| 101428 |
+
{
|
| 101429 |
+
"epoch": 16.5025641025641,
|
| 101430 |
+
"grad_norm": 0.2199021279811859,
|
| 101431 |
+
"learning_rate": 3.986541637887109e-06,
|
| 101432 |
+
"loss": 0.6433,
|
| 101433 |
+
"step": 14489
|
| 101434 |
+
},
|
| 101435 |
+
{
|
| 101436 |
+
"epoch": 16.503703703703703,
|
| 101437 |
+
"grad_norm": 0.18482081592082977,
|
| 101438 |
+
"learning_rate": 3.984017881794103e-06,
|
| 101439 |
+
"loss": 0.8095,
|
| 101440 |
+
"step": 14490
|
| 101441 |
+
},
|
| 101442 |
+
{
|
| 101443 |
+
"epoch": 16.504843304843305,
|
| 101444 |
+
"grad_norm": 0.1827738881111145,
|
| 101445 |
+
"learning_rate": 3.981494855643958e-06,
|
| 101446 |
+
"loss": 0.6815,
|
| 101447 |
+
"step": 14491
|
| 101448 |
+
},
|
| 101449 |
+
{
|
| 101450 |
+
"epoch": 16.505982905982904,
|
| 101451 |
+
"grad_norm": 0.19812440872192383,
|
| 101452 |
+
"learning_rate": 3.9789725595242884e-06,
|
| 101453 |
+
"loss": 0.7362,
|
| 101454 |
+
"step": 14492
|
| 101455 |
+
},
|
| 101456 |
+
{
|
| 101457 |
+
"epoch": 16.507122507122507,
|
| 101458 |
+
"grad_norm": 0.2004772573709488,
|
| 101459 |
+
"learning_rate": 3.976450993522712e-06,
|
| 101460 |
+
"loss": 0.7225,
|
| 101461 |
+
"step": 14493
|
| 101462 |
+
},
|
| 101463 |
+
{
|
| 101464 |
+
"epoch": 16.50826210826211,
|
| 101465 |
+
"grad_norm": 0.2659797966480255,
|
| 101466 |
+
"learning_rate": 3.973930157726807e-06,
|
| 101467 |
+
"loss": 0.5827,
|
| 101468 |
+
"step": 14494
|
| 101469 |
+
},
|
| 101470 |
+
{
|
| 101471 |
+
"epoch": 16.509401709401708,
|
| 101472 |
+
"grad_norm": 0.2283308357000351,
|
| 101473 |
+
"learning_rate": 3.971410052224136e-06,
|
| 101474 |
+
"loss": 0.7094,
|
| 101475 |
+
"step": 14495
|
| 101476 |
+
},
|
| 101477 |
+
{
|
| 101478 |
+
"epoch": 16.51054131054131,
|
| 101479 |
+
"grad_norm": 0.1879384070634842,
|
| 101480 |
+
"learning_rate": 3.968890677102211e-06,
|
| 101481 |
+
"loss": 0.628,
|
| 101482 |
+
"step": 14496
|
| 101483 |
+
},
|
| 101484 |
+
{
|
| 101485 |
+
"epoch": 16.511680911680912,
|
| 101486 |
+
"grad_norm": 0.2578437626361847,
|
| 101487 |
+
"learning_rate": 3.966372032448554e-06,
|
| 101488 |
+
"loss": 0.4458,
|
| 101489 |
+
"step": 14497
|
| 101490 |
+
},
|
| 101491 |
+
{
|
| 101492 |
+
"epoch": 16.51282051282051,
|
| 101493 |
+
"grad_norm": 0.2004712074995041,
|
| 101494 |
+
"learning_rate": 3.963854118350644e-06,
|
| 101495 |
+
"loss": 0.6449,
|
| 101496 |
+
"step": 14498
|
| 101497 |
+
},
|
| 101498 |
+
{
|
| 101499 |
+
"epoch": 16.513960113960113,
|
| 101500 |
+
"grad_norm": 0.20575734972953796,
|
| 101501 |
+
"learning_rate": 3.961336934895926e-06,
|
| 101502 |
+
"loss": 0.6711,
|
| 101503 |
+
"step": 14499
|
| 101504 |
+
},
|
| 101505 |
+
{
|
| 101506 |
+
"epoch": 16.515099715099716,
|
| 101507 |
+
"grad_norm": 0.2130632847547531,
|
| 101508 |
+
"learning_rate": 3.958820482171832e-06,
|
| 101509 |
+
"loss": 0.6722,
|
| 101510 |
+
"step": 14500
|
| 101511 |
+
},
|
| 101512 |
+
{
|
| 101513 |
+
"epoch": 16.516239316239318,
|
| 101514 |
+
"grad_norm": 0.19198299944400787,
|
| 101515 |
+
"learning_rate": 3.956304760265763e-06,
|
| 101516 |
+
"loss": 0.8823,
|
| 101517 |
+
"step": 14501
|
| 101518 |
+
},
|
| 101519 |
+
{
|
| 101520 |
+
"epoch": 16.517378917378917,
|
| 101521 |
+
"grad_norm": 0.19161811470985413,
|
| 101522 |
+
"learning_rate": 3.953789769265112e-06,
|
| 101523 |
+
"loss": 0.5373,
|
| 101524 |
+
"step": 14502
|
| 101525 |
+
},
|
| 101526 |
+
{
|
| 101527 |
+
"epoch": 16.51851851851852,
|
| 101528 |
+
"grad_norm": 0.24001465737819672,
|
| 101529 |
+
"learning_rate": 3.95127550925721e-06,
|
| 101530 |
+
"loss": 0.7958,
|
| 101531 |
+
"step": 14503
|
| 101532 |
+
},
|
| 101533 |
+
{
|
| 101534 |
+
"epoch": 16.51965811965812,
|
| 101535 |
+
"grad_norm": 0.1894005835056305,
|
| 101536 |
+
"learning_rate": 3.948761980329393e-06,
|
| 101537 |
+
"loss": 0.7023,
|
| 101538 |
+
"step": 14504
|
| 101539 |
+
},
|
| 101540 |
+
{
|
| 101541 |
+
"epoch": 16.52079772079772,
|
| 101542 |
+
"grad_norm": 0.24162663519382477,
|
| 101543 |
+
"learning_rate": 3.946249182568968e-06,
|
| 101544 |
+
"loss": 0.6452,
|
| 101545 |
+
"step": 14505
|
| 101546 |
+
},
|
| 101547 |
+
{
|
| 101548 |
+
"epoch": 16.521937321937322,
|
| 101549 |
+
"grad_norm": 0.19434046745300293,
|
| 101550 |
+
"learning_rate": 3.943737116063209e-06,
|
| 101551 |
+
"loss": 0.9394,
|
| 101552 |
+
"step": 14506
|
| 101553 |
+
},
|
| 101554 |
+
{
|
| 101555 |
+
"epoch": 16.523076923076925,
|
| 101556 |
+
"grad_norm": 0.16493722796440125,
|
| 101557 |
+
"learning_rate": 3.941225780899352e-06,
|
| 101558 |
+
"loss": 0.7118,
|
| 101559 |
+
"step": 14507
|
| 101560 |
+
},
|
| 101561 |
+
{
|
| 101562 |
+
"epoch": 16.524216524216524,
|
| 101563 |
+
"grad_norm": 0.18182341754436493,
|
| 101564 |
+
"learning_rate": 3.938715177164645e-06,
|
| 101565 |
+
"loss": 0.5909,
|
| 101566 |
+
"step": 14508
|
| 101567 |
+
},
|
| 101568 |
+
{
|
| 101569 |
+
"epoch": 16.525356125356126,
|
| 101570 |
+
"grad_norm": 0.20856042206287384,
|
| 101571 |
+
"learning_rate": 3.936205304946275e-06,
|
| 101572 |
+
"loss": 0.4961,
|
| 101573 |
+
"step": 14509
|
| 101574 |
+
},
|
| 101575 |
+
{
|
| 101576 |
+
"epoch": 16.526495726495728,
|
| 101577 |
+
"grad_norm": 0.23776394128799438,
|
| 101578 |
+
"learning_rate": 3.93369616433143e-06,
|
| 101579 |
+
"loss": 0.5035,
|
| 101580 |
+
"step": 14510
|
| 101581 |
+
},
|
| 101582 |
+
{
|
| 101583 |
+
"epoch": 16.527635327635327,
|
| 101584 |
+
"grad_norm": 0.16974763572216034,
|
| 101585 |
+
"learning_rate": 3.931187755407243e-06,
|
| 101586 |
+
"loss": 0.6542,
|
| 101587 |
+
"step": 14511
|
| 101588 |
+
},
|
| 101589 |
+
{
|
| 101590 |
+
"epoch": 16.52877492877493,
|
| 101591 |
+
"grad_norm": 0.18352046608924866,
|
| 101592 |
+
"learning_rate": 3.928680078260844e-06,
|
| 101593 |
+
"loss": 0.6894,
|
| 101594 |
+
"step": 14512
|
| 101595 |
+
},
|
| 101596 |
+
{
|
| 101597 |
+
"epoch": 16.52991452991453,
|
| 101598 |
+
"grad_norm": 0.30340099334716797,
|
| 101599 |
+
"learning_rate": 3.92617313297933e-06,
|
| 101600 |
+
"loss": 0.6206,
|
| 101601 |
+
"step": 14513
|
| 101602 |
+
},
|
| 101603 |
+
{
|
| 101604 |
+
"epoch": 16.53105413105413,
|
| 101605 |
+
"grad_norm": 0.17844238877296448,
|
| 101606 |
+
"learning_rate": 3.9236669196497846e-06,
|
| 101607 |
+
"loss": 0.6471,
|
| 101608 |
+
"step": 14514
|
| 101609 |
+
},
|
| 101610 |
+
{
|
| 101611 |
+
"epoch": 16.532193732193733,
|
| 101612 |
+
"grad_norm": 0.20907242596149445,
|
| 101613 |
+
"learning_rate": 3.921161438359242e-06,
|
| 101614 |
+
"loss": 0.6187,
|
| 101615 |
+
"step": 14515
|
| 101616 |
+
},
|
| 101617 |
+
{
|
| 101618 |
+
"epoch": 16.533333333333335,
|
| 101619 |
+
"grad_norm": 0.21441438794136047,
|
| 101620 |
+
"learning_rate": 3.918656689194727e-06,
|
| 101621 |
+
"loss": 0.6341,
|
| 101622 |
+
"step": 14516
|
| 101623 |
+
},
|
| 101624 |
+
{
|
| 101625 |
+
"epoch": 16.534472934472934,
|
| 101626 |
+
"grad_norm": 0.19238892197608948,
|
| 101627 |
+
"learning_rate": 3.916152672243243e-06,
|
| 101628 |
+
"loss": 0.5023,
|
| 101629 |
+
"step": 14517
|
| 101630 |
+
},
|
| 101631 |
+
{
|
| 101632 |
+
"epoch": 16.535612535612536,
|
| 101633 |
+
"grad_norm": 0.19719743728637695,
|
| 101634 |
+
"learning_rate": 3.913649387591756e-06,
|
| 101635 |
+
"loss": 0.7562,
|
| 101636 |
+
"step": 14518
|
| 101637 |
+
},
|
| 101638 |
+
{
|
| 101639 |
+
"epoch": 16.53675213675214,
|
| 101640 |
+
"grad_norm": 0.1943766176700592,
|
| 101641 |
+
"learning_rate": 3.911146835327212e-06,
|
| 101642 |
+
"loss": 0.6566,
|
| 101643 |
+
"step": 14519
|
| 101644 |
+
},
|
| 101645 |
+
{
|
| 101646 |
+
"epoch": 16.537891737891737,
|
| 101647 |
+
"grad_norm": 0.21831713616847992,
|
| 101648 |
+
"learning_rate": 3.908645015536533e-06,
|
| 101649 |
+
"loss": 0.5037,
|
| 101650 |
+
"step": 14520
|
| 101651 |
+
},
|
| 101652 |
+
{
|
| 101653 |
+
"epoch": 16.53903133903134,
|
| 101654 |
+
"grad_norm": 0.2081470787525177,
|
| 101655 |
+
"learning_rate": 3.9061439283066216e-06,
|
| 101656 |
+
"loss": 0.7748,
|
| 101657 |
+
"step": 14521
|
| 101658 |
+
},
|
| 101659 |
+
{
|
| 101660 |
+
"epoch": 16.540170940170942,
|
| 101661 |
+
"grad_norm": 0.21147605776786804,
|
| 101662 |
+
"learning_rate": 3.903643573724333e-06,
|
| 101663 |
+
"loss": 0.6704,
|
| 101664 |
+
"step": 14522
|
| 101665 |
+
},
|
| 101666 |
+
{
|
| 101667 |
+
"epoch": 16.54131054131054,
|
| 101668 |
+
"grad_norm": 0.17337815463542938,
|
| 101669 |
+
"learning_rate": 3.901143951876518e-06,
|
| 101670 |
+
"loss": 0.6969,
|
| 101671 |
+
"step": 14523
|
| 101672 |
+
},
|
| 101673 |
+
{
|
| 101674 |
+
"epoch": 16.542450142450143,
|
| 101675 |
+
"grad_norm": 0.21074214577674866,
|
| 101676 |
+
"learning_rate": 3.8986450628499955e-06,
|
| 101677 |
+
"loss": 0.505,
|
| 101678 |
+
"step": 14524
|
| 101679 |
+
},
|
| 101680 |
+
{
|
| 101681 |
+
"epoch": 16.543589743589745,
|
| 101682 |
+
"grad_norm": 0.1778637170791626,
|
| 101683 |
+
"learning_rate": 3.896146906731565e-06,
|
| 101684 |
+
"loss": 0.7977,
|
| 101685 |
+
"step": 14525
|
| 101686 |
+
},
|
| 101687 |
+
{
|
| 101688 |
+
"epoch": 16.544729344729344,
|
| 101689 |
+
"grad_norm": 0.17211110889911652,
|
| 101690 |
+
"learning_rate": 3.893649483607984e-06,
|
| 101691 |
+
"loss": 0.6107,
|
| 101692 |
+
"step": 14526
|
| 101693 |
+
},
|
| 101694 |
+
{
|
| 101695 |
+
"epoch": 16.545868945868946,
|
| 101696 |
+
"grad_norm": 0.15985257923603058,
|
| 101697 |
+
"learning_rate": 3.891152793565997e-06,
|
| 101698 |
+
"loss": 0.8332,
|
| 101699 |
+
"step": 14527
|
| 101700 |
+
},
|
| 101701 |
+
{
|
| 101702 |
+
"epoch": 16.54700854700855,
|
| 101703 |
+
"grad_norm": 0.18682222068309784,
|
| 101704 |
+
"learning_rate": 3.888656836692325e-06,
|
| 101705 |
+
"loss": 0.5215,
|
| 101706 |
+
"step": 14528
|
| 101707 |
+
},
|
| 101708 |
+
{
|
| 101709 |
+
"epoch": 16.548148148148147,
|
| 101710 |
+
"grad_norm": 0.18926292657852173,
|
| 101711 |
+
"learning_rate": 3.886161613073655e-06,
|
| 101712 |
+
"loss": 0.7584,
|
| 101713 |
+
"step": 14529
|
| 101714 |
+
},
|
| 101715 |
+
{
|
| 101716 |
+
"epoch": 16.54928774928775,
|
| 101717 |
+
"grad_norm": 0.20884409546852112,
|
| 101718 |
+
"learning_rate": 3.883667122796658e-06,
|
| 101719 |
+
"loss": 0.7324,
|
| 101720 |
+
"step": 14530
|
| 101721 |
+
},
|
| 101722 |
+
{
|
| 101723 |
+
"epoch": 16.550427350427352,
|
| 101724 |
+
"grad_norm": 0.22595719993114471,
|
| 101725 |
+
"learning_rate": 3.881173365947971e-06,
|
| 101726 |
+
"loss": 0.685,
|
| 101727 |
+
"step": 14531
|
| 101728 |
+
},
|
| 101729 |
+
{
|
| 101730 |
+
"epoch": 16.55156695156695,
|
| 101731 |
+
"grad_norm": 0.2707895338535309,
|
| 101732 |
+
"learning_rate": 3.878680342614216e-06,
|
| 101733 |
+
"loss": 0.509,
|
| 101734 |
+
"step": 14532
|
| 101735 |
+
},
|
| 101736 |
+
{
|
| 101737 |
+
"epoch": 16.552706552706553,
|
| 101738 |
+
"grad_norm": 0.24852532148361206,
|
| 101739 |
+
"learning_rate": 3.8761880528819685e-06,
|
| 101740 |
+
"loss": 0.5025,
|
| 101741 |
+
"step": 14533
|
| 101742 |
+
},
|
| 101743 |
+
{
|
| 101744 |
+
"epoch": 16.553846153846155,
|
| 101745 |
+
"grad_norm": 0.23397788405418396,
|
| 101746 |
+
"learning_rate": 3.8736964968378035e-06,
|
| 101747 |
+
"loss": 0.6484,
|
| 101748 |
+
"step": 14534
|
| 101749 |
+
},
|
| 101750 |
+
{
|
| 101751 |
+
"epoch": 16.554985754985754,
|
| 101752 |
+
"grad_norm": 0.2433735877275467,
|
| 101753 |
+
"learning_rate": 3.871205674568257e-06,
|
| 101754 |
+
"loss": 0.6137,
|
| 101755 |
+
"step": 14535
|
| 101756 |
+
},
|
| 101757 |
+
{
|
| 101758 |
+
"epoch": 16.556125356125357,
|
| 101759 |
+
"grad_norm": 0.23208960890769958,
|
| 101760 |
+
"learning_rate": 3.8687155861598465e-06,
|
| 101761 |
+
"loss": 0.6291,
|
| 101762 |
+
"step": 14536
|
| 101763 |
+
},
|
| 101764 |
+
{
|
| 101765 |
+
"epoch": 16.55726495726496,
|
| 101766 |
+
"grad_norm": 0.22305455803871155,
|
| 101767 |
+
"learning_rate": 3.8662262316990464e-06,
|
| 101768 |
+
"loss": 0.5538,
|
| 101769 |
+
"step": 14537
|
| 101770 |
+
},
|
| 101771 |
+
{
|
| 101772 |
+
"epoch": 16.558404558404558,
|
| 101773 |
+
"grad_norm": 0.1901799887418747,
|
| 101774 |
+
"learning_rate": 3.8637376112723305e-06,
|
| 101775 |
+
"loss": 0.573,
|
| 101776 |
+
"step": 14538
|
| 101777 |
+
},
|
| 101778 |
+
{
|
| 101779 |
+
"epoch": 16.55954415954416,
|
| 101780 |
+
"grad_norm": 0.1874188780784607,
|
| 101781 |
+
"learning_rate": 3.861249724966132e-06,
|
| 101782 |
+
"loss": 0.6841,
|
| 101783 |
+
"step": 14539
|
| 101784 |
+
},
|
| 101785 |
+
{
|
| 101786 |
+
"epoch": 16.560683760683762,
|
| 101787 |
+
"grad_norm": 0.26205453276634216,
|
| 101788 |
+
"learning_rate": 3.8587625728668615e-06,
|
| 101789 |
+
"loss": 0.607,
|
| 101790 |
+
"step": 14540
|
| 101791 |
+
},
|
| 101792 |
+
{
|
| 101793 |
+
"epoch": 16.56182336182336,
|
| 101794 |
+
"grad_norm": 0.20798297226428986,
|
| 101795 |
+
"learning_rate": 3.856276155060906e-06,
|
| 101796 |
+
"loss": 0.7327,
|
| 101797 |
+
"step": 14541
|
| 101798 |
+
},
|
| 101799 |
+
{
|
| 101800 |
+
"epoch": 16.562962962962963,
|
| 101801 |
+
"grad_norm": 0.23050467669963837,
|
| 101802 |
+
"learning_rate": 3.853790471634628e-06,
|
| 101803 |
+
"loss": 0.4925,
|
| 101804 |
+
"step": 14542
|
| 101805 |
+
},
|
| 101806 |
+
{
|
| 101807 |
+
"epoch": 16.564102564102566,
|
| 101808 |
+
"grad_norm": 0.19091200828552246,
|
| 101809 |
+
"learning_rate": 3.851305522674361e-06,
|
| 101810 |
+
"loss": 0.7183,
|
| 101811 |
+
"step": 14543
|
| 101812 |
+
},
|
| 101813 |
+
{
|
| 101814 |
+
"epoch": 16.565242165242164,
|
| 101815 |
+
"grad_norm": 0.23158277571201324,
|
| 101816 |
+
"learning_rate": 3.848821308266406e-06,
|
| 101817 |
+
"loss": 0.7537,
|
| 101818 |
+
"step": 14544
|
| 101819 |
+
},
|
| 101820 |
+
{
|
| 101821 |
+
"epoch": 16.566381766381767,
|
| 101822 |
+
"grad_norm": 0.19433674216270447,
|
| 101823 |
+
"learning_rate": 3.846337828497057e-06,
|
| 101824 |
+
"loss": 0.6727,
|
| 101825 |
+
"step": 14545
|
| 101826 |
+
},
|
| 101827 |
+
{
|
| 101828 |
+
"epoch": 16.56752136752137,
|
| 101829 |
+
"grad_norm": 0.2191799134016037,
|
| 101830 |
+
"learning_rate": 3.843855083452563e-06,
|
| 101831 |
+
"loss": 0.4923,
|
| 101832 |
+
"step": 14546
|
| 101833 |
+
},
|
| 101834 |
+
{
|
| 101835 |
+
"epoch": 16.568660968660968,
|
| 101836 |
+
"grad_norm": 0.24010121822357178,
|
| 101837 |
+
"learning_rate": 3.841373073219171e-06,
|
| 101838 |
+
"loss": 0.5411,
|
| 101839 |
+
"step": 14547
|
| 101840 |
+
},
|
| 101841 |
+
{
|
| 101842 |
+
"epoch": 16.56980056980057,
|
| 101843 |
+
"grad_norm": 0.20162785053253174,
|
| 101844 |
+
"learning_rate": 3.838891797883074e-06,
|
| 101845 |
+
"loss": 0.789,
|
| 101846 |
+
"step": 14548
|
| 101847 |
+
},
|
| 101848 |
+
{
|
| 101849 |
+
"epoch": 16.570940170940172,
|
| 101850 |
+
"grad_norm": 0.21228350698947906,
|
| 101851 |
+
"learning_rate": 3.836411257530453e-06,
|
| 101852 |
+
"loss": 0.6263,
|
| 101853 |
+
"step": 14549
|
| 101854 |
+
},
|
| 101855 |
+
{
|
| 101856 |
+
"epoch": 16.57207977207977,
|
| 101857 |
+
"grad_norm": 0.18376502394676208,
|
| 101858 |
+
"learning_rate": 3.833931452247474e-06,
|
| 101859 |
+
"loss": 0.7464,
|
| 101860 |
+
"step": 14550
|
| 101861 |
+
},
|
| 101862 |
+
{
|
| 101863 |
+
"epoch": 16.573219373219374,
|
| 101864 |
+
"grad_norm": 0.1630278378725052,
|
| 101865 |
+
"learning_rate": 3.83145238212026e-06,
|
| 101866 |
+
"loss": 0.5857,
|
| 101867 |
+
"step": 14551
|
| 101868 |
+
},
|
| 101869 |
+
{
|
| 101870 |
+
"epoch": 16.574358974358976,
|
| 101871 |
+
"grad_norm": 0.16570539772510529,
|
| 101872 |
+
"learning_rate": 3.828974047234921e-06,
|
| 101873 |
+
"loss": 0.7666,
|
| 101874 |
+
"step": 14552
|
| 101875 |
+
},
|
| 101876 |
+
{
|
| 101877 |
+
"epoch": 16.575498575498575,
|
| 101878 |
+
"grad_norm": 0.1783129870891571,
|
| 101879 |
+
"learning_rate": 3.82649644767753e-06,
|
| 101880 |
+
"loss": 0.8422,
|
| 101881 |
+
"step": 14553
|
| 101882 |
+
},
|
| 101883 |
+
{
|
| 101884 |
+
"epoch": 16.576638176638177,
|
| 101885 |
+
"grad_norm": 0.1791054606437683,
|
| 101886 |
+
"learning_rate": 3.824019583534147e-06,
|
| 101887 |
+
"loss": 0.7483,
|
| 101888 |
+
"step": 14554
|
| 101889 |
+
},
|
| 101890 |
+
{
|
| 101891 |
+
"epoch": 16.57777777777778,
|
| 101892 |
+
"grad_norm": 0.22978992760181427,
|
| 101893 |
+
"learning_rate": 3.821543454890805e-06,
|
| 101894 |
+
"loss": 0.3788,
|
| 101895 |
+
"step": 14555
|
| 101896 |
+
},
|
| 101897 |
+
{
|
| 101898 |
+
"epoch": 16.578917378917378,
|
| 101899 |
+
"grad_norm": 0.2110435962677002,
|
| 101900 |
+
"learning_rate": 3.819068061833492e-06,
|
| 101901 |
+
"loss": 0.613,
|
| 101902 |
+
"step": 14556
|
| 101903 |
+
},
|
| 101904 |
+
{
|
| 101905 |
+
"epoch": 16.58005698005698,
|
| 101906 |
+
"grad_norm": 0.17531338334083557,
|
| 101907 |
+
"learning_rate": 3.816593404448193e-06,
|
| 101908 |
+
"loss": 0.8015,
|
| 101909 |
+
"step": 14557
|
| 101910 |
+
},
|
| 101911 |
+
{
|
| 101912 |
+
"epoch": 16.581196581196583,
|
| 101913 |
+
"grad_norm": 0.19135034084320068,
|
| 101914 |
+
"learning_rate": 3.8141194828208602e-06,
|
| 101915 |
+
"loss": 0.576,
|
| 101916 |
+
"step": 14558
|
| 101917 |
+
},
|
| 101918 |
+
{
|
| 101919 |
+
"epoch": 16.58233618233618,
|
| 101920 |
+
"grad_norm": 0.17526598274707794,
|
| 101921 |
+
"learning_rate": 3.8116462970374246e-06,
|
| 101922 |
+
"loss": 0.7397,
|
| 101923 |
+
"step": 14559
|
| 101924 |
+
},
|
| 101925 |
+
{
|
| 101926 |
+
"epoch": 16.583475783475784,
|
| 101927 |
+
"grad_norm": 0.17120303213596344,
|
| 101928 |
+
"learning_rate": 3.8091738471837778e-06,
|
| 101929 |
+
"loss": 0.578,
|
| 101930 |
+
"step": 14560
|
| 101931 |
+
},
|
| 101932 |
+
{
|
| 101933 |
+
"epoch": 16.584615384615386,
|
| 101934 |
+
"grad_norm": 0.20212437212467194,
|
| 101935 |
+
"learning_rate": 3.8067021333457965e-06,
|
| 101936 |
+
"loss": 0.6067,
|
| 101937 |
+
"step": 14561
|
| 101938 |
+
},
|
| 101939 |
+
{
|
| 101940 |
+
"epoch": 16.585754985754985,
|
| 101941 |
+
"grad_norm": 0.16685090959072113,
|
| 101942 |
+
"learning_rate": 3.804231155609331e-06,
|
| 101943 |
+
"loss": 0.6871,
|
| 101944 |
+
"step": 14562
|
| 101945 |
+
},
|
| 101946 |
+
{
|
| 101947 |
+
"epoch": 16.586894586894587,
|
| 101948 |
+
"grad_norm": 0.2742098867893219,
|
| 101949 |
+
"learning_rate": 3.8017609140602067e-06,
|
| 101950 |
+
"loss": 0.4295,
|
| 101951 |
+
"step": 14563
|
| 101952 |
+
},
|
| 101953 |
+
{
|
| 101954 |
+
"epoch": 16.58803418803419,
|
| 101955 |
+
"grad_norm": 0.18924805521965027,
|
| 101956 |
+
"learning_rate": 3.7992914087842224e-06,
|
| 101957 |
+
"loss": 0.6182,
|
| 101958 |
+
"step": 14564
|
| 101959 |
+
},
|
| 101960 |
+
{
|
| 101961 |
+
"epoch": 16.58917378917379,
|
| 101962 |
+
"grad_norm": 0.17991560697555542,
|
| 101963 |
+
"learning_rate": 3.796822639867148e-06,
|
| 101964 |
+
"loss": 0.6326,
|
| 101965 |
+
"step": 14565
|
| 101966 |
+
},
|
| 101967 |
+
{
|
| 101968 |
+
"epoch": 16.59031339031339,
|
| 101969 |
+
"grad_norm": 0.17940419912338257,
|
| 101970 |
+
"learning_rate": 3.79435460739474e-06,
|
| 101971 |
+
"loss": 0.6089,
|
| 101972 |
+
"step": 14566
|
| 101973 |
+
},
|
| 101974 |
+
{
|
| 101975 |
+
"epoch": 16.591452991452993,
|
| 101976 |
+
"grad_norm": 0.1745978444814682,
|
| 101977 |
+
"learning_rate": 3.7918873114527047e-06,
|
| 101978 |
+
"loss": 0.5845,
|
| 101979 |
+
"step": 14567
|
| 101980 |
+
},
|
| 101981 |
+
{
|
| 101982 |
+
"epoch": 16.59259259259259,
|
| 101983 |
+
"grad_norm": 0.2253025323152542,
|
| 101984 |
+
"learning_rate": 3.789420752126746e-06,
|
| 101985 |
+
"loss": 0.7228,
|
| 101986 |
+
"step": 14568
|
| 101987 |
+
},
|
| 101988 |
+
{
|
| 101989 |
+
"epoch": 16.593732193732194,
|
| 101990 |
+
"grad_norm": 0.22936908900737762,
|
| 101991 |
+
"learning_rate": 3.7869549295025343e-06,
|
| 101992 |
+
"loss": 0.6392,
|
| 101993 |
+
"step": 14569
|
| 101994 |
+
},
|
| 101995 |
+
{
|
| 101996 |
+
"epoch": 16.594871794871796,
|
| 101997 |
+
"grad_norm": 0.18933890759944916,
|
| 101998 |
+
"learning_rate": 3.78448984366572e-06,
|
| 101999 |
+
"loss": 0.7953,
|
| 102000 |
+
"step": 14570
|
| 102001 |
+
},
|
| 102002 |
+
{
|
| 102003 |
+
"epoch": 16.596011396011395,
|
| 102004 |
+
"grad_norm": 0.24416644871234894,
|
| 102005 |
+
"learning_rate": 3.7820254947019073e-06,
|
| 102006 |
+
"loss": 0.6635,
|
| 102007 |
+
"step": 14571
|
| 102008 |
+
},
|
| 102009 |
+
{
|
| 102010 |
+
"epoch": 16.597150997150997,
|
| 102011 |
+
"grad_norm": 0.22809933125972748,
|
| 102012 |
+
"learning_rate": 3.7795618826967026e-06,
|
| 102013 |
+
"loss": 0.5825,
|
| 102014 |
+
"step": 14572
|
| 102015 |
+
},
|
| 102016 |
+
{
|
| 102017 |
+
"epoch": 16.5982905982906,
|
| 102018 |
+
"grad_norm": 0.17031829059123993,
|
| 102019 |
+
"learning_rate": 3.777099007735668e-06,
|
| 102020 |
+
"loss": 0.5184,
|
| 102021 |
+
"step": 14573
|
| 102022 |
+
},
|
| 102023 |
+
{
|
| 102024 |
+
"epoch": 16.5994301994302,
|
| 102025 |
+
"grad_norm": 0.2156430184841156,
|
| 102026 |
+
"learning_rate": 3.7746368699043496e-06,
|
| 102027 |
+
"loss": 0.5978,
|
| 102028 |
+
"step": 14574
|
| 102029 |
+
},
|
| 102030 |
+
{
|
| 102031 |
+
"epoch": 16.6005698005698,
|
| 102032 |
+
"grad_norm": 0.21404266357421875,
|
| 102033 |
+
"learning_rate": 3.772175469288264e-06,
|
| 102034 |
+
"loss": 0.3838,
|
| 102035 |
+
"step": 14575
|
| 102036 |
+
},
|
| 102037 |
+
{
|
| 102038 |
+
"epoch": 16.601709401709403,
|
| 102039 |
+
"grad_norm": 0.17592374980449677,
|
| 102040 |
+
"learning_rate": 3.7697148059728986e-06,
|
| 102041 |
+
"loss": 0.4915,
|
| 102042 |
+
"step": 14576
|
| 102043 |
+
},
|
| 102044 |
+
{
|
| 102045 |
+
"epoch": 16.602849002849002,
|
| 102046 |
+
"grad_norm": 0.2183118611574173,
|
| 102047 |
+
"learning_rate": 3.7672548800437274e-06,
|
| 102048 |
+
"loss": 0.4314,
|
| 102049 |
+
"step": 14577
|
| 102050 |
+
},
|
| 102051 |
+
{
|
| 102052 |
+
"epoch": 16.603988603988604,
|
| 102053 |
+
"grad_norm": 0.21222040057182312,
|
| 102054 |
+
"learning_rate": 3.7647956915861786e-06,
|
| 102055 |
+
"loss": 0.4624,
|
| 102056 |
+
"step": 14578
|
| 102057 |
+
},
|
| 102058 |
+
{
|
| 102059 |
+
"epoch": 16.605128205128207,
|
| 102060 |
+
"grad_norm": 0.1846335232257843,
|
| 102061 |
+
"learning_rate": 3.762337240685673e-06,
|
| 102062 |
+
"loss": 0.7217,
|
| 102063 |
+
"step": 14579
|
| 102064 |
+
},
|
| 102065 |
+
{
|
| 102066 |
+
"epoch": 16.606267806267805,
|
| 102067 |
+
"grad_norm": 0.30568239092826843,
|
| 102068 |
+
"learning_rate": 3.759879527427601e-06,
|
| 102069 |
+
"loss": 0.8213,
|
| 102070 |
+
"step": 14580
|
| 102071 |
+
},
|
| 102072 |
+
{
|
| 102073 |
+
"epoch": 16.607407407407408,
|
| 102074 |
+
"grad_norm": 0.202920064330101,
|
| 102075 |
+
"learning_rate": 3.757422551897327e-06,
|
| 102076 |
+
"loss": 0.2951,
|
| 102077 |
+
"step": 14581
|
| 102078 |
+
},
|
| 102079 |
+
{
|
| 102080 |
+
"epoch": 16.60854700854701,
|
| 102081 |
+
"grad_norm": 0.19714248180389404,
|
| 102082 |
+
"learning_rate": 3.7549663141801805e-06,
|
| 102083 |
+
"loss": 0.6109,
|
| 102084 |
+
"step": 14582
|
| 102085 |
+
},
|
| 102086 |
+
{
|
| 102087 |
+
"epoch": 16.60968660968661,
|
| 102088 |
+
"grad_norm": 0.17863404750823975,
|
| 102089 |
+
"learning_rate": 3.752510814361476e-06,
|
| 102090 |
+
"loss": 0.5999,
|
| 102091 |
+
"step": 14583
|
| 102092 |
+
},
|
| 102093 |
+
{
|
| 102094 |
+
"epoch": 16.61082621082621,
|
| 102095 |
+
"grad_norm": 0.16537688672542572,
|
| 102096 |
+
"learning_rate": 3.7500560525265044e-06,
|
| 102097 |
+
"loss": 0.7788,
|
| 102098 |
+
"step": 14584
|
| 102099 |
+
},
|
| 102100 |
+
{
|
| 102101 |
+
"epoch": 16.611965811965813,
|
| 102102 |
+
"grad_norm": 0.21520625054836273,
|
| 102103 |
+
"learning_rate": 3.7476020287605217e-06,
|
| 102104 |
+
"loss": 0.5608,
|
| 102105 |
+
"step": 14585
|
| 102106 |
+
},
|
| 102107 |
+
{
|
| 102108 |
+
"epoch": 16.613105413105412,
|
| 102109 |
+
"grad_norm": 0.19842830300331116,
|
| 102110 |
+
"learning_rate": 3.745148743148766e-06,
|
| 102111 |
+
"loss": 0.6802,
|
| 102112 |
+
"step": 14586
|
| 102113 |
+
},
|
| 102114 |
+
{
|
| 102115 |
+
"epoch": 16.614245014245014,
|
| 102116 |
+
"grad_norm": 0.21955198049545288,
|
| 102117 |
+
"learning_rate": 3.7426961957764434e-06,
|
| 102118 |
+
"loss": 0.4403,
|
| 102119 |
+
"step": 14587
|
| 102120 |
+
},
|
| 102121 |
+
{
|
| 102122 |
+
"epoch": 16.615384615384617,
|
| 102123 |
+
"grad_norm": 0.21487829089164734,
|
| 102124 |
+
"learning_rate": 3.740244386728742e-06,
|
| 102125 |
+
"loss": 0.6714,
|
| 102126 |
+
"step": 14588
|
| 102127 |
+
},
|
| 102128 |
+
{
|
| 102129 |
+
"epoch": 16.616524216524216,
|
| 102130 |
+
"grad_norm": 0.2130952626466751,
|
| 102131 |
+
"learning_rate": 3.737793316090821e-06,
|
| 102132 |
+
"loss": 0.5808,
|
| 102133 |
+
"step": 14589
|
| 102134 |
+
},
|
| 102135 |
+
{
|
| 102136 |
+
"epoch": 16.617663817663818,
|
| 102137 |
+
"grad_norm": 0.20674735307693481,
|
| 102138 |
+
"learning_rate": 3.7353429839478064e-06,
|
| 102139 |
+
"loss": 0.724,
|
| 102140 |
+
"step": 14590
|
| 102141 |
+
},
|
| 102142 |
+
{
|
| 102143 |
+
"epoch": 16.61880341880342,
|
| 102144 |
+
"grad_norm": 0.2151806503534317,
|
| 102145 |
+
"learning_rate": 3.732893390384806e-06,
|
| 102146 |
+
"loss": 0.5662,
|
| 102147 |
+
"step": 14591
|
| 102148 |
+
},
|
| 102149 |
+
{
|
| 102150 |
+
"epoch": 16.61994301994302,
|
| 102151 |
+
"grad_norm": 0.19295625388622284,
|
| 102152 |
+
"learning_rate": 3.7304445354869044e-06,
|
| 102153 |
+
"loss": 0.6498,
|
| 102154 |
+
"step": 14592
|
| 102155 |
+
},
|
| 102156 |
+
{
|
| 102157 |
+
"epoch": 16.62108262108262,
|
| 102158 |
+
"grad_norm": 0.1685391366481781,
|
| 102159 |
+
"learning_rate": 3.727996419339161e-06,
|
| 102160 |
+
"loss": 0.6361,
|
| 102161 |
+
"step": 14593
|
| 102162 |
+
},
|
| 102163 |
+
{
|
| 102164 |
+
"epoch": 16.622222222222224,
|
| 102165 |
+
"grad_norm": 0.24617771804332733,
|
| 102166 |
+
"learning_rate": 3.725549042026594e-06,
|
| 102167 |
+
"loss": 0.4309,
|
| 102168 |
+
"step": 14594
|
| 102169 |
+
},
|
| 102170 |
+
{
|
| 102171 |
+
"epoch": 16.623361823361822,
|
| 102172 |
+
"grad_norm": 0.22348542511463165,
|
| 102173 |
+
"learning_rate": 3.723102403634213e-06,
|
| 102174 |
+
"loss": 0.5611,
|
| 102175 |
+
"step": 14595
|
| 102176 |
+
},
|
| 102177 |
+
{
|
| 102178 |
+
"epoch": 16.624501424501425,
|
| 102179 |
+
"grad_norm": 0.18756158649921417,
|
| 102180 |
+
"learning_rate": 3.720656504246997e-06,
|
| 102181 |
+
"loss": 0.6909,
|
| 102182 |
+
"step": 14596
|
| 102183 |
+
},
|
| 102184 |
+
{
|
| 102185 |
+
"epoch": 16.625641025641027,
|
| 102186 |
+
"grad_norm": 0.2298748642206192,
|
| 102187 |
+
"learning_rate": 3.7182113439499013e-06,
|
| 102188 |
+
"loss": 0.5768,
|
| 102189 |
+
"step": 14597
|
| 102190 |
+
},
|
| 102191 |
+
{
|
| 102192 |
+
"epoch": 16.626780626780626,
|
| 102193 |
+
"grad_norm": 0.1706974357366562,
|
| 102194 |
+
"learning_rate": 3.7157669228278486e-06,
|
| 102195 |
+
"loss": 0.7419,
|
| 102196 |
+
"step": 14598
|
| 102197 |
+
},
|
| 102198 |
+
{
|
| 102199 |
+
"epoch": 16.627920227920228,
|
| 102200 |
+
"grad_norm": 0.24354368448257446,
|
| 102201 |
+
"learning_rate": 3.713323240965744e-06,
|
| 102202 |
+
"loss": 0.6543,
|
| 102203 |
+
"step": 14599
|
| 102204 |
+
},
|
| 102205 |
+
{
|
| 102206 |
+
"epoch": 16.62905982905983,
|
| 102207 |
+
"grad_norm": 0.2417616844177246,
|
| 102208 |
+
"learning_rate": 3.7108802984484686e-06,
|
| 102209 |
+
"loss": 0.3807,
|
| 102210 |
+
"step": 14600
|
| 102211 |
+
},
|
| 102212 |
+
{
|
| 102213 |
+
"epoch": 16.63019943019943,
|
| 102214 |
+
"grad_norm": 0.19453510642051697,
|
| 102215 |
+
"learning_rate": 3.7084380953608582e-06,
|
| 102216 |
+
"loss": 0.511,
|
| 102217 |
+
"step": 14601
|
| 102218 |
+
},
|
| 102219 |
+
{
|
| 102220 |
+
"epoch": 16.63133903133903,
|
| 102221 |
+
"grad_norm": 0.20993264019489288,
|
| 102222 |
+
"learning_rate": 3.705996631787745e-06,
|
| 102223 |
+
"loss": 0.3764,
|
| 102224 |
+
"step": 14602
|
| 102225 |
+
},
|
| 102226 |
+
{
|
| 102227 |
+
"epoch": 16.632478632478634,
|
| 102228 |
+
"grad_norm": 0.19857636094093323,
|
| 102229 |
+
"learning_rate": 3.703555907813927e-06,
|
| 102230 |
+
"loss": 0.7042,
|
| 102231 |
+
"step": 14603
|
| 102232 |
+
},
|
| 102233 |
+
{
|
| 102234 |
+
"epoch": 16.633618233618233,
|
| 102235 |
+
"grad_norm": 0.23784996569156647,
|
| 102236 |
+
"learning_rate": 3.7011159235241845e-06,
|
| 102237 |
+
"loss": 0.5527,
|
| 102238 |
+
"step": 14604
|
| 102239 |
+
},
|
| 102240 |
+
{
|
| 102241 |
+
"epoch": 16.634757834757835,
|
| 102242 |
+
"grad_norm": 0.1775774359703064,
|
| 102243 |
+
"learning_rate": 3.698676679003252e-06,
|
| 102244 |
+
"loss": 0.7023,
|
| 102245 |
+
"step": 14605
|
| 102246 |
+
},
|
| 102247 |
+
{
|
| 102248 |
+
"epoch": 16.635897435897437,
|
| 102249 |
+
"grad_norm": 0.21216173470020294,
|
| 102250 |
+
"learning_rate": 3.696238174335856e-06,
|
| 102251 |
+
"loss": 0.5788,
|
| 102252 |
+
"step": 14606
|
| 102253 |
+
},
|
| 102254 |
+
{
|
| 102255 |
+
"epoch": 16.637037037037036,
|
| 102256 |
+
"grad_norm": 0.22988300025463104,
|
| 102257 |
+
"learning_rate": 3.6938004096066956e-06,
|
| 102258 |
+
"loss": 0.7976,
|
| 102259 |
+
"step": 14607
|
| 102260 |
+
},
|
| 102261 |
+
{
|
| 102262 |
+
"epoch": 16.63817663817664,
|
| 102263 |
+
"grad_norm": 0.1869416981935501,
|
| 102264 |
+
"learning_rate": 3.6913633849004397e-06,
|
| 102265 |
+
"loss": 0.636,
|
| 102266 |
+
"step": 14608
|
| 102267 |
+
},
|
| 102268 |
+
{
|
| 102269 |
+
"epoch": 16.63931623931624,
|
| 102270 |
+
"grad_norm": 0.21013885736465454,
|
| 102271 |
+
"learning_rate": 3.6889271003017313e-06,
|
| 102272 |
+
"loss": 0.5862,
|
| 102273 |
+
"step": 14609
|
| 102274 |
+
},
|
| 102275 |
+
{
|
| 102276 |
+
"epoch": 16.64045584045584,
|
| 102277 |
+
"grad_norm": 0.19351568818092346,
|
| 102278 |
+
"learning_rate": 3.6864915558951886e-06,
|
| 102279 |
+
"loss": 0.5212,
|
| 102280 |
+
"step": 14610
|
| 102281 |
+
},
|
| 102282 |
+
{
|
| 102283 |
+
"epoch": 16.64159544159544,
|
| 102284 |
+
"grad_norm": 0.17369745671749115,
|
| 102285 |
+
"learning_rate": 3.684056751765416e-06,
|
| 102286 |
+
"loss": 0.9157,
|
| 102287 |
+
"step": 14611
|
| 102288 |
+
},
|
| 102289 |
+
{
|
| 102290 |
+
"epoch": 16.642735042735044,
|
| 102291 |
+
"grad_norm": 0.1538432538509369,
|
| 102292 |
+
"learning_rate": 3.6816226879969636e-06,
|
| 102293 |
+
"loss": 0.654,
|
| 102294 |
+
"step": 14612
|
| 102295 |
+
},
|
| 102296 |
+
{
|
| 102297 |
+
"epoch": 16.643874643874643,
|
| 102298 |
+
"grad_norm": 0.27092444896698,
|
| 102299 |
+
"learning_rate": 3.679189364674382e-06,
|
| 102300 |
+
"loss": 0.3433,
|
| 102301 |
+
"step": 14613
|
| 102302 |
+
},
|
| 102303 |
+
{
|
| 102304 |
+
"epoch": 16.645014245014245,
|
| 102305 |
+
"grad_norm": 0.22474908828735352,
|
| 102306 |
+
"learning_rate": 3.6767567818821847e-06,
|
| 102307 |
+
"loss": 0.4906,
|
| 102308 |
+
"step": 14614
|
| 102309 |
+
},
|
| 102310 |
+
{
|
| 102311 |
+
"epoch": 16.646153846153847,
|
| 102312 |
+
"grad_norm": 0.19490982592105865,
|
| 102313 |
+
"learning_rate": 3.674324939704871e-06,
|
| 102314 |
+
"loss": 0.8353,
|
| 102315 |
+
"step": 14615
|
| 102316 |
+
},
|
| 102317 |
+
{
|
| 102318 |
+
"epoch": 16.647293447293446,
|
| 102319 |
+
"grad_norm": 0.16275721788406372,
|
| 102320 |
+
"learning_rate": 3.671893838226889e-06,
|
| 102321 |
+
"loss": 0.7055,
|
| 102322 |
+
"step": 14616
|
| 102323 |
+
},
|
| 102324 |
+
{
|
| 102325 |
+
"epoch": 16.64843304843305,
|
| 102326 |
+
"grad_norm": 0.17398375272750854,
|
| 102327 |
+
"learning_rate": 3.669463477532689e-06,
|
| 102328 |
+
"loss": 0.9279,
|
| 102329 |
+
"step": 14617
|
| 102330 |
+
},
|
| 102331 |
+
{
|
| 102332 |
+
"epoch": 16.64957264957265,
|
| 102333 |
+
"grad_norm": 0.20997199416160583,
|
| 102334 |
+
"learning_rate": 3.667033857706681e-06,
|
| 102335 |
+
"loss": 0.6837,
|
| 102336 |
+
"step": 14618
|
| 102337 |
+
},
|
| 102338 |
+
{
|
| 102339 |
+
"epoch": 16.65071225071225,
|
| 102340 |
+
"grad_norm": 0.18733809888362885,
|
| 102341 |
+
"learning_rate": 3.664604978833255e-06,
|
| 102342 |
+
"loss": 0.5058,
|
| 102343 |
+
"step": 14619
|
| 102344 |
+
},
|
| 102345 |
+
{
|
| 102346 |
+
"epoch": 16.651851851851852,
|
| 102347 |
+
"grad_norm": 0.16576658189296722,
|
| 102348 |
+
"learning_rate": 3.66217684099677e-06,
|
| 102349 |
+
"loss": 0.7647,
|
| 102350 |
+
"step": 14620
|
| 102351 |
+
},
|
| 102352 |
+
{
|
| 102353 |
+
"epoch": 16.652991452991454,
|
| 102354 |
+
"grad_norm": 0.2201228141784668,
|
| 102355 |
+
"learning_rate": 3.6597494442815598e-06,
|
| 102356 |
+
"loss": 0.8653,
|
| 102357 |
+
"step": 14621
|
| 102358 |
+
},
|
| 102359 |
+
{
|
| 102360 |
+
"epoch": 16.654131054131053,
|
| 102361 |
+
"grad_norm": 0.1736903190612793,
|
| 102362 |
+
"learning_rate": 3.657322788771947e-06,
|
| 102363 |
+
"loss": 0.7397,
|
| 102364 |
+
"step": 14622
|
| 102365 |
+
},
|
| 102366 |
+
{
|
| 102367 |
+
"epoch": 16.655270655270655,
|
| 102368 |
+
"grad_norm": 0.2093871533870697,
|
| 102369 |
+
"learning_rate": 3.6548968745521967e-06,
|
| 102370 |
+
"loss": 0.8634,
|
| 102371 |
+
"step": 14623
|
| 102372 |
+
},
|
| 102373 |
+
{
|
| 102374 |
+
"epoch": 16.656410256410258,
|
| 102375 |
+
"grad_norm": 0.21694384515285492,
|
| 102376 |
+
"learning_rate": 3.652471701706581e-06,
|
| 102377 |
+
"loss": 0.6973,
|
| 102378 |
+
"step": 14624
|
| 102379 |
+
},
|
| 102380 |
+
{
|
| 102381 |
+
"epoch": 16.657549857549856,
|
| 102382 |
+
"grad_norm": 0.2191031575202942,
|
| 102383 |
+
"learning_rate": 3.6500472703193263e-06,
|
| 102384 |
+
"loss": 0.4609,
|
| 102385 |
+
"step": 14625
|
| 102386 |
+
},
|
| 102387 |
+
{
|
| 102388 |
+
"epoch": 16.65868945868946,
|
| 102389 |
+
"grad_norm": 0.1684856116771698,
|
| 102390 |
+
"learning_rate": 3.647623580474649e-06,
|
| 102391 |
+
"loss": 0.8714,
|
| 102392 |
+
"step": 14626
|
| 102393 |
+
},
|
| 102394 |
+
{
|
| 102395 |
+
"epoch": 16.65982905982906,
|
| 102396 |
+
"grad_norm": 0.2283681184053421,
|
| 102397 |
+
"learning_rate": 3.645200632256718e-06,
|
| 102398 |
+
"loss": 0.5105,
|
| 102399 |
+
"step": 14627
|
| 102400 |
+
},
|
| 102401 |
+
{
|
| 102402 |
+
"epoch": 16.66096866096866,
|
| 102403 |
+
"grad_norm": 0.1907675415277481,
|
| 102404 |
+
"learning_rate": 3.6427784257496933e-06,
|
| 102405 |
+
"loss": 0.6516,
|
| 102406 |
+
"step": 14628
|
| 102407 |
+
},
|
| 102408 |
+
{
|
| 102409 |
+
"epoch": 16.662108262108262,
|
| 102410 |
+
"grad_norm": 0.2024574875831604,
|
| 102411 |
+
"learning_rate": 3.640356961037705e-06,
|
| 102412 |
+
"loss": 0.7249,
|
| 102413 |
+
"step": 14629
|
| 102414 |
+
},
|
| 102415 |
+
{
|
| 102416 |
+
"epoch": 16.663247863247864,
|
| 102417 |
+
"grad_norm": 0.19128815829753876,
|
| 102418 |
+
"learning_rate": 3.637936238204867e-06,
|
| 102419 |
+
"loss": 0.5558,
|
| 102420 |
+
"step": 14630
|
| 102421 |
+
},
|
| 102422 |
+
{
|
| 102423 |
+
"epoch": 16.664387464387463,
|
| 102424 |
+
"grad_norm": 0.19036570191383362,
|
| 102425 |
+
"learning_rate": 3.635516257335245e-06,
|
| 102426 |
+
"loss": 0.4762,
|
| 102427 |
+
"step": 14631
|
| 102428 |
+
},
|
| 102429 |
+
{
|
| 102430 |
+
"epoch": 16.665527065527066,
|
| 102431 |
+
"grad_norm": 0.19478853046894073,
|
| 102432 |
+
"learning_rate": 3.633097018512896e-06,
|
| 102433 |
+
"loss": 0.8306,
|
| 102434 |
+
"step": 14632
|
| 102435 |
+
},
|
| 102436 |
+
{
|
| 102437 |
+
"epoch": 16.666666666666668,
|
| 102438 |
+
"grad_norm": 0.19821801781654358,
|
| 102439 |
+
"learning_rate": 3.6306785218218453e-06,
|
| 102440 |
+
"loss": 0.5955,
|
| 102441 |
+
"step": 14633
|
| 102442 |
+
},
|
| 102443 |
+
{
|
| 102444 |
+
"epoch": 16.667806267806267,
|
| 102445 |
+
"grad_norm": 0.21600525081157684,
|
| 102446 |
+
"learning_rate": 3.6282607673461004e-06,
|
| 102447 |
+
"loss": 0.7088,
|
| 102448 |
+
"step": 14634
|
| 102449 |
+
},
|
| 102450 |
+
{
|
| 102451 |
+
"epoch": 16.66894586894587,
|
| 102452 |
+
"grad_norm": 0.1982768028974533,
|
| 102453 |
+
"learning_rate": 3.6258437551696278e-06,
|
| 102454 |
+
"loss": 0.873,
|
| 102455 |
+
"step": 14635
|
| 102456 |
+
},
|
| 102457 |
+
{
|
| 102458 |
+
"epoch": 16.67008547008547,
|
| 102459 |
+
"grad_norm": 0.19198794662952423,
|
| 102460 |
+
"learning_rate": 3.623427485376382e-06,
|
| 102461 |
+
"loss": 0.8126,
|
| 102462 |
+
"step": 14636
|
| 102463 |
+
},
|
| 102464 |
+
{
|
| 102465 |
+
"epoch": 16.67122507122507,
|
| 102466 |
+
"grad_norm": 0.23557241261005402,
|
| 102467 |
+
"learning_rate": 3.6210119580502825e-06,
|
| 102468 |
+
"loss": 0.638,
|
| 102469 |
+
"step": 14637
|
| 102470 |
+
},
|
| 102471 |
+
{
|
| 102472 |
+
"epoch": 16.672364672364672,
|
| 102473 |
+
"grad_norm": 0.18622344732284546,
|
| 102474 |
+
"learning_rate": 3.618597173275237e-06,
|
| 102475 |
+
"loss": 0.7704,
|
| 102476 |
+
"step": 14638
|
| 102477 |
+
},
|
| 102478 |
+
{
|
| 102479 |
+
"epoch": 16.673504273504275,
|
| 102480 |
+
"grad_norm": 0.21982015669345856,
|
| 102481 |
+
"learning_rate": 3.6161831311351065e-06,
|
| 102482 |
+
"loss": 0.5458,
|
| 102483 |
+
"step": 14639
|
| 102484 |
+
},
|
| 102485 |
+
{
|
| 102486 |
+
"epoch": 16.674643874643873,
|
| 102487 |
+
"grad_norm": 0.22193771600723267,
|
| 102488 |
+
"learning_rate": 3.613769831713734e-06,
|
| 102489 |
+
"loss": 0.5057,
|
| 102490 |
+
"step": 14640
|
| 102491 |
+
},
|
| 102492 |
+
{
|
| 102493 |
+
"epoch": 16.675783475783476,
|
| 102494 |
+
"grad_norm": 0.20188890397548676,
|
| 102495 |
+
"learning_rate": 3.611357275094959e-06,
|
| 102496 |
+
"loss": 0.529,
|
| 102497 |
+
"step": 14641
|
| 102498 |
+
},
|
| 102499 |
+
{
|
| 102500 |
+
"epoch": 16.676923076923078,
|
| 102501 |
+
"grad_norm": 0.18457788228988647,
|
| 102502 |
+
"learning_rate": 3.608945461362559e-06,
|
| 102503 |
+
"loss": 0.7242,
|
| 102504 |
+
"step": 14642
|
| 102505 |
+
},
|
| 102506 |
+
{
|
| 102507 |
+
"epoch": 16.678062678062677,
|
| 102508 |
+
"grad_norm": 0.21365272998809814,
|
| 102509 |
+
"learning_rate": 3.6065343906003106e-06,
|
| 102510 |
+
"loss": 0.6462,
|
| 102511 |
+
"step": 14643
|
| 102512 |
+
},
|
| 102513 |
+
{
|
| 102514 |
+
"epoch": 16.67920227920228,
|
| 102515 |
+
"grad_norm": 0.17531974613666534,
|
| 102516 |
+
"learning_rate": 3.604124062891953e-06,
|
| 102517 |
+
"loss": 0.7558,
|
| 102518 |
+
"step": 14644
|
| 102519 |
+
},
|
| 102520 |
+
{
|
| 102521 |
+
"epoch": 16.68034188034188,
|
| 102522 |
+
"grad_norm": 0.18487349152565002,
|
| 102523 |
+
"learning_rate": 3.6017144783212135e-06,
|
| 102524 |
+
"loss": 0.6911,
|
| 102525 |
+
"step": 14645
|
| 102526 |
+
},
|
| 102527 |
+
{
|
| 102528 |
+
"epoch": 16.68148148148148,
|
| 102529 |
+
"grad_norm": 0.1780790388584137,
|
| 102530 |
+
"learning_rate": 3.599305636971767e-06,
|
| 102531 |
+
"loss": 0.7966,
|
| 102532 |
+
"step": 14646
|
| 102533 |
+
},
|
| 102534 |
+
{
|
| 102535 |
+
"epoch": 16.682621082621083,
|
| 102536 |
+
"grad_norm": 0.1781991422176361,
|
| 102537 |
+
"learning_rate": 3.5968975389272906e-06,
|
| 102538 |
+
"loss": 0.5923,
|
| 102539 |
+
"step": 14647
|
| 102540 |
+
},
|
| 102541 |
+
{
|
| 102542 |
+
"epoch": 16.683760683760685,
|
| 102543 |
+
"grad_norm": 0.2787780165672302,
|
| 102544 |
+
"learning_rate": 3.5944901842714207e-06,
|
| 102545 |
+
"loss": 0.6287,
|
| 102546 |
+
"step": 14648
|
| 102547 |
+
},
|
| 102548 |
+
{
|
| 102549 |
+
"epoch": 16.684900284900284,
|
| 102550 |
+
"grad_norm": 0.1921214461326599,
|
| 102551 |
+
"learning_rate": 3.5920835730877762e-06,
|
| 102552 |
+
"loss": 0.7003,
|
| 102553 |
+
"step": 14649
|
| 102554 |
+
},
|
| 102555 |
+
{
|
| 102556 |
+
"epoch": 16.686039886039886,
|
| 102557 |
+
"grad_norm": 0.19757118821144104,
|
| 102558 |
+
"learning_rate": 3.5896777054599372e-06,
|
| 102559 |
+
"loss": 0.6759,
|
| 102560 |
+
"step": 14650
|
| 102561 |
+
},
|
| 102562 |
+
{
|
| 102563 |
+
"epoch": 16.68717948717949,
|
| 102564 |
+
"grad_norm": 0.21820276975631714,
|
| 102565 |
+
"learning_rate": 3.5872725814714652e-06,
|
| 102566 |
+
"loss": 0.8021,
|
| 102567 |
+
"step": 14651
|
| 102568 |
+
},
|
| 102569 |
+
{
|
| 102570 |
+
"epoch": 16.688319088319087,
|
| 102571 |
+
"grad_norm": 0.1930316537618637,
|
| 102572 |
+
"learning_rate": 3.58486820120591e-06,
|
| 102573 |
+
"loss": 0.6904,
|
| 102574 |
+
"step": 14652
|
| 102575 |
+
},
|
| 102576 |
+
{
|
| 102577 |
+
"epoch": 16.68945868945869,
|
| 102578 |
+
"grad_norm": 0.18890811502933502,
|
| 102579 |
+
"learning_rate": 3.5824645647467707e-06,
|
| 102580 |
+
"loss": 0.5526,
|
| 102581 |
+
"step": 14653
|
| 102582 |
+
},
|
| 102583 |
+
{
|
| 102584 |
+
"epoch": 16.69059829059829,
|
| 102585 |
+
"grad_norm": 0.17934978008270264,
|
| 102586 |
+
"learning_rate": 3.5800616721775344e-06,
|
| 102587 |
+
"loss": 0.795,
|
| 102588 |
+
"step": 14654
|
| 102589 |
+
},
|
| 102590 |
+
{
|
| 102591 |
+
"epoch": 16.69173789173789,
|
| 102592 |
+
"grad_norm": 0.2052299827337265,
|
| 102593 |
+
"learning_rate": 3.5776595235816612e-06,
|
| 102594 |
+
"loss": 0.6729,
|
| 102595 |
+
"step": 14655
|
| 102596 |
+
},
|
| 102597 |
+
{
|
| 102598 |
+
"epoch": 16.692877492877493,
|
| 102599 |
+
"grad_norm": 0.1690863072872162,
|
| 102600 |
+
"learning_rate": 3.575258119042593e-06,
|
| 102601 |
+
"loss": 0.481,
|
| 102602 |
+
"step": 14656
|
| 102603 |
+
},
|
| 102604 |
+
{
|
| 102605 |
+
"epoch": 16.694017094017095,
|
| 102606 |
+
"grad_norm": 0.1977846622467041,
|
| 102607 |
+
"learning_rate": 3.572857458643719e-06,
|
| 102608 |
+
"loss": 0.6312,
|
| 102609 |
+
"step": 14657
|
| 102610 |
+
},
|
| 102611 |
+
{
|
| 102612 |
+
"epoch": 16.695156695156694,
|
| 102613 |
+
"grad_norm": 0.21069231629371643,
|
| 102614 |
+
"learning_rate": 3.5704575424684332e-06,
|
| 102615 |
+
"loss": 0.581,
|
| 102616 |
+
"step": 14658
|
| 102617 |
+
},
|
| 102618 |
+
{
|
| 102619 |
+
"epoch": 16.696296296296296,
|
| 102620 |
+
"grad_norm": 0.1934773176908493,
|
| 102621 |
+
"learning_rate": 3.5680583706000857e-06,
|
| 102622 |
+
"loss": 0.2961,
|
| 102623 |
+
"step": 14659
|
| 102624 |
+
},
|
| 102625 |
+
{
|
| 102626 |
+
"epoch": 16.6974358974359,
|
| 102627 |
+
"grad_norm": 0.2008139193058014,
|
| 102628 |
+
"learning_rate": 3.565659943122018e-06,
|
| 102629 |
+
"loss": 0.6576,
|
| 102630 |
+
"step": 14660
|
| 102631 |
+
},
|
| 102632 |
+
{
|
| 102633 |
+
"epoch": 16.698575498575497,
|
| 102634 |
+
"grad_norm": 0.1939091682434082,
|
| 102635 |
+
"learning_rate": 3.563262260117517e-06,
|
| 102636 |
+
"loss": 0.574,
|
| 102637 |
+
"step": 14661
|
| 102638 |
+
},
|
| 102639 |
+
{
|
| 102640 |
+
"epoch": 16.6997150997151,
|
| 102641 |
+
"grad_norm": 0.18059399724006653,
|
| 102642 |
+
"learning_rate": 3.560865321669865e-06,
|
| 102643 |
+
"loss": 0.7012,
|
| 102644 |
+
"step": 14662
|
| 102645 |
+
},
|
| 102646 |
+
{
|
| 102647 |
+
"epoch": 16.700854700854702,
|
| 102648 |
+
"grad_norm": 0.18085725605487823,
|
| 102649 |
+
"learning_rate": 3.558469127862327e-06,
|
| 102650 |
+
"loss": 0.5699,
|
| 102651 |
+
"step": 14663
|
| 102652 |
+
},
|
| 102653 |
+
{
|
| 102654 |
+
"epoch": 16.7019943019943,
|
| 102655 |
+
"grad_norm": 0.20898693799972534,
|
| 102656 |
+
"learning_rate": 3.5560736787781155e-06,
|
| 102657 |
+
"loss": 0.5635,
|
| 102658 |
+
"step": 14664
|
| 102659 |
+
},
|
| 102660 |
+
{
|
| 102661 |
+
"epoch": 16.703133903133903,
|
| 102662 |
+
"grad_norm": 0.21455982327461243,
|
| 102663 |
+
"learning_rate": 3.5536789745004378e-06,
|
| 102664 |
+
"loss": 0.6346,
|
| 102665 |
+
"step": 14665
|
| 102666 |
+
},
|
| 102667 |
+
{
|
| 102668 |
+
"epoch": 16.704273504273505,
|
| 102669 |
+
"grad_norm": 0.2405419945716858,
|
| 102670 |
+
"learning_rate": 3.5512850151124683e-06,
|
| 102671 |
+
"loss": 0.6799,
|
| 102672 |
+
"step": 14666
|
| 102673 |
+
},
|
| 102674 |
+
{
|
| 102675 |
+
"epoch": 16.705413105413104,
|
| 102676 |
+
"grad_norm": 0.1728639453649521,
|
| 102677 |
+
"learning_rate": 3.5488918006973516e-06,
|
| 102678 |
+
"loss": 0.7291,
|
| 102679 |
+
"step": 14667
|
| 102680 |
+
},
|
| 102681 |
+
{
|
| 102682 |
+
"epoch": 16.706552706552706,
|
| 102683 |
+
"grad_norm": 0.1853685826063156,
|
| 102684 |
+
"learning_rate": 3.546499331338218e-06,
|
| 102685 |
+
"loss": 0.9132,
|
| 102686 |
+
"step": 14668
|
| 102687 |
+
},
|
| 102688 |
+
{
|
| 102689 |
+
"epoch": 16.70769230769231,
|
| 102690 |
+
"grad_norm": 0.23775158822536469,
|
| 102691 |
+
"learning_rate": 3.544107607118158e-06,
|
| 102692 |
+
"loss": 0.6041,
|
| 102693 |
+
"step": 14669
|
| 102694 |
+
},
|
| 102695 |
+
{
|
| 102696 |
+
"epoch": 16.708831908831907,
|
| 102697 |
+
"grad_norm": 0.2054608315229416,
|
| 102698 |
+
"learning_rate": 3.5417166281202423e-06,
|
| 102699 |
+
"loss": 0.7261,
|
| 102700 |
+
"step": 14670
|
| 102701 |
+
},
|
| 102702 |
+
{
|
| 102703 |
+
"epoch": 16.70997150997151,
|
| 102704 |
+
"grad_norm": 0.16729244589805603,
|
| 102705 |
+
"learning_rate": 3.5393263944275195e-06,
|
| 102706 |
+
"loss": 0.8438,
|
| 102707 |
+
"step": 14671
|
| 102708 |
+
},
|
| 102709 |
+
{
|
| 102710 |
+
"epoch": 16.711111111111112,
|
| 102711 |
+
"grad_norm": 0.19036667048931122,
|
| 102712 |
+
"learning_rate": 3.5369369061230144e-06,
|
| 102713 |
+
"loss": 0.7448,
|
| 102714 |
+
"step": 14672
|
| 102715 |
+
},
|
| 102716 |
+
{
|
| 102717 |
+
"epoch": 16.71225071225071,
|
| 102718 |
+
"grad_norm": 0.23822540044784546,
|
| 102719 |
+
"learning_rate": 3.5345481632897027e-06,
|
| 102720 |
+
"loss": 0.3402,
|
| 102721 |
+
"step": 14673
|
| 102722 |
+
},
|
| 102723 |
+
{
|
| 102724 |
+
"epoch": 16.713390313390313,
|
| 102725 |
+
"grad_norm": 0.26414963603019714,
|
| 102726 |
+
"learning_rate": 3.5321601660105675e-06,
|
| 102727 |
+
"loss": 0.5163,
|
| 102728 |
+
"step": 14674
|
| 102729 |
+
},
|
| 102730 |
+
{
|
| 102731 |
+
"epoch": 16.714529914529916,
|
| 102732 |
+
"grad_norm": 0.23553740978240967,
|
| 102733 |
+
"learning_rate": 3.5297729143685536e-06,
|
| 102734 |
+
"loss": 0.5234,
|
| 102735 |
+
"step": 14675
|
| 102736 |
+
},
|
| 102737 |
+
{
|
| 102738 |
+
"epoch": 16.715669515669514,
|
| 102739 |
+
"grad_norm": 0.18662650883197784,
|
| 102740 |
+
"learning_rate": 3.527386408446562e-06,
|
| 102741 |
+
"loss": 0.6276,
|
| 102742 |
+
"step": 14676
|
| 102743 |
+
},
|
| 102744 |
+
{
|
| 102745 |
+
"epoch": 16.716809116809117,
|
| 102746 |
+
"grad_norm": 0.21046824753284454,
|
| 102747 |
+
"learning_rate": 3.525000648327492e-06,
|
| 102748 |
+
"loss": 0.5953,
|
| 102749 |
+
"step": 14677
|
| 102750 |
+
},
|
| 102751 |
+
{
|
| 102752 |
+
"epoch": 16.71794871794872,
|
| 102753 |
+
"grad_norm": 0.18378998339176178,
|
| 102754 |
+
"learning_rate": 3.5226156340942063e-06,
|
| 102755 |
+
"loss": 0.8075,
|
| 102756 |
+
"step": 14678
|
| 102757 |
+
},
|
| 102758 |
+
{
|
| 102759 |
+
"epoch": 16.719088319088318,
|
| 102760 |
+
"grad_norm": 0.1846972405910492,
|
| 102761 |
+
"learning_rate": 3.520231365829549e-06,
|
| 102762 |
+
"loss": 0.6992,
|
| 102763 |
+
"step": 14679
|
| 102764 |
+
},
|
| 102765 |
+
{
|
| 102766 |
+
"epoch": 16.72022792022792,
|
| 102767 |
+
"grad_norm": 0.18984457850456238,
|
| 102768 |
+
"learning_rate": 3.5178478436163177e-06,
|
| 102769 |
+
"loss": 0.6164,
|
| 102770 |
+
"step": 14680
|
| 102771 |
+
},
|
| 102772 |
+
{
|
| 102773 |
+
"epoch": 16.721367521367522,
|
| 102774 |
+
"grad_norm": 0.19495059549808502,
|
| 102775 |
+
"learning_rate": 3.5154650675373103e-06,
|
| 102776 |
+
"loss": 0.575,
|
| 102777 |
+
"step": 14681
|
| 102778 |
+
},
|
| 102779 |
+
{
|
| 102780 |
+
"epoch": 16.72250712250712,
|
| 102781 |
+
"grad_norm": 0.1732192188501358,
|
| 102782 |
+
"learning_rate": 3.5130830376752797e-06,
|
| 102783 |
+
"loss": 0.5981,
|
| 102784 |
+
"step": 14682
|
| 102785 |
+
},
|
| 102786 |
+
{
|
| 102787 |
+
"epoch": 16.723646723646723,
|
| 102788 |
+
"grad_norm": 0.1762353926897049,
|
| 102789 |
+
"learning_rate": 3.5107017541129746e-06,
|
| 102790 |
+
"loss": 0.8969,
|
| 102791 |
+
"step": 14683
|
| 102792 |
+
},
|
| 102793 |
+
{
|
| 102794 |
+
"epoch": 16.724786324786326,
|
| 102795 |
+
"grad_norm": 0.18922844529151917,
|
| 102796 |
+
"learning_rate": 3.5083212169330803e-06,
|
| 102797 |
+
"loss": 0.7806,
|
| 102798 |
+
"step": 14684
|
| 102799 |
+
},
|
| 102800 |
+
{
|
| 102801 |
+
"epoch": 16.725925925925925,
|
| 102802 |
+
"grad_norm": 0.18500468134880066,
|
| 102803 |
+
"learning_rate": 3.5059414262182955e-06,
|
| 102804 |
+
"loss": 0.7059,
|
| 102805 |
+
"step": 14685
|
| 102806 |
+
},
|
| 102807 |
+
{
|
| 102808 |
+
"epoch": 16.727065527065527,
|
| 102809 |
+
"grad_norm": 0.20659609138965607,
|
| 102810 |
+
"learning_rate": 3.5035623820512844e-06,
|
| 102811 |
+
"loss": 0.5154,
|
| 102812 |
+
"step": 14686
|
| 102813 |
+
},
|
| 102814 |
+
{
|
| 102815 |
+
"epoch": 16.72820512820513,
|
| 102816 |
+
"grad_norm": 0.1955866813659668,
|
| 102817 |
+
"learning_rate": 3.5011840845146584e-06,
|
| 102818 |
+
"loss": 0.7957,
|
| 102819 |
+
"step": 14687
|
| 102820 |
+
},
|
| 102821 |
+
{
|
| 102822 |
+
"epoch": 16.729344729344728,
|
| 102823 |
+
"grad_norm": 0.23779912292957306,
|
| 102824 |
+
"learning_rate": 3.498806533691032e-06,
|
| 102825 |
+
"loss": 0.729,
|
| 102826 |
+
"step": 14688
|
| 102827 |
+
},
|
| 102828 |
+
{
|
| 102829 |
+
"epoch": 16.73048433048433,
|
| 102830 |
+
"grad_norm": 0.15403661131858826,
|
| 102831 |
+
"learning_rate": 3.496429729662981e-06,
|
| 102832 |
+
"loss": 0.6981,
|
| 102833 |
+
"step": 14689
|
| 102834 |
+
},
|
| 102835 |
+
{
|
| 102836 |
+
"epoch": 16.731623931623933,
|
| 102837 |
+
"grad_norm": 0.1875777244567871,
|
| 102838 |
+
"learning_rate": 3.4940536725130676e-06,
|
| 102839 |
+
"loss": 0.5385,
|
| 102840 |
+
"step": 14690
|
| 102841 |
+
},
|
| 102842 |
+
{
|
| 102843 |
+
"epoch": 16.73276353276353,
|
| 102844 |
+
"grad_norm": 0.21117456257343292,
|
| 102845 |
+
"learning_rate": 3.491678362323808e-06,
|
| 102846 |
+
"loss": 0.6495,
|
| 102847 |
+
"step": 14691
|
| 102848 |
+
},
|
| 102849 |
+
{
|
| 102850 |
+
"epoch": 16.733903133903134,
|
| 102851 |
+
"grad_norm": 0.21795162558555603,
|
| 102852 |
+
"learning_rate": 3.489303799177704e-06,
|
| 102853 |
+
"loss": 0.3546,
|
| 102854 |
+
"step": 14692
|
| 102855 |
+
},
|
| 102856 |
+
{
|
| 102857 |
+
"epoch": 16.735042735042736,
|
| 102858 |
+
"grad_norm": 0.15931764245033264,
|
| 102859 |
+
"learning_rate": 3.486929983157236e-06,
|
| 102860 |
+
"loss": 0.4599,
|
| 102861 |
+
"step": 14693
|
| 102862 |
+
},
|
| 102863 |
+
{
|
| 102864 |
+
"epoch": 16.736182336182335,
|
| 102865 |
+
"grad_norm": 0.1928333342075348,
|
| 102866 |
+
"learning_rate": 3.4845569143448553e-06,
|
| 102867 |
+
"loss": 0.6611,
|
| 102868 |
+
"step": 14694
|
| 102869 |
+
},
|
| 102870 |
+
{
|
| 102871 |
+
"epoch": 16.737321937321937,
|
| 102872 |
+
"grad_norm": 0.21025556325912476,
|
| 102873 |
+
"learning_rate": 3.4821845928229708e-06,
|
| 102874 |
+
"loss": 0.6483,
|
| 102875 |
+
"step": 14695
|
| 102876 |
+
},
|
| 102877 |
+
{
|
| 102878 |
+
"epoch": 16.73846153846154,
|
| 102879 |
+
"grad_norm": 0.241347536444664,
|
| 102880 |
+
"learning_rate": 3.479813018673994e-06,
|
| 102881 |
+
"loss": 0.6776,
|
| 102882 |
+
"step": 14696
|
| 102883 |
+
},
|
| 102884 |
+
{
|
| 102885 |
+
"epoch": 16.739601139601138,
|
| 102886 |
+
"grad_norm": 0.212122842669487,
|
| 102887 |
+
"learning_rate": 3.4774421919802956e-06,
|
| 102888 |
+
"loss": 0.4725,
|
| 102889 |
+
"step": 14697
|
| 102890 |
+
},
|
| 102891 |
+
{
|
| 102892 |
+
"epoch": 16.74074074074074,
|
| 102893 |
+
"grad_norm": 0.2250332534313202,
|
| 102894 |
+
"learning_rate": 3.475072112824215e-06,
|
| 102895 |
+
"loss": 0.6554,
|
| 102896 |
+
"step": 14698
|
| 102897 |
+
},
|
| 102898 |
+
{
|
| 102899 |
+
"epoch": 16.741880341880343,
|
| 102900 |
+
"grad_norm": 0.21439555287361145,
|
| 102901 |
+
"learning_rate": 3.472702781288073e-06,
|
| 102902 |
+
"loss": 0.5723,
|
| 102903 |
+
"step": 14699
|
| 102904 |
+
},
|
| 102905 |
+
{
|
| 102906 |
+
"epoch": 16.74301994301994,
|
| 102907 |
+
"grad_norm": 0.1837013065814972,
|
| 102908 |
+
"learning_rate": 3.4703341974541616e-06,
|
| 102909 |
+
"loss": 0.4998,
|
| 102910 |
+
"step": 14700
|
| 102911 |
}
|
| 102912 |
],
|
| 102913 |
"logging_steps": 1,
|
|
|
|
| 102927 |
"attributes": {}
|
| 102928 |
}
|
| 102929 |
},
|
| 102930 |
+
"total_flos": 8.219161094608355e+19,
|
| 102931 |
"train_batch_size": 8,
|
| 102932 |
"trial_name": null,
|
| 102933 |
"trial_params": null
|