Training in progress, step 4360, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 686586885
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60998db92fba6d2b64bcfc09adcbe3eb8d231b935a62a558a997cf75ffbe8c4a
|
| 3 |
size 686586885
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 343277933
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7586351d3affe74fd5af32a003a5496c3f6412bf6227d85f076c2e6e9d9b092b
|
| 3 |
size 343277933
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dee58c4aeb08ad6f12c883ed709c684c0f5925fa9fd5cd6cc7c4227f4609f6a0
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 4.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -24903,13 +24903,1321 @@
|
|
| 24903 |
"learning_rate": 2.5952380952380957e-05,
|
| 24904 |
"loss": 0.267,
|
| 24905 |
"step": 4142
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24906 |
}
|
| 24907 |
],
|
| 24908 |
"logging_steps": 1,
|
| 24909 |
"max_steps": 4360,
|
| 24910 |
"num_train_epochs": 5,
|
| 24911 |
"save_steps": 218,
|
| 24912 |
-
"total_flos": 8.
|
| 24913 |
"trial_name": null,
|
| 24914 |
"trial_params": null
|
| 24915 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.99597156822373,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 4360,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 24903 |
"learning_rate": 2.5952380952380957e-05,
|
| 24904 |
"loss": 0.267,
|
| 24905 |
"step": 4142
|
| 24906 |
+
},
|
| 24907 |
+
{
|
| 24908 |
+
"epoch": 4.75,
|
| 24909 |
+
"learning_rate": 2.5833333333333336e-05,
|
| 24910 |
+
"loss": 0.2703,
|
| 24911 |
+
"step": 4143
|
| 24912 |
+
},
|
| 24913 |
+
{
|
| 24914 |
+
"epoch": 4.75,
|
| 24915 |
+
"learning_rate": 2.5714285714285714e-05,
|
| 24916 |
+
"loss": 0.2785,
|
| 24917 |
+
"step": 4144
|
| 24918 |
+
},
|
| 24919 |
+
{
|
| 24920 |
+
"epoch": 4.75,
|
| 24921 |
+
"learning_rate": 2.5595238095238093e-05,
|
| 24922 |
+
"loss": 0.2821,
|
| 24923 |
+
"step": 4145
|
| 24924 |
+
},
|
| 24925 |
+
{
|
| 24926 |
+
"epoch": 4.75,
|
| 24927 |
+
"learning_rate": 2.5476190476190476e-05,
|
| 24928 |
+
"loss": 0.2768,
|
| 24929 |
+
"step": 4146
|
| 24930 |
+
},
|
| 24931 |
+
{
|
| 24932 |
+
"epoch": 4.75,
|
| 24933 |
+
"learning_rate": 2.5357142857142858e-05,
|
| 24934 |
+
"loss": 0.2905,
|
| 24935 |
+
"step": 4147
|
| 24936 |
+
},
|
| 24937 |
+
{
|
| 24938 |
+
"epoch": 4.75,
|
| 24939 |
+
"learning_rate": 2.523809523809524e-05,
|
| 24940 |
+
"loss": 0.2886,
|
| 24941 |
+
"step": 4148
|
| 24942 |
+
},
|
| 24943 |
+
{
|
| 24944 |
+
"epoch": 4.75,
|
| 24945 |
+
"learning_rate": 2.511904761904762e-05,
|
| 24946 |
+
"loss": 0.2483,
|
| 24947 |
+
"step": 4149
|
| 24948 |
+
},
|
| 24949 |
+
{
|
| 24950 |
+
"epoch": 4.76,
|
| 24951 |
+
"learning_rate": 2.5e-05,
|
| 24952 |
+
"loss": 0.2541,
|
| 24953 |
+
"step": 4150
|
| 24954 |
+
},
|
| 24955 |
+
{
|
| 24956 |
+
"epoch": 4.76,
|
| 24957 |
+
"learning_rate": 2.4880952380952383e-05,
|
| 24958 |
+
"loss": 0.2748,
|
| 24959 |
+
"step": 4151
|
| 24960 |
+
},
|
| 24961 |
+
{
|
| 24962 |
+
"epoch": 4.76,
|
| 24963 |
+
"learning_rate": 2.4761904761904762e-05,
|
| 24964 |
+
"loss": 0.2549,
|
| 24965 |
+
"step": 4152
|
| 24966 |
+
},
|
| 24967 |
+
{
|
| 24968 |
+
"epoch": 4.76,
|
| 24969 |
+
"learning_rate": 2.4642857142857145e-05,
|
| 24970 |
+
"loss": 0.2789,
|
| 24971 |
+
"step": 4153
|
| 24972 |
+
},
|
| 24973 |
+
{
|
| 24974 |
+
"epoch": 4.76,
|
| 24975 |
+
"learning_rate": 2.4523809523809523e-05,
|
| 24976 |
+
"loss": 0.2416,
|
| 24977 |
+
"step": 4154
|
| 24978 |
+
},
|
| 24979 |
+
{
|
| 24980 |
+
"epoch": 4.76,
|
| 24981 |
+
"learning_rate": 2.4404761904761906e-05,
|
| 24982 |
+
"loss": 0.2583,
|
| 24983 |
+
"step": 4155
|
| 24984 |
+
},
|
| 24985 |
+
{
|
| 24986 |
+
"epoch": 4.76,
|
| 24987 |
+
"learning_rate": 2.4285714285714288e-05,
|
| 24988 |
+
"loss": 0.2346,
|
| 24989 |
+
"step": 4156
|
| 24990 |
+
},
|
| 24991 |
+
{
|
| 24992 |
+
"epoch": 4.76,
|
| 24993 |
+
"learning_rate": 2.4166666666666667e-05,
|
| 24994 |
+
"loss": 0.2326,
|
| 24995 |
+
"step": 4157
|
| 24996 |
+
},
|
| 24997 |
+
{
|
| 24998 |
+
"epoch": 4.76,
|
| 24999 |
+
"learning_rate": 2.404761904761905e-05,
|
| 25000 |
+
"loss": 0.2598,
|
| 25001 |
+
"step": 4158
|
| 25002 |
+
},
|
| 25003 |
+
{
|
| 25004 |
+
"epoch": 4.77,
|
| 25005 |
+
"learning_rate": 2.392857142857143e-05,
|
| 25006 |
+
"loss": 0.2601,
|
| 25007 |
+
"step": 4159
|
| 25008 |
+
},
|
| 25009 |
+
{
|
| 25010 |
+
"epoch": 4.77,
|
| 25011 |
+
"learning_rate": 2.380952380952381e-05,
|
| 25012 |
+
"loss": 0.251,
|
| 25013 |
+
"step": 4160
|
| 25014 |
+
},
|
| 25015 |
+
{
|
| 25016 |
+
"epoch": 4.77,
|
| 25017 |
+
"learning_rate": 2.369047619047619e-05,
|
| 25018 |
+
"loss": 0.2526,
|
| 25019 |
+
"step": 4161
|
| 25020 |
+
},
|
| 25021 |
+
{
|
| 25022 |
+
"epoch": 4.77,
|
| 25023 |
+
"learning_rate": 2.357142857142857e-05,
|
| 25024 |
+
"loss": 0.2951,
|
| 25025 |
+
"step": 4162
|
| 25026 |
+
},
|
| 25027 |
+
{
|
| 25028 |
+
"epoch": 4.77,
|
| 25029 |
+
"learning_rate": 2.3452380952380954e-05,
|
| 25030 |
+
"loss": 0.27,
|
| 25031 |
+
"step": 4163
|
| 25032 |
+
},
|
| 25033 |
+
{
|
| 25034 |
+
"epoch": 4.77,
|
| 25035 |
+
"learning_rate": 2.3333333333333336e-05,
|
| 25036 |
+
"loss": 0.2833,
|
| 25037 |
+
"step": 4164
|
| 25038 |
+
},
|
| 25039 |
+
{
|
| 25040 |
+
"epoch": 4.77,
|
| 25041 |
+
"learning_rate": 2.3214285714285715e-05,
|
| 25042 |
+
"loss": 0.2714,
|
| 25043 |
+
"step": 4165
|
| 25044 |
+
},
|
| 25045 |
+
{
|
| 25046 |
+
"epoch": 4.77,
|
| 25047 |
+
"learning_rate": 2.3095238095238097e-05,
|
| 25048 |
+
"loss": 0.2415,
|
| 25049 |
+
"step": 4166
|
| 25050 |
+
},
|
| 25051 |
+
{
|
| 25052 |
+
"epoch": 4.77,
|
| 25053 |
+
"learning_rate": 2.2976190476190476e-05,
|
| 25054 |
+
"loss": 0.2729,
|
| 25055 |
+
"step": 4167
|
| 25056 |
+
},
|
| 25057 |
+
{
|
| 25058 |
+
"epoch": 4.78,
|
| 25059 |
+
"learning_rate": 2.2857142857142858e-05,
|
| 25060 |
+
"loss": 0.263,
|
| 25061 |
+
"step": 4168
|
| 25062 |
+
},
|
| 25063 |
+
{
|
| 25064 |
+
"epoch": 4.78,
|
| 25065 |
+
"learning_rate": 2.273809523809524e-05,
|
| 25066 |
+
"loss": 0.314,
|
| 25067 |
+
"step": 4169
|
| 25068 |
+
},
|
| 25069 |
+
{
|
| 25070 |
+
"epoch": 4.78,
|
| 25071 |
+
"learning_rate": 2.261904761904762e-05,
|
| 25072 |
+
"loss": 0.2555,
|
| 25073 |
+
"step": 4170
|
| 25074 |
+
},
|
| 25075 |
+
{
|
| 25076 |
+
"epoch": 4.78,
|
| 25077 |
+
"learning_rate": 2.25e-05,
|
| 25078 |
+
"loss": 0.2591,
|
| 25079 |
+
"step": 4171
|
| 25080 |
+
},
|
| 25081 |
+
{
|
| 25082 |
+
"epoch": 4.78,
|
| 25083 |
+
"learning_rate": 2.2380952380952384e-05,
|
| 25084 |
+
"loss": 0.2634,
|
| 25085 |
+
"step": 4172
|
| 25086 |
+
},
|
| 25087 |
+
{
|
| 25088 |
+
"epoch": 4.78,
|
| 25089 |
+
"learning_rate": 2.2261904761904763e-05,
|
| 25090 |
+
"loss": 0.2551,
|
| 25091 |
+
"step": 4173
|
| 25092 |
+
},
|
| 25093 |
+
{
|
| 25094 |
+
"epoch": 4.78,
|
| 25095 |
+
"learning_rate": 2.214285714285714e-05,
|
| 25096 |
+
"loss": 0.2619,
|
| 25097 |
+
"step": 4174
|
| 25098 |
+
},
|
| 25099 |
+
{
|
| 25100 |
+
"epoch": 4.78,
|
| 25101 |
+
"learning_rate": 2.2023809523809524e-05,
|
| 25102 |
+
"loss": 0.2568,
|
| 25103 |
+
"step": 4175
|
| 25104 |
+
},
|
| 25105 |
+
{
|
| 25106 |
+
"epoch": 4.79,
|
| 25107 |
+
"learning_rate": 2.1904761904761906e-05,
|
| 25108 |
+
"loss": 0.2684,
|
| 25109 |
+
"step": 4176
|
| 25110 |
+
},
|
| 25111 |
+
{
|
| 25112 |
+
"epoch": 4.79,
|
| 25113 |
+
"learning_rate": 2.1785714285714288e-05,
|
| 25114 |
+
"loss": 0.2504,
|
| 25115 |
+
"step": 4177
|
| 25116 |
+
},
|
| 25117 |
+
{
|
| 25118 |
+
"epoch": 4.79,
|
| 25119 |
+
"learning_rate": 2.1666666666666667e-05,
|
| 25120 |
+
"loss": 0.2646,
|
| 25121 |
+
"step": 4178
|
| 25122 |
+
},
|
| 25123 |
+
{
|
| 25124 |
+
"epoch": 4.79,
|
| 25125 |
+
"learning_rate": 2.154761904761905e-05,
|
| 25126 |
+
"loss": 0.2585,
|
| 25127 |
+
"step": 4179
|
| 25128 |
+
},
|
| 25129 |
+
{
|
| 25130 |
+
"epoch": 4.79,
|
| 25131 |
+
"learning_rate": 2.1428571428571428e-05,
|
| 25132 |
+
"loss": 0.2731,
|
| 25133 |
+
"step": 4180
|
| 25134 |
+
},
|
| 25135 |
+
{
|
| 25136 |
+
"epoch": 4.79,
|
| 25137 |
+
"learning_rate": 2.130952380952381e-05,
|
| 25138 |
+
"loss": 0.2454,
|
| 25139 |
+
"step": 4181
|
| 25140 |
+
},
|
| 25141 |
+
{
|
| 25142 |
+
"epoch": 4.79,
|
| 25143 |
+
"learning_rate": 2.1190476190476193e-05,
|
| 25144 |
+
"loss": 0.2672,
|
| 25145 |
+
"step": 4182
|
| 25146 |
+
},
|
| 25147 |
+
{
|
| 25148 |
+
"epoch": 4.79,
|
| 25149 |
+
"learning_rate": 2.107142857142857e-05,
|
| 25150 |
+
"loss": 0.2647,
|
| 25151 |
+
"step": 4183
|
| 25152 |
+
},
|
| 25153 |
+
{
|
| 25154 |
+
"epoch": 4.79,
|
| 25155 |
+
"learning_rate": 2.0952380952380954e-05,
|
| 25156 |
+
"loss": 0.3082,
|
| 25157 |
+
"step": 4184
|
| 25158 |
+
},
|
| 25159 |
+
{
|
| 25160 |
+
"epoch": 4.8,
|
| 25161 |
+
"learning_rate": 2.0833333333333336e-05,
|
| 25162 |
+
"loss": 0.2822,
|
| 25163 |
+
"step": 4185
|
| 25164 |
+
},
|
| 25165 |
+
{
|
| 25166 |
+
"epoch": 4.8,
|
| 25167 |
+
"learning_rate": 2.0714285714285718e-05,
|
| 25168 |
+
"loss": 0.283,
|
| 25169 |
+
"step": 4186
|
| 25170 |
+
},
|
| 25171 |
+
{
|
| 25172 |
+
"epoch": 4.8,
|
| 25173 |
+
"learning_rate": 2.0595238095238094e-05,
|
| 25174 |
+
"loss": 0.2863,
|
| 25175 |
+
"step": 4187
|
| 25176 |
+
},
|
| 25177 |
+
{
|
| 25178 |
+
"epoch": 4.8,
|
| 25179 |
+
"learning_rate": 2.0476190476190476e-05,
|
| 25180 |
+
"loss": 0.2624,
|
| 25181 |
+
"step": 4188
|
| 25182 |
+
},
|
| 25183 |
+
{
|
| 25184 |
+
"epoch": 4.8,
|
| 25185 |
+
"learning_rate": 2.0357142857142858e-05,
|
| 25186 |
+
"loss": 0.2603,
|
| 25187 |
+
"step": 4189
|
| 25188 |
+
},
|
| 25189 |
+
{
|
| 25190 |
+
"epoch": 4.8,
|
| 25191 |
+
"learning_rate": 2.023809523809524e-05,
|
| 25192 |
+
"loss": 0.2798,
|
| 25193 |
+
"step": 4190
|
| 25194 |
+
},
|
| 25195 |
+
{
|
| 25196 |
+
"epoch": 4.8,
|
| 25197 |
+
"learning_rate": 2.011904761904762e-05,
|
| 25198 |
+
"loss": 0.284,
|
| 25199 |
+
"step": 4191
|
| 25200 |
+
},
|
| 25201 |
+
{
|
| 25202 |
+
"epoch": 4.8,
|
| 25203 |
+
"learning_rate": 2e-05,
|
| 25204 |
+
"loss": 0.2918,
|
| 25205 |
+
"step": 4192
|
| 25206 |
+
},
|
| 25207 |
+
{
|
| 25208 |
+
"epoch": 4.8,
|
| 25209 |
+
"learning_rate": 1.988095238095238e-05,
|
| 25210 |
+
"loss": 0.2877,
|
| 25211 |
+
"step": 4193
|
| 25212 |
+
},
|
| 25213 |
+
{
|
| 25214 |
+
"epoch": 4.81,
|
| 25215 |
+
"learning_rate": 1.9761904761904763e-05,
|
| 25216 |
+
"loss": 0.2742,
|
| 25217 |
+
"step": 4194
|
| 25218 |
+
},
|
| 25219 |
+
{
|
| 25220 |
+
"epoch": 4.81,
|
| 25221 |
+
"learning_rate": 1.9642857142857145e-05,
|
| 25222 |
+
"loss": 0.2761,
|
| 25223 |
+
"step": 4195
|
| 25224 |
+
},
|
| 25225 |
+
{
|
| 25226 |
+
"epoch": 4.81,
|
| 25227 |
+
"learning_rate": 1.9523809523809524e-05,
|
| 25228 |
+
"loss": 0.2653,
|
| 25229 |
+
"step": 4196
|
| 25230 |
+
},
|
| 25231 |
+
{
|
| 25232 |
+
"epoch": 4.81,
|
| 25233 |
+
"learning_rate": 1.9404761904761906e-05,
|
| 25234 |
+
"loss": 0.2618,
|
| 25235 |
+
"step": 4197
|
| 25236 |
+
},
|
| 25237 |
+
{
|
| 25238 |
+
"epoch": 4.81,
|
| 25239 |
+
"learning_rate": 1.928571428571429e-05,
|
| 25240 |
+
"loss": 0.2968,
|
| 25241 |
+
"step": 4198
|
| 25242 |
+
},
|
| 25243 |
+
{
|
| 25244 |
+
"epoch": 4.81,
|
| 25245 |
+
"learning_rate": 1.9166666666666667e-05,
|
| 25246 |
+
"loss": 0.2693,
|
| 25247 |
+
"step": 4199
|
| 25248 |
+
},
|
| 25249 |
+
{
|
| 25250 |
+
"epoch": 4.81,
|
| 25251 |
+
"learning_rate": 1.9047619047619046e-05,
|
| 25252 |
+
"loss": 0.2781,
|
| 25253 |
+
"step": 4200
|
| 25254 |
+
},
|
| 25255 |
+
{
|
| 25256 |
+
"epoch": 4.81,
|
| 25257 |
+
"learning_rate": 1.892857142857143e-05,
|
| 25258 |
+
"loss": 0.2677,
|
| 25259 |
+
"step": 4201
|
| 25260 |
+
},
|
| 25261 |
+
{
|
| 25262 |
+
"epoch": 4.81,
|
| 25263 |
+
"learning_rate": 1.880952380952381e-05,
|
| 25264 |
+
"loss": 0.2707,
|
| 25265 |
+
"step": 4202
|
| 25266 |
+
},
|
| 25267 |
+
{
|
| 25268 |
+
"epoch": 4.82,
|
| 25269 |
+
"learning_rate": 1.8690476190476193e-05,
|
| 25270 |
+
"loss": 0.2955,
|
| 25271 |
+
"step": 4203
|
| 25272 |
+
},
|
| 25273 |
+
{
|
| 25274 |
+
"epoch": 4.82,
|
| 25275 |
+
"learning_rate": 1.8571428571428572e-05,
|
| 25276 |
+
"loss": 0.2482,
|
| 25277 |
+
"step": 4204
|
| 25278 |
+
},
|
| 25279 |
+
{
|
| 25280 |
+
"epoch": 4.82,
|
| 25281 |
+
"learning_rate": 1.8452380952380954e-05,
|
| 25282 |
+
"loss": 0.2473,
|
| 25283 |
+
"step": 4205
|
| 25284 |
+
},
|
| 25285 |
+
{
|
| 25286 |
+
"epoch": 4.82,
|
| 25287 |
+
"learning_rate": 1.8333333333333333e-05,
|
| 25288 |
+
"loss": 0.2864,
|
| 25289 |
+
"step": 4206
|
| 25290 |
+
},
|
| 25291 |
+
{
|
| 25292 |
+
"epoch": 4.82,
|
| 25293 |
+
"learning_rate": 1.8214285714285715e-05,
|
| 25294 |
+
"loss": 0.2549,
|
| 25295 |
+
"step": 4207
|
| 25296 |
+
},
|
| 25297 |
+
{
|
| 25298 |
+
"epoch": 4.82,
|
| 25299 |
+
"learning_rate": 1.8095238095238094e-05,
|
| 25300 |
+
"loss": 0.2634,
|
| 25301 |
+
"step": 4208
|
| 25302 |
+
},
|
| 25303 |
+
{
|
| 25304 |
+
"epoch": 4.82,
|
| 25305 |
+
"learning_rate": 1.7976190476190476e-05,
|
| 25306 |
+
"loss": 0.2615,
|
| 25307 |
+
"step": 4209
|
| 25308 |
+
},
|
| 25309 |
+
{
|
| 25310 |
+
"epoch": 4.82,
|
| 25311 |
+
"learning_rate": 1.785714285714286e-05,
|
| 25312 |
+
"loss": 0.2872,
|
| 25313 |
+
"step": 4210
|
| 25314 |
+
},
|
| 25315 |
+
{
|
| 25316 |
+
"epoch": 4.83,
|
| 25317 |
+
"learning_rate": 1.773809523809524e-05,
|
| 25318 |
+
"loss": 0.2486,
|
| 25319 |
+
"step": 4211
|
| 25320 |
+
},
|
| 25321 |
+
{
|
| 25322 |
+
"epoch": 4.83,
|
| 25323 |
+
"learning_rate": 1.761904761904762e-05,
|
| 25324 |
+
"loss": 0.2434,
|
| 25325 |
+
"step": 4212
|
| 25326 |
+
},
|
| 25327 |
+
{
|
| 25328 |
+
"epoch": 4.83,
|
| 25329 |
+
"learning_rate": 1.75e-05,
|
| 25330 |
+
"loss": 0.2623,
|
| 25331 |
+
"step": 4213
|
| 25332 |
+
},
|
| 25333 |
+
{
|
| 25334 |
+
"epoch": 4.83,
|
| 25335 |
+
"learning_rate": 1.738095238095238e-05,
|
| 25336 |
+
"loss": 0.2562,
|
| 25337 |
+
"step": 4214
|
| 25338 |
+
},
|
| 25339 |
+
{
|
| 25340 |
+
"epoch": 4.83,
|
| 25341 |
+
"learning_rate": 1.7261904761904763e-05,
|
| 25342 |
+
"loss": 0.266,
|
| 25343 |
+
"step": 4215
|
| 25344 |
+
},
|
| 25345 |
+
{
|
| 25346 |
+
"epoch": 4.83,
|
| 25347 |
+
"learning_rate": 1.7142857142857145e-05,
|
| 25348 |
+
"loss": 0.2595,
|
| 25349 |
+
"step": 4216
|
| 25350 |
+
},
|
| 25351 |
+
{
|
| 25352 |
+
"epoch": 4.83,
|
| 25353 |
+
"learning_rate": 1.7023809523809524e-05,
|
| 25354 |
+
"loss": 0.2737,
|
| 25355 |
+
"step": 4217
|
| 25356 |
+
},
|
| 25357 |
+
{
|
| 25358 |
+
"epoch": 4.83,
|
| 25359 |
+
"learning_rate": 1.6904761904761906e-05,
|
| 25360 |
+
"loss": 0.2811,
|
| 25361 |
+
"step": 4218
|
| 25362 |
+
},
|
| 25363 |
+
{
|
| 25364 |
+
"epoch": 4.83,
|
| 25365 |
+
"learning_rate": 1.6785714285714285e-05,
|
| 25366 |
+
"loss": 0.2666,
|
| 25367 |
+
"step": 4219
|
| 25368 |
+
},
|
| 25369 |
+
{
|
| 25370 |
+
"epoch": 4.84,
|
| 25371 |
+
"learning_rate": 1.6666666666666667e-05,
|
| 25372 |
+
"loss": 0.271,
|
| 25373 |
+
"step": 4220
|
| 25374 |
+
},
|
| 25375 |
+
{
|
| 25376 |
+
"epoch": 4.84,
|
| 25377 |
+
"learning_rate": 1.6547619047619046e-05,
|
| 25378 |
+
"loss": 0.257,
|
| 25379 |
+
"step": 4221
|
| 25380 |
+
},
|
| 25381 |
+
{
|
| 25382 |
+
"epoch": 4.84,
|
| 25383 |
+
"learning_rate": 1.642857142857143e-05,
|
| 25384 |
+
"loss": 0.2699,
|
| 25385 |
+
"step": 4222
|
| 25386 |
+
},
|
| 25387 |
+
{
|
| 25388 |
+
"epoch": 4.84,
|
| 25389 |
+
"learning_rate": 1.630952380952381e-05,
|
| 25390 |
+
"loss": 0.2634,
|
| 25391 |
+
"step": 4223
|
| 25392 |
+
},
|
| 25393 |
+
{
|
| 25394 |
+
"epoch": 4.84,
|
| 25395 |
+
"learning_rate": 1.6190476190476193e-05,
|
| 25396 |
+
"loss": 0.2659,
|
| 25397 |
+
"step": 4224
|
| 25398 |
+
},
|
| 25399 |
+
{
|
| 25400 |
+
"epoch": 4.84,
|
| 25401 |
+
"learning_rate": 1.6071428571428572e-05,
|
| 25402 |
+
"loss": 0.2448,
|
| 25403 |
+
"step": 4225
|
| 25404 |
+
},
|
| 25405 |
+
{
|
| 25406 |
+
"epoch": 4.84,
|
| 25407 |
+
"learning_rate": 1.595238095238095e-05,
|
| 25408 |
+
"loss": 0.2712,
|
| 25409 |
+
"step": 4226
|
| 25410 |
+
},
|
| 25411 |
+
{
|
| 25412 |
+
"epoch": 4.84,
|
| 25413 |
+
"learning_rate": 1.5833333333333333e-05,
|
| 25414 |
+
"loss": 0.2639,
|
| 25415 |
+
"step": 4227
|
| 25416 |
+
},
|
| 25417 |
+
{
|
| 25418 |
+
"epoch": 4.84,
|
| 25419 |
+
"learning_rate": 1.5714285714285715e-05,
|
| 25420 |
+
"loss": 0.2633,
|
| 25421 |
+
"step": 4228
|
| 25422 |
+
},
|
| 25423 |
+
{
|
| 25424 |
+
"epoch": 4.85,
|
| 25425 |
+
"learning_rate": 1.5595238095238098e-05,
|
| 25426 |
+
"loss": 0.2863,
|
| 25427 |
+
"step": 4229
|
| 25428 |
+
},
|
| 25429 |
+
{
|
| 25430 |
+
"epoch": 4.85,
|
| 25431 |
+
"learning_rate": 1.5476190476190476e-05,
|
| 25432 |
+
"loss": 0.2567,
|
| 25433 |
+
"step": 4230
|
| 25434 |
+
},
|
| 25435 |
+
{
|
| 25436 |
+
"epoch": 4.85,
|
| 25437 |
+
"learning_rate": 1.535714285714286e-05,
|
| 25438 |
+
"loss": 0.2694,
|
| 25439 |
+
"step": 4231
|
| 25440 |
+
},
|
| 25441 |
+
{
|
| 25442 |
+
"epoch": 4.85,
|
| 25443 |
+
"learning_rate": 1.5238095238095241e-05,
|
| 25444 |
+
"loss": 0.2501,
|
| 25445 |
+
"step": 4232
|
| 25446 |
+
},
|
| 25447 |
+
{
|
| 25448 |
+
"epoch": 4.85,
|
| 25449 |
+
"learning_rate": 1.5119047619047618e-05,
|
| 25450 |
+
"loss": 0.2795,
|
| 25451 |
+
"step": 4233
|
| 25452 |
+
},
|
| 25453 |
+
{
|
| 25454 |
+
"epoch": 4.85,
|
| 25455 |
+
"learning_rate": 1.5e-05,
|
| 25456 |
+
"loss": 0.2548,
|
| 25457 |
+
"step": 4234
|
| 25458 |
+
},
|
| 25459 |
+
{
|
| 25460 |
+
"epoch": 4.85,
|
| 25461 |
+
"learning_rate": 1.4880952380952381e-05,
|
| 25462 |
+
"loss": 0.2614,
|
| 25463 |
+
"step": 4235
|
| 25464 |
+
},
|
| 25465 |
+
{
|
| 25466 |
+
"epoch": 4.85,
|
| 25467 |
+
"learning_rate": 1.4761904761904763e-05,
|
| 25468 |
+
"loss": 0.2445,
|
| 25469 |
+
"step": 4236
|
| 25470 |
+
},
|
| 25471 |
+
{
|
| 25472 |
+
"epoch": 4.86,
|
| 25473 |
+
"learning_rate": 1.4642857142857144e-05,
|
| 25474 |
+
"loss": 0.2789,
|
| 25475 |
+
"step": 4237
|
| 25476 |
+
},
|
| 25477 |
+
{
|
| 25478 |
+
"epoch": 4.86,
|
| 25479 |
+
"learning_rate": 1.4523809523809526e-05,
|
| 25480 |
+
"loss": 0.269,
|
| 25481 |
+
"step": 4238
|
| 25482 |
+
},
|
| 25483 |
+
{
|
| 25484 |
+
"epoch": 4.86,
|
| 25485 |
+
"learning_rate": 1.4404761904761905e-05,
|
| 25486 |
+
"loss": 0.2692,
|
| 25487 |
+
"step": 4239
|
| 25488 |
+
},
|
| 25489 |
+
{
|
| 25490 |
+
"epoch": 4.86,
|
| 25491 |
+
"learning_rate": 1.4285714285714285e-05,
|
| 25492 |
+
"loss": 0.2703,
|
| 25493 |
+
"step": 4240
|
| 25494 |
+
},
|
| 25495 |
+
{
|
| 25496 |
+
"epoch": 4.86,
|
| 25497 |
+
"learning_rate": 1.4166666666666668e-05,
|
| 25498 |
+
"loss": 0.2658,
|
| 25499 |
+
"step": 4241
|
| 25500 |
+
},
|
| 25501 |
+
{
|
| 25502 |
+
"epoch": 4.86,
|
| 25503 |
+
"learning_rate": 1.4047619047619048e-05,
|
| 25504 |
+
"loss": 0.2561,
|
| 25505 |
+
"step": 4242
|
| 25506 |
+
},
|
| 25507 |
+
{
|
| 25508 |
+
"epoch": 4.86,
|
| 25509 |
+
"learning_rate": 1.392857142857143e-05,
|
| 25510 |
+
"loss": 0.2518,
|
| 25511 |
+
"step": 4243
|
| 25512 |
+
},
|
| 25513 |
+
{
|
| 25514 |
+
"epoch": 4.86,
|
| 25515 |
+
"learning_rate": 1.3809523809523811e-05,
|
| 25516 |
+
"loss": 0.2574,
|
| 25517 |
+
"step": 4244
|
| 25518 |
+
},
|
| 25519 |
+
{
|
| 25520 |
+
"epoch": 4.86,
|
| 25521 |
+
"learning_rate": 1.3690476190476192e-05,
|
| 25522 |
+
"loss": 0.2647,
|
| 25523 |
+
"step": 4245
|
| 25524 |
+
},
|
| 25525 |
+
{
|
| 25526 |
+
"epoch": 4.87,
|
| 25527 |
+
"learning_rate": 1.357142857142857e-05,
|
| 25528 |
+
"loss": 0.2567,
|
| 25529 |
+
"step": 4246
|
| 25530 |
+
},
|
| 25531 |
+
{
|
| 25532 |
+
"epoch": 4.87,
|
| 25533 |
+
"learning_rate": 1.3452380952380953e-05,
|
| 25534 |
+
"loss": 0.283,
|
| 25535 |
+
"step": 4247
|
| 25536 |
+
},
|
| 25537 |
+
{
|
| 25538 |
+
"epoch": 4.87,
|
| 25539 |
+
"learning_rate": 1.3333333333333333e-05,
|
| 25540 |
+
"loss": 0.3054,
|
| 25541 |
+
"step": 4248
|
| 25542 |
+
},
|
| 25543 |
+
{
|
| 25544 |
+
"epoch": 4.87,
|
| 25545 |
+
"learning_rate": 1.3214285714285716e-05,
|
| 25546 |
+
"loss": 0.2886,
|
| 25547 |
+
"step": 4249
|
| 25548 |
+
},
|
| 25549 |
+
{
|
| 25550 |
+
"epoch": 4.87,
|
| 25551 |
+
"learning_rate": 1.3095238095238096e-05,
|
| 25552 |
+
"loss": 0.2887,
|
| 25553 |
+
"step": 4250
|
| 25554 |
+
},
|
| 25555 |
+
{
|
| 25556 |
+
"epoch": 4.87,
|
| 25557 |
+
"learning_rate": 1.2976190476190478e-05,
|
| 25558 |
+
"loss": 0.2579,
|
| 25559 |
+
"step": 4251
|
| 25560 |
+
},
|
| 25561 |
+
{
|
| 25562 |
+
"epoch": 4.87,
|
| 25563 |
+
"learning_rate": 1.2857142857142857e-05,
|
| 25564 |
+
"loss": 0.2851,
|
| 25565 |
+
"step": 4252
|
| 25566 |
+
},
|
| 25567 |
+
{
|
| 25568 |
+
"epoch": 4.87,
|
| 25569 |
+
"learning_rate": 1.2738095238095238e-05,
|
| 25570 |
+
"loss": 0.2655,
|
| 25571 |
+
"step": 4253
|
| 25572 |
+
},
|
| 25573 |
+
{
|
| 25574 |
+
"epoch": 4.87,
|
| 25575 |
+
"learning_rate": 1.261904761904762e-05,
|
| 25576 |
+
"loss": 0.2617,
|
| 25577 |
+
"step": 4254
|
| 25578 |
+
},
|
| 25579 |
+
{
|
| 25580 |
+
"epoch": 4.88,
|
| 25581 |
+
"learning_rate": 1.25e-05,
|
| 25582 |
+
"loss": 0.2548,
|
| 25583 |
+
"step": 4255
|
| 25584 |
+
},
|
| 25585 |
+
{
|
| 25586 |
+
"epoch": 4.88,
|
| 25587 |
+
"learning_rate": 1.2380952380952381e-05,
|
| 25588 |
+
"loss": 0.2908,
|
| 25589 |
+
"step": 4256
|
| 25590 |
+
},
|
| 25591 |
+
{
|
| 25592 |
+
"epoch": 4.88,
|
| 25593 |
+
"learning_rate": 1.2261904761904762e-05,
|
| 25594 |
+
"loss": 0.2523,
|
| 25595 |
+
"step": 4257
|
| 25596 |
+
},
|
| 25597 |
+
{
|
| 25598 |
+
"epoch": 4.88,
|
| 25599 |
+
"learning_rate": 1.2142857142857144e-05,
|
| 25600 |
+
"loss": 0.259,
|
| 25601 |
+
"step": 4258
|
| 25602 |
+
},
|
| 25603 |
+
{
|
| 25604 |
+
"epoch": 4.88,
|
| 25605 |
+
"learning_rate": 1.2023809523809525e-05,
|
| 25606 |
+
"loss": 0.2707,
|
| 25607 |
+
"step": 4259
|
| 25608 |
+
},
|
| 25609 |
+
{
|
| 25610 |
+
"epoch": 4.88,
|
| 25611 |
+
"learning_rate": 1.1904761904761905e-05,
|
| 25612 |
+
"loss": 0.2623,
|
| 25613 |
+
"step": 4260
|
| 25614 |
+
},
|
| 25615 |
+
{
|
| 25616 |
+
"epoch": 4.88,
|
| 25617 |
+
"learning_rate": 1.1785714285714286e-05,
|
| 25618 |
+
"loss": 0.2653,
|
| 25619 |
+
"step": 4261
|
| 25620 |
+
},
|
| 25621 |
+
{
|
| 25622 |
+
"epoch": 4.88,
|
| 25623 |
+
"learning_rate": 1.1666666666666668e-05,
|
| 25624 |
+
"loss": 0.2773,
|
| 25625 |
+
"step": 4262
|
| 25626 |
+
},
|
| 25627 |
+
{
|
| 25628 |
+
"epoch": 4.88,
|
| 25629 |
+
"learning_rate": 1.1547619047619048e-05,
|
| 25630 |
+
"loss": 0.2512,
|
| 25631 |
+
"step": 4263
|
| 25632 |
+
},
|
| 25633 |
+
{
|
| 25634 |
+
"epoch": 4.89,
|
| 25635 |
+
"learning_rate": 1.1428571428571429e-05,
|
| 25636 |
+
"loss": 0.2541,
|
| 25637 |
+
"step": 4264
|
| 25638 |
+
},
|
| 25639 |
+
{
|
| 25640 |
+
"epoch": 4.89,
|
| 25641 |
+
"learning_rate": 1.130952380952381e-05,
|
| 25642 |
+
"loss": 0.2424,
|
| 25643 |
+
"step": 4265
|
| 25644 |
+
},
|
| 25645 |
+
{
|
| 25646 |
+
"epoch": 4.89,
|
| 25647 |
+
"learning_rate": 1.1190476190476192e-05,
|
| 25648 |
+
"loss": 0.2576,
|
| 25649 |
+
"step": 4266
|
| 25650 |
+
},
|
| 25651 |
+
{
|
| 25652 |
+
"epoch": 4.89,
|
| 25653 |
+
"learning_rate": 1.107142857142857e-05,
|
| 25654 |
+
"loss": 0.2701,
|
| 25655 |
+
"step": 4267
|
| 25656 |
+
},
|
| 25657 |
+
{
|
| 25658 |
+
"epoch": 4.89,
|
| 25659 |
+
"learning_rate": 1.0952380952380953e-05,
|
| 25660 |
+
"loss": 0.2323,
|
| 25661 |
+
"step": 4268
|
| 25662 |
+
},
|
| 25663 |
+
{
|
| 25664 |
+
"epoch": 4.89,
|
| 25665 |
+
"learning_rate": 1.0833333333333334e-05,
|
| 25666 |
+
"loss": 0.2822,
|
| 25667 |
+
"step": 4269
|
| 25668 |
+
},
|
| 25669 |
+
{
|
| 25670 |
+
"epoch": 4.89,
|
| 25671 |
+
"learning_rate": 1.0714285714285714e-05,
|
| 25672 |
+
"loss": 0.2591,
|
| 25673 |
+
"step": 4270
|
| 25674 |
+
},
|
| 25675 |
+
{
|
| 25676 |
+
"epoch": 4.89,
|
| 25677 |
+
"learning_rate": 1.0595238095238096e-05,
|
| 25678 |
+
"loss": 0.2553,
|
| 25679 |
+
"step": 4271
|
| 25680 |
+
},
|
| 25681 |
+
{
|
| 25682 |
+
"epoch": 4.9,
|
| 25683 |
+
"learning_rate": 1.0476190476190477e-05,
|
| 25684 |
+
"loss": 0.245,
|
| 25685 |
+
"step": 4272
|
| 25686 |
+
},
|
| 25687 |
+
{
|
| 25688 |
+
"epoch": 4.9,
|
| 25689 |
+
"learning_rate": 1.0357142857142859e-05,
|
| 25690 |
+
"loss": 0.2726,
|
| 25691 |
+
"step": 4273
|
| 25692 |
+
},
|
| 25693 |
+
{
|
| 25694 |
+
"epoch": 4.9,
|
| 25695 |
+
"learning_rate": 1.0238095238095238e-05,
|
| 25696 |
+
"loss": 0.2522,
|
| 25697 |
+
"step": 4274
|
| 25698 |
+
},
|
| 25699 |
+
{
|
| 25700 |
+
"epoch": 4.9,
|
| 25701 |
+
"learning_rate": 1.011904761904762e-05,
|
| 25702 |
+
"loss": 0.2669,
|
| 25703 |
+
"step": 4275
|
| 25704 |
+
},
|
| 25705 |
+
{
|
| 25706 |
+
"epoch": 4.9,
|
| 25707 |
+
"learning_rate": 1e-05,
|
| 25708 |
+
"loss": 0.2634,
|
| 25709 |
+
"step": 4276
|
| 25710 |
+
},
|
| 25711 |
+
{
|
| 25712 |
+
"epoch": 4.9,
|
| 25713 |
+
"learning_rate": 9.880952380952381e-06,
|
| 25714 |
+
"loss": 0.2712,
|
| 25715 |
+
"step": 4277
|
| 25716 |
+
},
|
| 25717 |
+
{
|
| 25718 |
+
"epoch": 4.9,
|
| 25719 |
+
"learning_rate": 9.761904761904762e-06,
|
| 25720 |
+
"loss": 0.2598,
|
| 25721 |
+
"step": 4278
|
| 25722 |
+
},
|
| 25723 |
+
{
|
| 25724 |
+
"epoch": 4.9,
|
| 25725 |
+
"learning_rate": 9.642857142857144e-06,
|
| 25726 |
+
"loss": 0.2706,
|
| 25727 |
+
"step": 4279
|
| 25728 |
+
},
|
| 25729 |
+
{
|
| 25730 |
+
"epoch": 4.9,
|
| 25731 |
+
"learning_rate": 9.523809523809523e-06,
|
| 25732 |
+
"loss": 0.2692,
|
| 25733 |
+
"step": 4280
|
| 25734 |
+
},
|
| 25735 |
+
{
|
| 25736 |
+
"epoch": 4.91,
|
| 25737 |
+
"learning_rate": 9.404761904761905e-06,
|
| 25738 |
+
"loss": 0.2679,
|
| 25739 |
+
"step": 4281
|
| 25740 |
+
},
|
| 25741 |
+
{
|
| 25742 |
+
"epoch": 4.91,
|
| 25743 |
+
"learning_rate": 9.285714285714286e-06,
|
| 25744 |
+
"loss": 0.2761,
|
| 25745 |
+
"step": 4282
|
| 25746 |
+
},
|
| 25747 |
+
{
|
| 25748 |
+
"epoch": 4.91,
|
| 25749 |
+
"learning_rate": 9.166666666666666e-06,
|
| 25750 |
+
"loss": 0.2655,
|
| 25751 |
+
"step": 4283
|
| 25752 |
+
},
|
| 25753 |
+
{
|
| 25754 |
+
"epoch": 4.91,
|
| 25755 |
+
"learning_rate": 9.047619047619047e-06,
|
| 25756 |
+
"loss": 0.2771,
|
| 25757 |
+
"step": 4284
|
| 25758 |
+
},
|
| 25759 |
+
{
|
| 25760 |
+
"epoch": 4.91,
|
| 25761 |
+
"learning_rate": 8.92857142857143e-06,
|
| 25762 |
+
"loss": 0.28,
|
| 25763 |
+
"step": 4285
|
| 25764 |
+
},
|
| 25765 |
+
{
|
| 25766 |
+
"epoch": 4.91,
|
| 25767 |
+
"learning_rate": 8.80952380952381e-06,
|
| 25768 |
+
"loss": 0.2576,
|
| 25769 |
+
"step": 4286
|
| 25770 |
+
},
|
| 25771 |
+
{
|
| 25772 |
+
"epoch": 4.91,
|
| 25773 |
+
"learning_rate": 8.69047619047619e-06,
|
| 25774 |
+
"loss": 0.2791,
|
| 25775 |
+
"step": 4287
|
| 25776 |
+
},
|
| 25777 |
+
{
|
| 25778 |
+
"epoch": 4.91,
|
| 25779 |
+
"learning_rate": 8.571428571428573e-06,
|
| 25780 |
+
"loss": 0.2697,
|
| 25781 |
+
"step": 4288
|
| 25782 |
+
},
|
| 25783 |
+
{
|
| 25784 |
+
"epoch": 4.91,
|
| 25785 |
+
"learning_rate": 8.452380952380953e-06,
|
| 25786 |
+
"loss": 0.2596,
|
| 25787 |
+
"step": 4289
|
| 25788 |
+
},
|
| 25789 |
+
{
|
| 25790 |
+
"epoch": 4.92,
|
| 25791 |
+
"learning_rate": 8.333333333333334e-06,
|
| 25792 |
+
"loss": 0.2956,
|
| 25793 |
+
"step": 4290
|
| 25794 |
+
},
|
| 25795 |
+
{
|
| 25796 |
+
"epoch": 4.92,
|
| 25797 |
+
"learning_rate": 8.214285714285714e-06,
|
| 25798 |
+
"loss": 0.2442,
|
| 25799 |
+
"step": 4291
|
| 25800 |
+
},
|
| 25801 |
+
{
|
| 25802 |
+
"epoch": 4.92,
|
| 25803 |
+
"learning_rate": 8.095238095238097e-06,
|
| 25804 |
+
"loss": 0.2701,
|
| 25805 |
+
"step": 4292
|
| 25806 |
+
},
|
| 25807 |
+
{
|
| 25808 |
+
"epoch": 4.92,
|
| 25809 |
+
"learning_rate": 7.976190476190475e-06,
|
| 25810 |
+
"loss": 0.2996,
|
| 25811 |
+
"step": 4293
|
| 25812 |
+
},
|
| 25813 |
+
{
|
| 25814 |
+
"epoch": 4.92,
|
| 25815 |
+
"learning_rate": 7.857142857142858e-06,
|
| 25816 |
+
"loss": 0.2571,
|
| 25817 |
+
"step": 4294
|
| 25818 |
+
},
|
| 25819 |
+
{
|
| 25820 |
+
"epoch": 4.92,
|
| 25821 |
+
"learning_rate": 7.738095238095238e-06,
|
| 25822 |
+
"loss": 0.244,
|
| 25823 |
+
"step": 4295
|
| 25824 |
+
},
|
| 25825 |
+
{
|
| 25826 |
+
"epoch": 4.92,
|
| 25827 |
+
"learning_rate": 7.6190476190476205e-06,
|
| 25828 |
+
"loss": 0.2813,
|
| 25829 |
+
"step": 4296
|
| 25830 |
+
},
|
| 25831 |
+
{
|
| 25832 |
+
"epoch": 4.92,
|
| 25833 |
+
"learning_rate": 7.5e-06,
|
| 25834 |
+
"loss": 0.2748,
|
| 25835 |
+
"step": 4297
|
| 25836 |
+
},
|
| 25837 |
+
{
|
| 25838 |
+
"epoch": 4.92,
|
| 25839 |
+
"learning_rate": 7.380952380952382e-06,
|
| 25840 |
+
"loss": 0.2431,
|
| 25841 |
+
"step": 4298
|
| 25842 |
+
},
|
| 25843 |
+
{
|
| 25844 |
+
"epoch": 4.93,
|
| 25845 |
+
"learning_rate": 7.261904761904763e-06,
|
| 25846 |
+
"loss": 0.2433,
|
| 25847 |
+
"step": 4299
|
| 25848 |
+
},
|
| 25849 |
+
{
|
| 25850 |
+
"epoch": 4.93,
|
| 25851 |
+
"learning_rate": 7.142857142857143e-06,
|
| 25852 |
+
"loss": 0.2501,
|
| 25853 |
+
"step": 4300
|
| 25854 |
+
},
|
| 25855 |
+
{
|
| 25856 |
+
"epoch": 4.93,
|
| 25857 |
+
"learning_rate": 7.023809523809524e-06,
|
| 25858 |
+
"loss": 0.2824,
|
| 25859 |
+
"step": 4301
|
| 25860 |
+
},
|
| 25861 |
+
{
|
| 25862 |
+
"epoch": 4.93,
|
| 25863 |
+
"learning_rate": 6.9047619047619055e-06,
|
| 25864 |
+
"loss": 0.2593,
|
| 25865 |
+
"step": 4302
|
| 25866 |
+
},
|
| 25867 |
+
{
|
| 25868 |
+
"epoch": 4.93,
|
| 25869 |
+
"learning_rate": 6.785714285714285e-06,
|
| 25870 |
+
"loss": 0.2764,
|
| 25871 |
+
"step": 4303
|
| 25872 |
+
},
|
| 25873 |
+
{
|
| 25874 |
+
"epoch": 4.93,
|
| 25875 |
+
"learning_rate": 6.666666666666667e-06,
|
| 25876 |
+
"loss": 0.2762,
|
| 25877 |
+
"step": 4304
|
| 25878 |
+
},
|
| 25879 |
+
{
|
| 25880 |
+
"epoch": 4.93,
|
| 25881 |
+
"learning_rate": 6.547619047619048e-06,
|
| 25882 |
+
"loss": 0.2644,
|
| 25883 |
+
"step": 4305
|
| 25884 |
+
},
|
| 25885 |
+
{
|
| 25886 |
+
"epoch": 4.93,
|
| 25887 |
+
"learning_rate": 6.428571428571429e-06,
|
| 25888 |
+
"loss": 0.2759,
|
| 25889 |
+
"step": 4306
|
| 25890 |
+
},
|
| 25891 |
+
{
|
| 25892 |
+
"epoch": 4.94,
|
| 25893 |
+
"learning_rate": 6.30952380952381e-06,
|
| 25894 |
+
"loss": 0.2992,
|
| 25895 |
+
"step": 4307
|
| 25896 |
+
},
|
| 25897 |
+
{
|
| 25898 |
+
"epoch": 4.94,
|
| 25899 |
+
"learning_rate": 6.190476190476191e-06,
|
| 25900 |
+
"loss": 0.2387,
|
| 25901 |
+
"step": 4308
|
| 25902 |
+
},
|
| 25903 |
+
{
|
| 25904 |
+
"epoch": 4.94,
|
| 25905 |
+
"learning_rate": 6.071428571428572e-06,
|
| 25906 |
+
"loss": 0.2756,
|
| 25907 |
+
"step": 4309
|
| 25908 |
+
},
|
| 25909 |
+
{
|
| 25910 |
+
"epoch": 4.94,
|
| 25911 |
+
"learning_rate": 5.9523809523809525e-06,
|
| 25912 |
+
"loss": 0.2809,
|
| 25913 |
+
"step": 4310
|
| 25914 |
+
},
|
| 25915 |
+
{
|
| 25916 |
+
"epoch": 4.94,
|
| 25917 |
+
"learning_rate": 5.833333333333334e-06,
|
| 25918 |
+
"loss": 0.287,
|
| 25919 |
+
"step": 4311
|
| 25920 |
+
},
|
| 25921 |
+
{
|
| 25922 |
+
"epoch": 4.94,
|
| 25923 |
+
"learning_rate": 5.7142857142857145e-06,
|
| 25924 |
+
"loss": 0.2691,
|
| 25925 |
+
"step": 4312
|
| 25926 |
+
},
|
| 25927 |
+
{
|
| 25928 |
+
"epoch": 4.94,
|
| 25929 |
+
"learning_rate": 5.595238095238096e-06,
|
| 25930 |
+
"loss": 0.2925,
|
| 25931 |
+
"step": 4313
|
| 25932 |
+
},
|
| 25933 |
+
{
|
| 25934 |
+
"epoch": 4.94,
|
| 25935 |
+
"learning_rate": 5.4761904761904765e-06,
|
| 25936 |
+
"loss": 0.2699,
|
| 25937 |
+
"step": 4314
|
| 25938 |
+
},
|
| 25939 |
+
{
|
| 25940 |
+
"epoch": 4.94,
|
| 25941 |
+
"learning_rate": 5.357142857142857e-06,
|
| 25942 |
+
"loss": 0.2556,
|
| 25943 |
+
"step": 4315
|
| 25944 |
+
},
|
| 25945 |
+
{
|
| 25946 |
+
"epoch": 4.95,
|
| 25947 |
+
"learning_rate": 5.2380952380952384e-06,
|
| 25948 |
+
"loss": 0.2439,
|
| 25949 |
+
"step": 4316
|
| 25950 |
+
},
|
| 25951 |
+
{
|
| 25952 |
+
"epoch": 4.95,
|
| 25953 |
+
"learning_rate": 5.119047619047619e-06,
|
| 25954 |
+
"loss": 0.2887,
|
| 25955 |
+
"step": 4317
|
| 25956 |
+
},
|
| 25957 |
+
{
|
| 25958 |
+
"epoch": 4.95,
|
| 25959 |
+
"learning_rate": 5e-06,
|
| 25960 |
+
"loss": 0.2459,
|
| 25961 |
+
"step": 4318
|
| 25962 |
+
},
|
| 25963 |
+
{
|
| 25964 |
+
"epoch": 4.95,
|
| 25965 |
+
"learning_rate": 4.880952380952381e-06,
|
| 25966 |
+
"loss": 0.259,
|
| 25967 |
+
"step": 4319
|
| 25968 |
+
},
|
| 25969 |
+
{
|
| 25970 |
+
"epoch": 4.95,
|
| 25971 |
+
"learning_rate": 4.7619047619047615e-06,
|
| 25972 |
+
"loss": 0.2685,
|
| 25973 |
+
"step": 4320
|
| 25974 |
+
},
|
| 25975 |
+
{
|
| 25976 |
+
"epoch": 4.95,
|
| 25977 |
+
"learning_rate": 4.642857142857143e-06,
|
| 25978 |
+
"loss": 0.2623,
|
| 25979 |
+
"step": 4321
|
| 25980 |
+
},
|
| 25981 |
+
{
|
| 25982 |
+
"epoch": 4.95,
|
| 25983 |
+
"learning_rate": 4.5238095238095235e-06,
|
| 25984 |
+
"loss": 0.2813,
|
| 25985 |
+
"step": 4322
|
| 25986 |
+
},
|
| 25987 |
+
{
|
| 25988 |
+
"epoch": 4.95,
|
| 25989 |
+
"learning_rate": 4.404761904761905e-06,
|
| 25990 |
+
"loss": 0.2399,
|
| 25991 |
+
"step": 4323
|
| 25992 |
+
},
|
| 25993 |
+
{
|
| 25994 |
+
"epoch": 4.95,
|
| 25995 |
+
"learning_rate": 4.285714285714286e-06,
|
| 25996 |
+
"loss": 0.2774,
|
| 25997 |
+
"step": 4324
|
| 25998 |
+
},
|
| 25999 |
+
{
|
| 26000 |
+
"epoch": 4.96,
|
| 26001 |
+
"learning_rate": 4.166666666666667e-06,
|
| 26002 |
+
"loss": 0.2873,
|
| 26003 |
+
"step": 4325
|
| 26004 |
+
},
|
| 26005 |
+
{
|
| 26006 |
+
"epoch": 4.96,
|
| 26007 |
+
"learning_rate": 4.047619047619048e-06,
|
| 26008 |
+
"loss": 0.2815,
|
| 26009 |
+
"step": 4326
|
| 26010 |
+
},
|
| 26011 |
+
{
|
| 26012 |
+
"epoch": 4.96,
|
| 26013 |
+
"learning_rate": 3.928571428571429e-06,
|
| 26014 |
+
"loss": 0.2733,
|
| 26015 |
+
"step": 4327
|
| 26016 |
+
},
|
| 26017 |
+
{
|
| 26018 |
+
"epoch": 4.96,
|
| 26019 |
+
"learning_rate": 3.8095238095238102e-06,
|
| 26020 |
+
"loss": 0.2475,
|
| 26021 |
+
"step": 4328
|
| 26022 |
+
},
|
| 26023 |
+
{
|
| 26024 |
+
"epoch": 4.96,
|
| 26025 |
+
"learning_rate": 3.690476190476191e-06,
|
| 26026 |
+
"loss": 0.2547,
|
| 26027 |
+
"step": 4329
|
| 26028 |
+
},
|
| 26029 |
+
{
|
| 26030 |
+
"epoch": 4.96,
|
| 26031 |
+
"learning_rate": 3.5714285714285714e-06,
|
| 26032 |
+
"loss": 0.2817,
|
| 26033 |
+
"step": 4330
|
| 26034 |
+
},
|
| 26035 |
+
{
|
| 26036 |
+
"epoch": 4.96,
|
| 26037 |
+
"learning_rate": 3.4523809523809528e-06,
|
| 26038 |
+
"loss": 0.2664,
|
| 26039 |
+
"step": 4331
|
| 26040 |
+
},
|
| 26041 |
+
{
|
| 26042 |
+
"epoch": 4.96,
|
| 26043 |
+
"learning_rate": 3.3333333333333333e-06,
|
| 26044 |
+
"loss": 0.2468,
|
| 26045 |
+
"step": 4332
|
| 26046 |
+
},
|
| 26047 |
+
{
|
| 26048 |
+
"epoch": 4.97,
|
| 26049 |
+
"learning_rate": 3.2142857142857143e-06,
|
| 26050 |
+
"loss": 0.2599,
|
| 26051 |
+
"step": 4333
|
| 26052 |
+
},
|
| 26053 |
+
{
|
| 26054 |
+
"epoch": 4.97,
|
| 26055 |
+
"learning_rate": 3.0952380952380953e-06,
|
| 26056 |
+
"loss": 0.2644,
|
| 26057 |
+
"step": 4334
|
| 26058 |
+
},
|
| 26059 |
+
{
|
| 26060 |
+
"epoch": 4.97,
|
| 26061 |
+
"learning_rate": 2.9761904761904763e-06,
|
| 26062 |
+
"loss": 0.2408,
|
| 26063 |
+
"step": 4335
|
| 26064 |
+
},
|
| 26065 |
+
{
|
| 26066 |
+
"epoch": 4.97,
|
| 26067 |
+
"learning_rate": 2.8571428571428573e-06,
|
| 26068 |
+
"loss": 0.2485,
|
| 26069 |
+
"step": 4336
|
| 26070 |
+
},
|
| 26071 |
+
{
|
| 26072 |
+
"epoch": 4.97,
|
| 26073 |
+
"learning_rate": 2.7380952380952382e-06,
|
| 26074 |
+
"loss": 0.256,
|
| 26075 |
+
"step": 4337
|
| 26076 |
+
},
|
| 26077 |
+
{
|
| 26078 |
+
"epoch": 4.97,
|
| 26079 |
+
"learning_rate": 2.6190476190476192e-06,
|
| 26080 |
+
"loss": 0.2605,
|
| 26081 |
+
"step": 4338
|
| 26082 |
+
},
|
| 26083 |
+
{
|
| 26084 |
+
"epoch": 4.97,
|
| 26085 |
+
"learning_rate": 2.5e-06,
|
| 26086 |
+
"loss": 0.2442,
|
| 26087 |
+
"step": 4339
|
| 26088 |
+
},
|
| 26089 |
+
{
|
| 26090 |
+
"epoch": 4.97,
|
| 26091 |
+
"learning_rate": 2.3809523809523808e-06,
|
| 26092 |
+
"loss": 0.2585,
|
| 26093 |
+
"step": 4340
|
| 26094 |
+
},
|
| 26095 |
+
{
|
| 26096 |
+
"epoch": 4.97,
|
| 26097 |
+
"learning_rate": 2.2619047619047617e-06,
|
| 26098 |
+
"loss": 0.256,
|
| 26099 |
+
"step": 4341
|
| 26100 |
+
},
|
| 26101 |
+
{
|
| 26102 |
+
"epoch": 4.98,
|
| 26103 |
+
"learning_rate": 2.142857142857143e-06,
|
| 26104 |
+
"loss": 0.2488,
|
| 26105 |
+
"step": 4342
|
| 26106 |
+
},
|
| 26107 |
+
{
|
| 26108 |
+
"epoch": 4.98,
|
| 26109 |
+
"learning_rate": 2.023809523809524e-06,
|
| 26110 |
+
"loss": 0.245,
|
| 26111 |
+
"step": 4343
|
| 26112 |
+
},
|
| 26113 |
+
{
|
| 26114 |
+
"epoch": 4.98,
|
| 26115 |
+
"learning_rate": 1.9047619047619051e-06,
|
| 26116 |
+
"loss": 0.2661,
|
| 26117 |
+
"step": 4344
|
| 26118 |
+
},
|
| 26119 |
+
{
|
| 26120 |
+
"epoch": 4.98,
|
| 26121 |
+
"learning_rate": 1.7857142857142857e-06,
|
| 26122 |
+
"loss": 0.2857,
|
| 26123 |
+
"step": 4345
|
| 26124 |
+
},
|
| 26125 |
+
{
|
| 26126 |
+
"epoch": 4.98,
|
| 26127 |
+
"learning_rate": 1.6666666666666667e-06,
|
| 26128 |
+
"loss": 0.2723,
|
| 26129 |
+
"step": 4346
|
| 26130 |
+
},
|
| 26131 |
+
{
|
| 26132 |
+
"epoch": 4.98,
|
| 26133 |
+
"learning_rate": 1.5476190476190476e-06,
|
| 26134 |
+
"loss": 0.2923,
|
| 26135 |
+
"step": 4347
|
| 26136 |
+
},
|
| 26137 |
+
{
|
| 26138 |
+
"epoch": 4.98,
|
| 26139 |
+
"learning_rate": 1.4285714285714286e-06,
|
| 26140 |
+
"loss": 0.269,
|
| 26141 |
+
"step": 4348
|
| 26142 |
+
},
|
| 26143 |
+
{
|
| 26144 |
+
"epoch": 4.98,
|
| 26145 |
+
"learning_rate": 1.3095238095238096e-06,
|
| 26146 |
+
"loss": 0.2469,
|
| 26147 |
+
"step": 4349
|
| 26148 |
+
},
|
| 26149 |
+
{
|
| 26150 |
+
"epoch": 4.98,
|
| 26151 |
+
"learning_rate": 1.1904761904761904e-06,
|
| 26152 |
+
"loss": 0.283,
|
| 26153 |
+
"step": 4350
|
| 26154 |
+
},
|
| 26155 |
+
{
|
| 26156 |
+
"epoch": 4.99,
|
| 26157 |
+
"learning_rate": 1.0714285714285716e-06,
|
| 26158 |
+
"loss": 0.243,
|
| 26159 |
+
"step": 4351
|
| 26160 |
+
},
|
| 26161 |
+
{
|
| 26162 |
+
"epoch": 4.99,
|
| 26163 |
+
"learning_rate": 9.523809523809526e-07,
|
| 26164 |
+
"loss": 0.2456,
|
| 26165 |
+
"step": 4352
|
| 26166 |
+
},
|
| 26167 |
+
{
|
| 26168 |
+
"epoch": 4.99,
|
| 26169 |
+
"learning_rate": 8.333333333333333e-07,
|
| 26170 |
+
"loss": 0.2619,
|
| 26171 |
+
"step": 4353
|
| 26172 |
+
},
|
| 26173 |
+
{
|
| 26174 |
+
"epoch": 4.99,
|
| 26175 |
+
"learning_rate": 7.142857142857143e-07,
|
| 26176 |
+
"loss": 0.2775,
|
| 26177 |
+
"step": 4354
|
| 26178 |
+
},
|
| 26179 |
+
{
|
| 26180 |
+
"epoch": 4.99,
|
| 26181 |
+
"learning_rate": 5.952380952380952e-07,
|
| 26182 |
+
"loss": 0.2455,
|
| 26183 |
+
"step": 4355
|
| 26184 |
+
},
|
| 26185 |
+
{
|
| 26186 |
+
"epoch": 4.99,
|
| 26187 |
+
"learning_rate": 4.761904761904763e-07,
|
| 26188 |
+
"loss": 0.2669,
|
| 26189 |
+
"step": 4356
|
| 26190 |
+
},
|
| 26191 |
+
{
|
| 26192 |
+
"epoch": 4.99,
|
| 26193 |
+
"learning_rate": 3.5714285714285716e-07,
|
| 26194 |
+
"loss": 0.2604,
|
| 26195 |
+
"step": 4357
|
| 26196 |
+
},
|
| 26197 |
+
{
|
| 26198 |
+
"epoch": 4.99,
|
| 26199 |
+
"learning_rate": 2.3809523809523814e-07,
|
| 26200 |
+
"loss": 0.2727,
|
| 26201 |
+
"step": 4358
|
| 26202 |
+
},
|
| 26203 |
+
{
|
| 26204 |
+
"epoch": 4.99,
|
| 26205 |
+
"learning_rate": 1.1904761904761907e-07,
|
| 26206 |
+
"loss": 0.2643,
|
| 26207 |
+
"step": 4359
|
| 26208 |
+
},
|
| 26209 |
+
{
|
| 26210 |
+
"epoch": 5.0,
|
| 26211 |
+
"learning_rate": 0.0,
|
| 26212 |
+
"loss": 0.2551,
|
| 26213 |
+
"step": 4360
|
| 26214 |
}
|
| 26215 |
],
|
| 26216 |
"logging_steps": 1,
|
| 26217 |
"max_steps": 4360,
|
| 26218 |
"num_train_epochs": 5,
|
| 26219 |
"save_steps": 218,
|
| 26220 |
+
"total_flos": 8.64957847687712e+19,
|
| 26221 |
"trial_name": null,
|
| 26222 |
"trial_params": null
|
| 26223 |
}
|