Training in progress, step 4142, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 686648325
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cca6dbb8ebff5745b98bcd4ba9d49d1834c4c6b64e2a7fec95937e4fdab56659
|
| 3 |
size 686648325
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 343308717
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b9717b6a2c04229ec31aad7b9b478b9e8beea80fd103d4106f0aa04dc23c6b0
|
| 3 |
size 343308717
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74e7827d0297369e7a2f3ddd7b9beb15d5961c1ba8e7688ce4fdd3bd268835cc
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 4.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -23595,13 +23595,1321 @@
|
|
| 23595 |
"learning_rate": 5.1904761904761913e-05,
|
| 23596 |
"loss": 0.4955,
|
| 23597 |
"step": 3924
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23598 |
}
|
| 23599 |
],
|
| 23600 |
"logging_steps": 1,
|
| 23601 |
"max_steps": 4360,
|
| 23602 |
"num_train_epochs": 5,
|
| 23603 |
"save_steps": 218,
|
| 23604 |
-
"total_flos":
|
| 23605 |
"trial_name": null,
|
| 23606 |
"trial_params": null
|
| 23607 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.7461729898125435,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 4142,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 23595 |
"learning_rate": 5.1904761904761913e-05,
|
| 23596 |
"loss": 0.4955,
|
| 23597 |
"step": 3924
|
| 23598 |
+
},
|
| 23599 |
+
{
|
| 23600 |
+
"epoch": 4.5,
|
| 23601 |
+
"learning_rate": 5.1785714285714296e-05,
|
| 23602 |
+
"loss": 0.5156,
|
| 23603 |
+
"step": 3925
|
| 23604 |
+
},
|
| 23605 |
+
{
|
| 23606 |
+
"epoch": 4.5,
|
| 23607 |
+
"learning_rate": 5.166666666666667e-05,
|
| 23608 |
+
"loss": 0.4784,
|
| 23609 |
+
"step": 3926
|
| 23610 |
+
},
|
| 23611 |
+
{
|
| 23612 |
+
"epoch": 4.5,
|
| 23613 |
+
"learning_rate": 5.154761904761905e-05,
|
| 23614 |
+
"loss": 0.5066,
|
| 23615 |
+
"step": 3927
|
| 23616 |
+
},
|
| 23617 |
+
{
|
| 23618 |
+
"epoch": 4.5,
|
| 23619 |
+
"learning_rate": 5.142857142857143e-05,
|
| 23620 |
+
"loss": 0.4646,
|
| 23621 |
+
"step": 3928
|
| 23622 |
+
},
|
| 23623 |
+
{
|
| 23624 |
+
"epoch": 4.5,
|
| 23625 |
+
"learning_rate": 5.1309523809523804e-05,
|
| 23626 |
+
"loss": 0.4979,
|
| 23627 |
+
"step": 3929
|
| 23628 |
+
},
|
| 23629 |
+
{
|
| 23630 |
+
"epoch": 4.5,
|
| 23631 |
+
"learning_rate": 5.119047619047619e-05,
|
| 23632 |
+
"loss": 0.4747,
|
| 23633 |
+
"step": 3930
|
| 23634 |
+
},
|
| 23635 |
+
{
|
| 23636 |
+
"epoch": 4.5,
|
| 23637 |
+
"learning_rate": 5.107142857142857e-05,
|
| 23638 |
+
"loss": 0.5074,
|
| 23639 |
+
"step": 3931
|
| 23640 |
+
},
|
| 23641 |
+
{
|
| 23642 |
+
"epoch": 4.51,
|
| 23643 |
+
"learning_rate": 5.095238095238095e-05,
|
| 23644 |
+
"loss": 0.5152,
|
| 23645 |
+
"step": 3932
|
| 23646 |
+
},
|
| 23647 |
+
{
|
| 23648 |
+
"epoch": 4.51,
|
| 23649 |
+
"learning_rate": 5.0833333333333333e-05,
|
| 23650 |
+
"loss": 0.4765,
|
| 23651 |
+
"step": 3933
|
| 23652 |
+
},
|
| 23653 |
+
{
|
| 23654 |
+
"epoch": 4.51,
|
| 23655 |
+
"learning_rate": 5.0714285714285716e-05,
|
| 23656 |
+
"loss": 0.5179,
|
| 23657 |
+
"step": 3934
|
| 23658 |
+
},
|
| 23659 |
+
{
|
| 23660 |
+
"epoch": 4.51,
|
| 23661 |
+
"learning_rate": 5.05952380952381e-05,
|
| 23662 |
+
"loss": 0.5196,
|
| 23663 |
+
"step": 3935
|
| 23664 |
+
},
|
| 23665 |
+
{
|
| 23666 |
+
"epoch": 4.51,
|
| 23667 |
+
"learning_rate": 5.047619047619048e-05,
|
| 23668 |
+
"loss": 0.5191,
|
| 23669 |
+
"step": 3936
|
| 23670 |
+
},
|
| 23671 |
+
{
|
| 23672 |
+
"epoch": 4.51,
|
| 23673 |
+
"learning_rate": 5.0357142857142856e-05,
|
| 23674 |
+
"loss": 0.5005,
|
| 23675 |
+
"step": 3937
|
| 23676 |
+
},
|
| 23677 |
+
{
|
| 23678 |
+
"epoch": 4.51,
|
| 23679 |
+
"learning_rate": 5.023809523809524e-05,
|
| 23680 |
+
"loss": 0.4958,
|
| 23681 |
+
"step": 3938
|
| 23682 |
+
},
|
| 23683 |
+
{
|
| 23684 |
+
"epoch": 4.51,
|
| 23685 |
+
"learning_rate": 5.011904761904762e-05,
|
| 23686 |
+
"loss": 0.4569,
|
| 23687 |
+
"step": 3939
|
| 23688 |
+
},
|
| 23689 |
+
{
|
| 23690 |
+
"epoch": 4.51,
|
| 23691 |
+
"learning_rate": 5e-05,
|
| 23692 |
+
"loss": 0.5226,
|
| 23693 |
+
"step": 3940
|
| 23694 |
+
},
|
| 23695 |
+
{
|
| 23696 |
+
"epoch": 4.52,
|
| 23697 |
+
"learning_rate": 4.9880952380952385e-05,
|
| 23698 |
+
"loss": 0.5085,
|
| 23699 |
+
"step": 3941
|
| 23700 |
+
},
|
| 23701 |
+
{
|
| 23702 |
+
"epoch": 4.52,
|
| 23703 |
+
"learning_rate": 4.976190476190477e-05,
|
| 23704 |
+
"loss": 0.5261,
|
| 23705 |
+
"step": 3942
|
| 23706 |
+
},
|
| 23707 |
+
{
|
| 23708 |
+
"epoch": 4.52,
|
| 23709 |
+
"learning_rate": 4.964285714285715e-05,
|
| 23710 |
+
"loss": 0.4798,
|
| 23711 |
+
"step": 3943
|
| 23712 |
+
},
|
| 23713 |
+
{
|
| 23714 |
+
"epoch": 4.52,
|
| 23715 |
+
"learning_rate": 4.9523809523809525e-05,
|
| 23716 |
+
"loss": 0.4753,
|
| 23717 |
+
"step": 3944
|
| 23718 |
+
},
|
| 23719 |
+
{
|
| 23720 |
+
"epoch": 4.52,
|
| 23721 |
+
"learning_rate": 4.940476190476191e-05,
|
| 23722 |
+
"loss": 0.4792,
|
| 23723 |
+
"step": 3945
|
| 23724 |
+
},
|
| 23725 |
+
{
|
| 23726 |
+
"epoch": 4.52,
|
| 23727 |
+
"learning_rate": 4.928571428571429e-05,
|
| 23728 |
+
"loss": 0.4743,
|
| 23729 |
+
"step": 3946
|
| 23730 |
+
},
|
| 23731 |
+
{
|
| 23732 |
+
"epoch": 4.52,
|
| 23733 |
+
"learning_rate": 4.9166666666666665e-05,
|
| 23734 |
+
"loss": 0.489,
|
| 23735 |
+
"step": 3947
|
| 23736 |
+
},
|
| 23737 |
+
{
|
| 23738 |
+
"epoch": 4.52,
|
| 23739 |
+
"learning_rate": 4.904761904761905e-05,
|
| 23740 |
+
"loss": 0.4938,
|
| 23741 |
+
"step": 3948
|
| 23742 |
+
},
|
| 23743 |
+
{
|
| 23744 |
+
"epoch": 4.53,
|
| 23745 |
+
"learning_rate": 4.892857142857143e-05,
|
| 23746 |
+
"loss": 0.4941,
|
| 23747 |
+
"step": 3949
|
| 23748 |
+
},
|
| 23749 |
+
{
|
| 23750 |
+
"epoch": 4.53,
|
| 23751 |
+
"learning_rate": 4.880952380952381e-05,
|
| 23752 |
+
"loss": 0.4832,
|
| 23753 |
+
"step": 3950
|
| 23754 |
+
},
|
| 23755 |
+
{
|
| 23756 |
+
"epoch": 4.53,
|
| 23757 |
+
"learning_rate": 4.8690476190476194e-05,
|
| 23758 |
+
"loss": 0.4711,
|
| 23759 |
+
"step": 3951
|
| 23760 |
+
},
|
| 23761 |
+
{
|
| 23762 |
+
"epoch": 4.53,
|
| 23763 |
+
"learning_rate": 4.8571428571428576e-05,
|
| 23764 |
+
"loss": 0.5151,
|
| 23765 |
+
"step": 3952
|
| 23766 |
+
},
|
| 23767 |
+
{
|
| 23768 |
+
"epoch": 4.53,
|
| 23769 |
+
"learning_rate": 4.845238095238095e-05,
|
| 23770 |
+
"loss": 0.5096,
|
| 23771 |
+
"step": 3953
|
| 23772 |
+
},
|
| 23773 |
+
{
|
| 23774 |
+
"epoch": 4.53,
|
| 23775 |
+
"learning_rate": 4.8333333333333334e-05,
|
| 23776 |
+
"loss": 0.5178,
|
| 23777 |
+
"step": 3954
|
| 23778 |
+
},
|
| 23779 |
+
{
|
| 23780 |
+
"epoch": 4.53,
|
| 23781 |
+
"learning_rate": 4.8214285714285716e-05,
|
| 23782 |
+
"loss": 0.4865,
|
| 23783 |
+
"step": 3955
|
| 23784 |
+
},
|
| 23785 |
+
{
|
| 23786 |
+
"epoch": 4.53,
|
| 23787 |
+
"learning_rate": 4.80952380952381e-05,
|
| 23788 |
+
"loss": 0.5029,
|
| 23789 |
+
"step": 3956
|
| 23790 |
+
},
|
| 23791 |
+
{
|
| 23792 |
+
"epoch": 4.53,
|
| 23793 |
+
"learning_rate": 4.797619047619048e-05,
|
| 23794 |
+
"loss": 0.5074,
|
| 23795 |
+
"step": 3957
|
| 23796 |
+
},
|
| 23797 |
+
{
|
| 23798 |
+
"epoch": 4.54,
|
| 23799 |
+
"learning_rate": 4.785714285714286e-05,
|
| 23800 |
+
"loss": 0.4813,
|
| 23801 |
+
"step": 3958
|
| 23802 |
+
},
|
| 23803 |
+
{
|
| 23804 |
+
"epoch": 4.54,
|
| 23805 |
+
"learning_rate": 4.7738095238095245e-05,
|
| 23806 |
+
"loss": 0.4527,
|
| 23807 |
+
"step": 3959
|
| 23808 |
+
},
|
| 23809 |
+
{
|
| 23810 |
+
"epoch": 4.54,
|
| 23811 |
+
"learning_rate": 4.761904761904762e-05,
|
| 23812 |
+
"loss": 0.4904,
|
| 23813 |
+
"step": 3960
|
| 23814 |
+
},
|
| 23815 |
+
{
|
| 23816 |
+
"epoch": 4.54,
|
| 23817 |
+
"learning_rate": 4.75e-05,
|
| 23818 |
+
"loss": 0.4823,
|
| 23819 |
+
"step": 3961
|
| 23820 |
+
},
|
| 23821 |
+
{
|
| 23822 |
+
"epoch": 4.54,
|
| 23823 |
+
"learning_rate": 4.738095238095238e-05,
|
| 23824 |
+
"loss": 0.4861,
|
| 23825 |
+
"step": 3962
|
| 23826 |
+
},
|
| 23827 |
+
{
|
| 23828 |
+
"epoch": 4.54,
|
| 23829 |
+
"learning_rate": 4.726190476190476e-05,
|
| 23830 |
+
"loss": 0.4904,
|
| 23831 |
+
"step": 3963
|
| 23832 |
+
},
|
| 23833 |
+
{
|
| 23834 |
+
"epoch": 4.54,
|
| 23835 |
+
"learning_rate": 4.714285714285714e-05,
|
| 23836 |
+
"loss": 0.456,
|
| 23837 |
+
"step": 3964
|
| 23838 |
+
},
|
| 23839 |
+
{
|
| 23840 |
+
"epoch": 4.54,
|
| 23841 |
+
"learning_rate": 4.7023809523809525e-05,
|
| 23842 |
+
"loss": 0.4676,
|
| 23843 |
+
"step": 3965
|
| 23844 |
+
},
|
| 23845 |
+
{
|
| 23846 |
+
"epoch": 4.54,
|
| 23847 |
+
"learning_rate": 4.690476190476191e-05,
|
| 23848 |
+
"loss": 0.5007,
|
| 23849 |
+
"step": 3966
|
| 23850 |
+
},
|
| 23851 |
+
{
|
| 23852 |
+
"epoch": 4.55,
|
| 23853 |
+
"learning_rate": 4.678571428571429e-05,
|
| 23854 |
+
"loss": 0.4918,
|
| 23855 |
+
"step": 3967
|
| 23856 |
+
},
|
| 23857 |
+
{
|
| 23858 |
+
"epoch": 4.55,
|
| 23859 |
+
"learning_rate": 4.666666666666667e-05,
|
| 23860 |
+
"loss": 0.4908,
|
| 23861 |
+
"step": 3968
|
| 23862 |
+
},
|
| 23863 |
+
{
|
| 23864 |
+
"epoch": 4.55,
|
| 23865 |
+
"learning_rate": 4.6547619047619054e-05,
|
| 23866 |
+
"loss": 0.5059,
|
| 23867 |
+
"step": 3969
|
| 23868 |
+
},
|
| 23869 |
+
{
|
| 23870 |
+
"epoch": 4.55,
|
| 23871 |
+
"learning_rate": 4.642857142857143e-05,
|
| 23872 |
+
"loss": 0.4384,
|
| 23873 |
+
"step": 3970
|
| 23874 |
+
},
|
| 23875 |
+
{
|
| 23876 |
+
"epoch": 4.55,
|
| 23877 |
+
"learning_rate": 4.630952380952381e-05,
|
| 23878 |
+
"loss": 0.4925,
|
| 23879 |
+
"step": 3971
|
| 23880 |
+
},
|
| 23881 |
+
{
|
| 23882 |
+
"epoch": 4.55,
|
| 23883 |
+
"learning_rate": 4.6190476190476194e-05,
|
| 23884 |
+
"loss": 0.4924,
|
| 23885 |
+
"step": 3972
|
| 23886 |
+
},
|
| 23887 |
+
{
|
| 23888 |
+
"epoch": 4.55,
|
| 23889 |
+
"learning_rate": 4.607142857142857e-05,
|
| 23890 |
+
"loss": 0.5311,
|
| 23891 |
+
"step": 3973
|
| 23892 |
+
},
|
| 23893 |
+
{
|
| 23894 |
+
"epoch": 4.55,
|
| 23895 |
+
"learning_rate": 4.595238095238095e-05,
|
| 23896 |
+
"loss": 0.4878,
|
| 23897 |
+
"step": 3974
|
| 23898 |
+
},
|
| 23899 |
+
{
|
| 23900 |
+
"epoch": 4.55,
|
| 23901 |
+
"learning_rate": 4.5833333333333334e-05,
|
| 23902 |
+
"loss": 0.4785,
|
| 23903 |
+
"step": 3975
|
| 23904 |
+
},
|
| 23905 |
+
{
|
| 23906 |
+
"epoch": 4.56,
|
| 23907 |
+
"learning_rate": 4.5714285714285716e-05,
|
| 23908 |
+
"loss": 0.4853,
|
| 23909 |
+
"step": 3976
|
| 23910 |
+
},
|
| 23911 |
+
{
|
| 23912 |
+
"epoch": 4.56,
|
| 23913 |
+
"learning_rate": 4.55952380952381e-05,
|
| 23914 |
+
"loss": 0.4983,
|
| 23915 |
+
"step": 3977
|
| 23916 |
+
},
|
| 23917 |
+
{
|
| 23918 |
+
"epoch": 4.56,
|
| 23919 |
+
"learning_rate": 4.547619047619048e-05,
|
| 23920 |
+
"loss": 0.4652,
|
| 23921 |
+
"step": 3978
|
| 23922 |
+
},
|
| 23923 |
+
{
|
| 23924 |
+
"epoch": 4.56,
|
| 23925 |
+
"learning_rate": 4.5357142857142856e-05,
|
| 23926 |
+
"loss": 0.4686,
|
| 23927 |
+
"step": 3979
|
| 23928 |
+
},
|
| 23929 |
+
{
|
| 23930 |
+
"epoch": 4.56,
|
| 23931 |
+
"learning_rate": 4.523809523809524e-05,
|
| 23932 |
+
"loss": 0.5034,
|
| 23933 |
+
"step": 3980
|
| 23934 |
+
},
|
| 23935 |
+
{
|
| 23936 |
+
"epoch": 4.56,
|
| 23937 |
+
"learning_rate": 4.511904761904762e-05,
|
| 23938 |
+
"loss": 0.4892,
|
| 23939 |
+
"step": 3981
|
| 23940 |
+
},
|
| 23941 |
+
{
|
| 23942 |
+
"epoch": 4.56,
|
| 23943 |
+
"learning_rate": 4.5e-05,
|
| 23944 |
+
"loss": 0.5239,
|
| 23945 |
+
"step": 3982
|
| 23946 |
+
},
|
| 23947 |
+
{
|
| 23948 |
+
"epoch": 4.56,
|
| 23949 |
+
"learning_rate": 4.4880952380952385e-05,
|
| 23950 |
+
"loss": 0.499,
|
| 23951 |
+
"step": 3983
|
| 23952 |
+
},
|
| 23953 |
+
{
|
| 23954 |
+
"epoch": 4.57,
|
| 23955 |
+
"learning_rate": 4.476190476190477e-05,
|
| 23956 |
+
"loss": 0.4846,
|
| 23957 |
+
"step": 3984
|
| 23958 |
+
},
|
| 23959 |
+
{
|
| 23960 |
+
"epoch": 4.57,
|
| 23961 |
+
"learning_rate": 4.464285714285715e-05,
|
| 23962 |
+
"loss": 0.5007,
|
| 23963 |
+
"step": 3985
|
| 23964 |
+
},
|
| 23965 |
+
{
|
| 23966 |
+
"epoch": 4.57,
|
| 23967 |
+
"learning_rate": 4.4523809523809525e-05,
|
| 23968 |
+
"loss": 0.4823,
|
| 23969 |
+
"step": 3986
|
| 23970 |
+
},
|
| 23971 |
+
{
|
| 23972 |
+
"epoch": 4.57,
|
| 23973 |
+
"learning_rate": 4.440476190476191e-05,
|
| 23974 |
+
"loss": 0.4995,
|
| 23975 |
+
"step": 3987
|
| 23976 |
+
},
|
| 23977 |
+
{
|
| 23978 |
+
"epoch": 4.57,
|
| 23979 |
+
"learning_rate": 4.428571428571428e-05,
|
| 23980 |
+
"loss": 0.4712,
|
| 23981 |
+
"step": 3988
|
| 23982 |
+
},
|
| 23983 |
+
{
|
| 23984 |
+
"epoch": 4.57,
|
| 23985 |
+
"learning_rate": 4.4166666666666665e-05,
|
| 23986 |
+
"loss": 0.4625,
|
| 23987 |
+
"step": 3989
|
| 23988 |
+
},
|
| 23989 |
+
{
|
| 23990 |
+
"epoch": 4.57,
|
| 23991 |
+
"learning_rate": 4.404761904761905e-05,
|
| 23992 |
+
"loss": 0.5115,
|
| 23993 |
+
"step": 3990
|
| 23994 |
+
},
|
| 23995 |
+
{
|
| 23996 |
+
"epoch": 4.57,
|
| 23997 |
+
"learning_rate": 4.392857142857143e-05,
|
| 23998 |
+
"loss": 0.4736,
|
| 23999 |
+
"step": 3991
|
| 24000 |
+
},
|
| 24001 |
+
{
|
| 24002 |
+
"epoch": 4.57,
|
| 24003 |
+
"learning_rate": 4.380952380952381e-05,
|
| 24004 |
+
"loss": 0.4946,
|
| 24005 |
+
"step": 3992
|
| 24006 |
+
},
|
| 24007 |
+
{
|
| 24008 |
+
"epoch": 4.58,
|
| 24009 |
+
"learning_rate": 4.3690476190476194e-05,
|
| 24010 |
+
"loss": 0.471,
|
| 24011 |
+
"step": 3993
|
| 24012 |
+
},
|
| 24013 |
+
{
|
| 24014 |
+
"epoch": 4.58,
|
| 24015 |
+
"learning_rate": 4.3571428571428576e-05,
|
| 24016 |
+
"loss": 0.5002,
|
| 24017 |
+
"step": 3994
|
| 24018 |
+
},
|
| 24019 |
+
{
|
| 24020 |
+
"epoch": 4.58,
|
| 24021 |
+
"learning_rate": 4.345238095238096e-05,
|
| 24022 |
+
"loss": 0.5194,
|
| 24023 |
+
"step": 3995
|
| 24024 |
+
},
|
| 24025 |
+
{
|
| 24026 |
+
"epoch": 4.58,
|
| 24027 |
+
"learning_rate": 4.3333333333333334e-05,
|
| 24028 |
+
"loss": 0.4444,
|
| 24029 |
+
"step": 3996
|
| 24030 |
+
},
|
| 24031 |
+
{
|
| 24032 |
+
"epoch": 4.58,
|
| 24033 |
+
"learning_rate": 4.3214285714285716e-05,
|
| 24034 |
+
"loss": 0.527,
|
| 24035 |
+
"step": 3997
|
| 24036 |
+
},
|
| 24037 |
+
{
|
| 24038 |
+
"epoch": 4.58,
|
| 24039 |
+
"learning_rate": 4.30952380952381e-05,
|
| 24040 |
+
"loss": 0.454,
|
| 24041 |
+
"step": 3998
|
| 24042 |
+
},
|
| 24043 |
+
{
|
| 24044 |
+
"epoch": 4.58,
|
| 24045 |
+
"learning_rate": 4.297619047619048e-05,
|
| 24046 |
+
"loss": 0.462,
|
| 24047 |
+
"step": 3999
|
| 24048 |
+
},
|
| 24049 |
+
{
|
| 24050 |
+
"epoch": 4.58,
|
| 24051 |
+
"learning_rate": 4.2857142857142856e-05,
|
| 24052 |
+
"loss": 0.4805,
|
| 24053 |
+
"step": 4000
|
| 24054 |
+
},
|
| 24055 |
+
{
|
| 24056 |
+
"epoch": 4.58,
|
| 24057 |
+
"learning_rate": 4.273809523809524e-05,
|
| 24058 |
+
"loss": 0.5294,
|
| 24059 |
+
"step": 4001
|
| 24060 |
+
},
|
| 24061 |
+
{
|
| 24062 |
+
"epoch": 4.59,
|
| 24063 |
+
"learning_rate": 4.261904761904762e-05,
|
| 24064 |
+
"loss": 0.5302,
|
| 24065 |
+
"step": 4002
|
| 24066 |
+
},
|
| 24067 |
+
{
|
| 24068 |
+
"epoch": 4.59,
|
| 24069 |
+
"learning_rate": 4.25e-05,
|
| 24070 |
+
"loss": 0.4862,
|
| 24071 |
+
"step": 4003
|
| 24072 |
+
},
|
| 24073 |
+
{
|
| 24074 |
+
"epoch": 4.59,
|
| 24075 |
+
"learning_rate": 4.2380952380952385e-05,
|
| 24076 |
+
"loss": 0.4749,
|
| 24077 |
+
"step": 4004
|
| 24078 |
+
},
|
| 24079 |
+
{
|
| 24080 |
+
"epoch": 4.59,
|
| 24081 |
+
"learning_rate": 4.226190476190476e-05,
|
| 24082 |
+
"loss": 0.5005,
|
| 24083 |
+
"step": 4005
|
| 24084 |
+
},
|
| 24085 |
+
{
|
| 24086 |
+
"epoch": 4.59,
|
| 24087 |
+
"learning_rate": 4.214285714285714e-05,
|
| 24088 |
+
"loss": 0.4887,
|
| 24089 |
+
"step": 4006
|
| 24090 |
+
},
|
| 24091 |
+
{
|
| 24092 |
+
"epoch": 4.59,
|
| 24093 |
+
"learning_rate": 4.2023809523809525e-05,
|
| 24094 |
+
"loss": 0.4583,
|
| 24095 |
+
"step": 4007
|
| 24096 |
+
},
|
| 24097 |
+
{
|
| 24098 |
+
"epoch": 4.59,
|
| 24099 |
+
"learning_rate": 4.190476190476191e-05,
|
| 24100 |
+
"loss": 0.4749,
|
| 24101 |
+
"step": 4008
|
| 24102 |
+
},
|
| 24103 |
+
{
|
| 24104 |
+
"epoch": 4.59,
|
| 24105 |
+
"learning_rate": 4.178571428571429e-05,
|
| 24106 |
+
"loss": 0.4692,
|
| 24107 |
+
"step": 4009
|
| 24108 |
+
},
|
| 24109 |
+
{
|
| 24110 |
+
"epoch": 4.59,
|
| 24111 |
+
"learning_rate": 4.166666666666667e-05,
|
| 24112 |
+
"loss": 0.4792,
|
| 24113 |
+
"step": 4010
|
| 24114 |
+
},
|
| 24115 |
+
{
|
| 24116 |
+
"epoch": 4.6,
|
| 24117 |
+
"learning_rate": 4.1547619047619054e-05,
|
| 24118 |
+
"loss": 0.4852,
|
| 24119 |
+
"step": 4011
|
| 24120 |
+
},
|
| 24121 |
+
{
|
| 24122 |
+
"epoch": 4.6,
|
| 24123 |
+
"learning_rate": 4.1428571428571437e-05,
|
| 24124 |
+
"loss": 0.5368,
|
| 24125 |
+
"step": 4012
|
| 24126 |
+
},
|
| 24127 |
+
{
|
| 24128 |
+
"epoch": 4.6,
|
| 24129 |
+
"learning_rate": 4.130952380952381e-05,
|
| 24130 |
+
"loss": 0.5334,
|
| 24131 |
+
"step": 4013
|
| 24132 |
+
},
|
| 24133 |
+
{
|
| 24134 |
+
"epoch": 4.6,
|
| 24135 |
+
"learning_rate": 4.119047619047619e-05,
|
| 24136 |
+
"loss": 0.4609,
|
| 24137 |
+
"step": 4014
|
| 24138 |
+
},
|
| 24139 |
+
{
|
| 24140 |
+
"epoch": 4.6,
|
| 24141 |
+
"learning_rate": 4.107142857142857e-05,
|
| 24142 |
+
"loss": 0.4815,
|
| 24143 |
+
"step": 4015
|
| 24144 |
+
},
|
| 24145 |
+
{
|
| 24146 |
+
"epoch": 4.6,
|
| 24147 |
+
"learning_rate": 4.095238095238095e-05,
|
| 24148 |
+
"loss": 0.4818,
|
| 24149 |
+
"step": 4016
|
| 24150 |
+
},
|
| 24151 |
+
{
|
| 24152 |
+
"epoch": 4.6,
|
| 24153 |
+
"learning_rate": 4.0833333333333334e-05,
|
| 24154 |
+
"loss": 0.5259,
|
| 24155 |
+
"step": 4017
|
| 24156 |
+
},
|
| 24157 |
+
{
|
| 24158 |
+
"epoch": 4.6,
|
| 24159 |
+
"learning_rate": 4.0714285714285717e-05,
|
| 24160 |
+
"loss": 0.4996,
|
| 24161 |
+
"step": 4018
|
| 24162 |
+
},
|
| 24163 |
+
{
|
| 24164 |
+
"epoch": 4.61,
|
| 24165 |
+
"learning_rate": 4.05952380952381e-05,
|
| 24166 |
+
"loss": 0.4438,
|
| 24167 |
+
"step": 4019
|
| 24168 |
+
},
|
| 24169 |
+
{
|
| 24170 |
+
"epoch": 4.61,
|
| 24171 |
+
"learning_rate": 4.047619047619048e-05,
|
| 24172 |
+
"loss": 0.4775,
|
| 24173 |
+
"step": 4020
|
| 24174 |
+
},
|
| 24175 |
+
{
|
| 24176 |
+
"epoch": 4.61,
|
| 24177 |
+
"learning_rate": 4.035714285714286e-05,
|
| 24178 |
+
"loss": 0.4664,
|
| 24179 |
+
"step": 4021
|
| 24180 |
+
},
|
| 24181 |
+
{
|
| 24182 |
+
"epoch": 4.61,
|
| 24183 |
+
"learning_rate": 4.023809523809524e-05,
|
| 24184 |
+
"loss": 0.4598,
|
| 24185 |
+
"step": 4022
|
| 24186 |
+
},
|
| 24187 |
+
{
|
| 24188 |
+
"epoch": 4.61,
|
| 24189 |
+
"learning_rate": 4.011904761904762e-05,
|
| 24190 |
+
"loss": 0.4508,
|
| 24191 |
+
"step": 4023
|
| 24192 |
+
},
|
| 24193 |
+
{
|
| 24194 |
+
"epoch": 4.61,
|
| 24195 |
+
"learning_rate": 4e-05,
|
| 24196 |
+
"loss": 0.466,
|
| 24197 |
+
"step": 4024
|
| 24198 |
+
},
|
| 24199 |
+
{
|
| 24200 |
+
"epoch": 4.61,
|
| 24201 |
+
"learning_rate": 3.9880952380952386e-05,
|
| 24202 |
+
"loss": 0.4819,
|
| 24203 |
+
"step": 4025
|
| 24204 |
+
},
|
| 24205 |
+
{
|
| 24206 |
+
"epoch": 4.61,
|
| 24207 |
+
"learning_rate": 3.976190476190476e-05,
|
| 24208 |
+
"loss": 0.5029,
|
| 24209 |
+
"step": 4026
|
| 24210 |
+
},
|
| 24211 |
+
{
|
| 24212 |
+
"epoch": 4.61,
|
| 24213 |
+
"learning_rate": 3.964285714285714e-05,
|
| 24214 |
+
"loss": 0.4965,
|
| 24215 |
+
"step": 4027
|
| 24216 |
+
},
|
| 24217 |
+
{
|
| 24218 |
+
"epoch": 4.62,
|
| 24219 |
+
"learning_rate": 3.9523809523809526e-05,
|
| 24220 |
+
"loss": 0.4605,
|
| 24221 |
+
"step": 4028
|
| 24222 |
+
},
|
| 24223 |
+
{
|
| 24224 |
+
"epoch": 4.62,
|
| 24225 |
+
"learning_rate": 3.940476190476191e-05,
|
| 24226 |
+
"loss": 0.4974,
|
| 24227 |
+
"step": 4029
|
| 24228 |
+
},
|
| 24229 |
+
{
|
| 24230 |
+
"epoch": 4.62,
|
| 24231 |
+
"learning_rate": 3.928571428571429e-05,
|
| 24232 |
+
"loss": 0.5091,
|
| 24233 |
+
"step": 4030
|
| 24234 |
+
},
|
| 24235 |
+
{
|
| 24236 |
+
"epoch": 4.62,
|
| 24237 |
+
"learning_rate": 3.9166666666666665e-05,
|
| 24238 |
+
"loss": 0.5043,
|
| 24239 |
+
"step": 4031
|
| 24240 |
+
},
|
| 24241 |
+
{
|
| 24242 |
+
"epoch": 4.62,
|
| 24243 |
+
"learning_rate": 3.904761904761905e-05,
|
| 24244 |
+
"loss": 0.5091,
|
| 24245 |
+
"step": 4032
|
| 24246 |
+
},
|
| 24247 |
+
{
|
| 24248 |
+
"epoch": 4.62,
|
| 24249 |
+
"learning_rate": 3.892857142857143e-05,
|
| 24250 |
+
"loss": 0.4465,
|
| 24251 |
+
"step": 4033
|
| 24252 |
+
},
|
| 24253 |
+
{
|
| 24254 |
+
"epoch": 4.62,
|
| 24255 |
+
"learning_rate": 3.880952380952381e-05,
|
| 24256 |
+
"loss": 0.4917,
|
| 24257 |
+
"step": 4034
|
| 24258 |
+
},
|
| 24259 |
+
{
|
| 24260 |
+
"epoch": 4.62,
|
| 24261 |
+
"learning_rate": 3.8690476190476195e-05,
|
| 24262 |
+
"loss": 0.4786,
|
| 24263 |
+
"step": 4035
|
| 24264 |
+
},
|
| 24265 |
+
{
|
| 24266 |
+
"epoch": 4.62,
|
| 24267 |
+
"learning_rate": 3.857142857142858e-05,
|
| 24268 |
+
"loss": 0.5069,
|
| 24269 |
+
"step": 4036
|
| 24270 |
+
},
|
| 24271 |
+
{
|
| 24272 |
+
"epoch": 4.63,
|
| 24273 |
+
"learning_rate": 3.845238095238096e-05,
|
| 24274 |
+
"loss": 0.4889,
|
| 24275 |
+
"step": 4037
|
| 24276 |
+
},
|
| 24277 |
+
{
|
| 24278 |
+
"epoch": 4.63,
|
| 24279 |
+
"learning_rate": 3.8333333333333334e-05,
|
| 24280 |
+
"loss": 0.5024,
|
| 24281 |
+
"step": 4038
|
| 24282 |
+
},
|
| 24283 |
+
{
|
| 24284 |
+
"epoch": 4.63,
|
| 24285 |
+
"learning_rate": 3.821428571428572e-05,
|
| 24286 |
+
"loss": 0.4928,
|
| 24287 |
+
"step": 4039
|
| 24288 |
+
},
|
| 24289 |
+
{
|
| 24290 |
+
"epoch": 4.63,
|
| 24291 |
+
"learning_rate": 3.809523809523809e-05,
|
| 24292 |
+
"loss": 0.4758,
|
| 24293 |
+
"step": 4040
|
| 24294 |
+
},
|
| 24295 |
+
{
|
| 24296 |
+
"epoch": 4.63,
|
| 24297 |
+
"learning_rate": 3.7976190476190474e-05,
|
| 24298 |
+
"loss": 0.4676,
|
| 24299 |
+
"step": 4041
|
| 24300 |
+
},
|
| 24301 |
+
{
|
| 24302 |
+
"epoch": 4.63,
|
| 24303 |
+
"learning_rate": 3.785714285714286e-05,
|
| 24304 |
+
"loss": 0.5242,
|
| 24305 |
+
"step": 4042
|
| 24306 |
+
},
|
| 24307 |
+
{
|
| 24308 |
+
"epoch": 4.63,
|
| 24309 |
+
"learning_rate": 3.773809523809524e-05,
|
| 24310 |
+
"loss": 0.4353,
|
| 24311 |
+
"step": 4043
|
| 24312 |
+
},
|
| 24313 |
+
{
|
| 24314 |
+
"epoch": 4.63,
|
| 24315 |
+
"learning_rate": 3.761904761904762e-05,
|
| 24316 |
+
"loss": 0.4649,
|
| 24317 |
+
"step": 4044
|
| 24318 |
+
},
|
| 24319 |
+
{
|
| 24320 |
+
"epoch": 4.64,
|
| 24321 |
+
"learning_rate": 3.7500000000000003e-05,
|
| 24322 |
+
"loss": 0.4433,
|
| 24323 |
+
"step": 4045
|
| 24324 |
+
},
|
| 24325 |
+
{
|
| 24326 |
+
"epoch": 4.64,
|
| 24327 |
+
"learning_rate": 3.7380952380952386e-05,
|
| 24328 |
+
"loss": 0.4425,
|
| 24329 |
+
"step": 4046
|
| 24330 |
+
},
|
| 24331 |
+
{
|
| 24332 |
+
"epoch": 4.64,
|
| 24333 |
+
"learning_rate": 3.726190476190476e-05,
|
| 24334 |
+
"loss": 0.546,
|
| 24335 |
+
"step": 4047
|
| 24336 |
+
},
|
| 24337 |
+
{
|
| 24338 |
+
"epoch": 4.64,
|
| 24339 |
+
"learning_rate": 3.7142857142857143e-05,
|
| 24340 |
+
"loss": 0.4436,
|
| 24341 |
+
"step": 4048
|
| 24342 |
+
},
|
| 24343 |
+
{
|
| 24344 |
+
"epoch": 4.64,
|
| 24345 |
+
"learning_rate": 3.7023809523809526e-05,
|
| 24346 |
+
"loss": 0.4616,
|
| 24347 |
+
"step": 4049
|
| 24348 |
+
},
|
| 24349 |
+
{
|
| 24350 |
+
"epoch": 4.64,
|
| 24351 |
+
"learning_rate": 3.690476190476191e-05,
|
| 24352 |
+
"loss": 0.4319,
|
| 24353 |
+
"step": 4050
|
| 24354 |
+
},
|
| 24355 |
+
{
|
| 24356 |
+
"epoch": 4.64,
|
| 24357 |
+
"learning_rate": 3.678571428571429e-05,
|
| 24358 |
+
"loss": 0.5099,
|
| 24359 |
+
"step": 4051
|
| 24360 |
+
},
|
| 24361 |
+
{
|
| 24362 |
+
"epoch": 4.64,
|
| 24363 |
+
"learning_rate": 3.6666666666666666e-05,
|
| 24364 |
+
"loss": 0.4674,
|
| 24365 |
+
"step": 4052
|
| 24366 |
+
},
|
| 24367 |
+
{
|
| 24368 |
+
"epoch": 4.64,
|
| 24369 |
+
"learning_rate": 3.654761904761905e-05,
|
| 24370 |
+
"loss": 0.4867,
|
| 24371 |
+
"step": 4053
|
| 24372 |
+
},
|
| 24373 |
+
{
|
| 24374 |
+
"epoch": 4.65,
|
| 24375 |
+
"learning_rate": 3.642857142857143e-05,
|
| 24376 |
+
"loss": 0.4823,
|
| 24377 |
+
"step": 4054
|
| 24378 |
+
},
|
| 24379 |
+
{
|
| 24380 |
+
"epoch": 4.65,
|
| 24381 |
+
"learning_rate": 3.630952380952381e-05,
|
| 24382 |
+
"loss": 0.4757,
|
| 24383 |
+
"step": 4055
|
| 24384 |
+
},
|
| 24385 |
+
{
|
| 24386 |
+
"epoch": 4.65,
|
| 24387 |
+
"learning_rate": 3.619047619047619e-05,
|
| 24388 |
+
"loss": 0.475,
|
| 24389 |
+
"step": 4056
|
| 24390 |
+
},
|
| 24391 |
+
{
|
| 24392 |
+
"epoch": 4.65,
|
| 24393 |
+
"learning_rate": 3.607142857142857e-05,
|
| 24394 |
+
"loss": 0.5061,
|
| 24395 |
+
"step": 4057
|
| 24396 |
+
},
|
| 24397 |
+
{
|
| 24398 |
+
"epoch": 4.65,
|
| 24399 |
+
"learning_rate": 3.595238095238095e-05,
|
| 24400 |
+
"loss": 0.4663,
|
| 24401 |
+
"step": 4058
|
| 24402 |
+
},
|
| 24403 |
+
{
|
| 24404 |
+
"epoch": 4.65,
|
| 24405 |
+
"learning_rate": 3.5833333333333335e-05,
|
| 24406 |
+
"loss": 0.4997,
|
| 24407 |
+
"step": 4059
|
| 24408 |
+
},
|
| 24409 |
+
{
|
| 24410 |
+
"epoch": 4.65,
|
| 24411 |
+
"learning_rate": 3.571428571428572e-05,
|
| 24412 |
+
"loss": 0.4875,
|
| 24413 |
+
"step": 4060
|
| 24414 |
+
},
|
| 24415 |
+
{
|
| 24416 |
+
"epoch": 4.65,
|
| 24417 |
+
"learning_rate": 3.55952380952381e-05,
|
| 24418 |
+
"loss": 0.503,
|
| 24419 |
+
"step": 4061
|
| 24420 |
+
},
|
| 24421 |
+
{
|
| 24422 |
+
"epoch": 4.65,
|
| 24423 |
+
"learning_rate": 3.547619047619048e-05,
|
| 24424 |
+
"loss": 0.4953,
|
| 24425 |
+
"step": 4062
|
| 24426 |
+
},
|
| 24427 |
+
{
|
| 24428 |
+
"epoch": 4.66,
|
| 24429 |
+
"learning_rate": 3.5357142857142864e-05,
|
| 24430 |
+
"loss": 0.4739,
|
| 24431 |
+
"step": 4063
|
| 24432 |
+
},
|
| 24433 |
+
{
|
| 24434 |
+
"epoch": 4.66,
|
| 24435 |
+
"learning_rate": 3.523809523809524e-05,
|
| 24436 |
+
"loss": 0.4823,
|
| 24437 |
+
"step": 4064
|
| 24438 |
+
},
|
| 24439 |
+
{
|
| 24440 |
+
"epoch": 4.66,
|
| 24441 |
+
"learning_rate": 3.511904761904762e-05,
|
| 24442 |
+
"loss": 0.4684,
|
| 24443 |
+
"step": 4065
|
| 24444 |
+
},
|
| 24445 |
+
{
|
| 24446 |
+
"epoch": 4.66,
|
| 24447 |
+
"learning_rate": 3.5e-05,
|
| 24448 |
+
"loss": 0.4385,
|
| 24449 |
+
"step": 4066
|
| 24450 |
+
},
|
| 24451 |
+
{
|
| 24452 |
+
"epoch": 4.66,
|
| 24453 |
+
"learning_rate": 3.488095238095238e-05,
|
| 24454 |
+
"loss": 0.5261,
|
| 24455 |
+
"step": 4067
|
| 24456 |
+
},
|
| 24457 |
+
{
|
| 24458 |
+
"epoch": 4.66,
|
| 24459 |
+
"learning_rate": 3.476190476190476e-05,
|
| 24460 |
+
"loss": 0.4849,
|
| 24461 |
+
"step": 4068
|
| 24462 |
+
},
|
| 24463 |
+
{
|
| 24464 |
+
"epoch": 4.66,
|
| 24465 |
+
"learning_rate": 3.4642857142857144e-05,
|
| 24466 |
+
"loss": 0.4515,
|
| 24467 |
+
"step": 4069
|
| 24468 |
+
},
|
| 24469 |
+
{
|
| 24470 |
+
"epoch": 4.66,
|
| 24471 |
+
"learning_rate": 3.4523809523809526e-05,
|
| 24472 |
+
"loss": 0.4681,
|
| 24473 |
+
"step": 4070
|
| 24474 |
+
},
|
| 24475 |
+
{
|
| 24476 |
+
"epoch": 4.66,
|
| 24477 |
+
"learning_rate": 3.440476190476191e-05,
|
| 24478 |
+
"loss": 0.4566,
|
| 24479 |
+
"step": 4071
|
| 24480 |
+
},
|
| 24481 |
+
{
|
| 24482 |
+
"epoch": 4.67,
|
| 24483 |
+
"learning_rate": 3.428571428571429e-05,
|
| 24484 |
+
"loss": 0.4892,
|
| 24485 |
+
"step": 4072
|
| 24486 |
+
},
|
| 24487 |
+
{
|
| 24488 |
+
"epoch": 4.67,
|
| 24489 |
+
"learning_rate": 3.4166666666666666e-05,
|
| 24490 |
+
"loss": 0.4905,
|
| 24491 |
+
"step": 4073
|
| 24492 |
+
},
|
| 24493 |
+
{
|
| 24494 |
+
"epoch": 4.67,
|
| 24495 |
+
"learning_rate": 3.404761904761905e-05,
|
| 24496 |
+
"loss": 0.4611,
|
| 24497 |
+
"step": 4074
|
| 24498 |
+
},
|
| 24499 |
+
{
|
| 24500 |
+
"epoch": 4.67,
|
| 24501 |
+
"learning_rate": 3.392857142857143e-05,
|
| 24502 |
+
"loss": 0.4717,
|
| 24503 |
+
"step": 4075
|
| 24504 |
+
},
|
| 24505 |
+
{
|
| 24506 |
+
"epoch": 4.67,
|
| 24507 |
+
"learning_rate": 3.380952380952381e-05,
|
| 24508 |
+
"loss": 0.4902,
|
| 24509 |
+
"step": 4076
|
| 24510 |
+
},
|
| 24511 |
+
{
|
| 24512 |
+
"epoch": 4.67,
|
| 24513 |
+
"learning_rate": 3.3690476190476195e-05,
|
| 24514 |
+
"loss": 0.5154,
|
| 24515 |
+
"step": 4077
|
| 24516 |
+
},
|
| 24517 |
+
{
|
| 24518 |
+
"epoch": 4.67,
|
| 24519 |
+
"learning_rate": 3.357142857142857e-05,
|
| 24520 |
+
"loss": 0.5223,
|
| 24521 |
+
"step": 4078
|
| 24522 |
+
},
|
| 24523 |
+
{
|
| 24524 |
+
"epoch": 4.67,
|
| 24525 |
+
"learning_rate": 3.345238095238095e-05,
|
| 24526 |
+
"loss": 0.4545,
|
| 24527 |
+
"step": 4079
|
| 24528 |
+
},
|
| 24529 |
+
{
|
| 24530 |
+
"epoch": 4.68,
|
| 24531 |
+
"learning_rate": 3.3333333333333335e-05,
|
| 24532 |
+
"loss": 0.4884,
|
| 24533 |
+
"step": 4080
|
| 24534 |
+
},
|
| 24535 |
+
{
|
| 24536 |
+
"epoch": 4.68,
|
| 24537 |
+
"learning_rate": 3.321428571428572e-05,
|
| 24538 |
+
"loss": 0.4524,
|
| 24539 |
+
"step": 4081
|
| 24540 |
+
},
|
| 24541 |
+
{
|
| 24542 |
+
"epoch": 4.68,
|
| 24543 |
+
"learning_rate": 3.309523809523809e-05,
|
| 24544 |
+
"loss": 0.4909,
|
| 24545 |
+
"step": 4082
|
| 24546 |
+
},
|
| 24547 |
+
{
|
| 24548 |
+
"epoch": 4.68,
|
| 24549 |
+
"learning_rate": 3.2976190476190475e-05,
|
| 24550 |
+
"loss": 0.4865,
|
| 24551 |
+
"step": 4083
|
| 24552 |
+
},
|
| 24553 |
+
{
|
| 24554 |
+
"epoch": 4.68,
|
| 24555 |
+
"learning_rate": 3.285714285714286e-05,
|
| 24556 |
+
"loss": 0.5063,
|
| 24557 |
+
"step": 4084
|
| 24558 |
+
},
|
| 24559 |
+
{
|
| 24560 |
+
"epoch": 4.68,
|
| 24561 |
+
"learning_rate": 3.273809523809524e-05,
|
| 24562 |
+
"loss": 0.4682,
|
| 24563 |
+
"step": 4085
|
| 24564 |
+
},
|
| 24565 |
+
{
|
| 24566 |
+
"epoch": 4.68,
|
| 24567 |
+
"learning_rate": 3.261904761904762e-05,
|
| 24568 |
+
"loss": 0.4765,
|
| 24569 |
+
"step": 4086
|
| 24570 |
+
},
|
| 24571 |
+
{
|
| 24572 |
+
"epoch": 4.68,
|
| 24573 |
+
"learning_rate": 3.2500000000000004e-05,
|
| 24574 |
+
"loss": 0.47,
|
| 24575 |
+
"step": 4087
|
| 24576 |
+
},
|
| 24577 |
+
{
|
| 24578 |
+
"epoch": 4.68,
|
| 24579 |
+
"learning_rate": 3.2380952380952386e-05,
|
| 24580 |
+
"loss": 0.4723,
|
| 24581 |
+
"step": 4088
|
| 24582 |
+
},
|
| 24583 |
+
{
|
| 24584 |
+
"epoch": 4.69,
|
| 24585 |
+
"learning_rate": 3.226190476190477e-05,
|
| 24586 |
+
"loss": 0.4613,
|
| 24587 |
+
"step": 4089
|
| 24588 |
+
},
|
| 24589 |
+
{
|
| 24590 |
+
"epoch": 4.69,
|
| 24591 |
+
"learning_rate": 3.2142857142857144e-05,
|
| 24592 |
+
"loss": 0.4911,
|
| 24593 |
+
"step": 4090
|
| 24594 |
+
},
|
| 24595 |
+
{
|
| 24596 |
+
"epoch": 4.69,
|
| 24597 |
+
"learning_rate": 3.202380952380952e-05,
|
| 24598 |
+
"loss": 0.4535,
|
| 24599 |
+
"step": 4091
|
| 24600 |
+
},
|
| 24601 |
+
{
|
| 24602 |
+
"epoch": 4.69,
|
| 24603 |
+
"learning_rate": 3.19047619047619e-05,
|
| 24604 |
+
"loss": 0.5009,
|
| 24605 |
+
"step": 4092
|
| 24606 |
+
},
|
| 24607 |
+
{
|
| 24608 |
+
"epoch": 4.69,
|
| 24609 |
+
"learning_rate": 3.1785714285714284e-05,
|
| 24610 |
+
"loss": 0.5104,
|
| 24611 |
+
"step": 4093
|
| 24612 |
+
},
|
| 24613 |
+
{
|
| 24614 |
+
"epoch": 4.69,
|
| 24615 |
+
"learning_rate": 3.1666666666666666e-05,
|
| 24616 |
+
"loss": 0.4986,
|
| 24617 |
+
"step": 4094
|
| 24618 |
+
},
|
| 24619 |
+
{
|
| 24620 |
+
"epoch": 4.69,
|
| 24621 |
+
"learning_rate": 3.154761904761905e-05,
|
| 24622 |
+
"loss": 0.4734,
|
| 24623 |
+
"step": 4095
|
| 24624 |
+
},
|
| 24625 |
+
{
|
| 24626 |
+
"epoch": 4.69,
|
| 24627 |
+
"learning_rate": 3.142857142857143e-05,
|
| 24628 |
+
"loss": 0.5051,
|
| 24629 |
+
"step": 4096
|
| 24630 |
+
},
|
| 24631 |
+
{
|
| 24632 |
+
"epoch": 4.69,
|
| 24633 |
+
"learning_rate": 3.130952380952381e-05,
|
| 24634 |
+
"loss": 0.4733,
|
| 24635 |
+
"step": 4097
|
| 24636 |
+
},
|
| 24637 |
+
{
|
| 24638 |
+
"epoch": 4.7,
|
| 24639 |
+
"learning_rate": 3.1190476190476195e-05,
|
| 24640 |
+
"loss": 0.4788,
|
| 24641 |
+
"step": 4098
|
| 24642 |
+
},
|
| 24643 |
+
{
|
| 24644 |
+
"epoch": 4.7,
|
| 24645 |
+
"learning_rate": 3.107142857142857e-05,
|
| 24646 |
+
"loss": 0.503,
|
| 24647 |
+
"step": 4099
|
| 24648 |
+
},
|
| 24649 |
+
{
|
| 24650 |
+
"epoch": 4.7,
|
| 24651 |
+
"learning_rate": 3.095238095238095e-05,
|
| 24652 |
+
"loss": 0.46,
|
| 24653 |
+
"step": 4100
|
| 24654 |
+
},
|
| 24655 |
+
{
|
| 24656 |
+
"epoch": 4.7,
|
| 24657 |
+
"learning_rate": 3.0833333333333335e-05,
|
| 24658 |
+
"loss": 0.5177,
|
| 24659 |
+
"step": 4101
|
| 24660 |
+
},
|
| 24661 |
+
{
|
| 24662 |
+
"epoch": 4.7,
|
| 24663 |
+
"learning_rate": 3.071428571428572e-05,
|
| 24664 |
+
"loss": 0.4695,
|
| 24665 |
+
"step": 4102
|
| 24666 |
+
},
|
| 24667 |
+
{
|
| 24668 |
+
"epoch": 4.7,
|
| 24669 |
+
"learning_rate": 3.05952380952381e-05,
|
| 24670 |
+
"loss": 0.4534,
|
| 24671 |
+
"step": 4103
|
| 24672 |
+
},
|
| 24673 |
+
{
|
| 24674 |
+
"epoch": 4.7,
|
| 24675 |
+
"learning_rate": 3.0476190476190482e-05,
|
| 24676 |
+
"loss": 0.4409,
|
| 24677 |
+
"step": 4104
|
| 24678 |
+
},
|
| 24679 |
+
{
|
| 24680 |
+
"epoch": 4.7,
|
| 24681 |
+
"learning_rate": 3.0357142857142857e-05,
|
| 24682 |
+
"loss": 0.4639,
|
| 24683 |
+
"step": 4105
|
| 24684 |
+
},
|
| 24685 |
+
{
|
| 24686 |
+
"epoch": 4.7,
|
| 24687 |
+
"learning_rate": 3.0238095238095236e-05,
|
| 24688 |
+
"loss": 0.4618,
|
| 24689 |
+
"step": 4106
|
| 24690 |
+
},
|
| 24691 |
+
{
|
| 24692 |
+
"epoch": 4.71,
|
| 24693 |
+
"learning_rate": 3.011904761904762e-05,
|
| 24694 |
+
"loss": 0.5042,
|
| 24695 |
+
"step": 4107
|
| 24696 |
+
},
|
| 24697 |
+
{
|
| 24698 |
+
"epoch": 4.71,
|
| 24699 |
+
"learning_rate": 3e-05,
|
| 24700 |
+
"loss": 0.5324,
|
| 24701 |
+
"step": 4108
|
| 24702 |
+
},
|
| 24703 |
+
{
|
| 24704 |
+
"epoch": 4.71,
|
| 24705 |
+
"learning_rate": 2.9880952380952383e-05,
|
| 24706 |
+
"loss": 0.4507,
|
| 24707 |
+
"step": 4109
|
| 24708 |
+
},
|
| 24709 |
+
{
|
| 24710 |
+
"epoch": 4.71,
|
| 24711 |
+
"learning_rate": 2.9761904761904762e-05,
|
| 24712 |
+
"loss": 0.4561,
|
| 24713 |
+
"step": 4110
|
| 24714 |
+
},
|
| 24715 |
+
{
|
| 24716 |
+
"epoch": 4.71,
|
| 24717 |
+
"learning_rate": 2.9642857142857144e-05,
|
| 24718 |
+
"loss": 0.4574,
|
| 24719 |
+
"step": 4111
|
| 24720 |
+
},
|
| 24721 |
+
{
|
| 24722 |
+
"epoch": 4.71,
|
| 24723 |
+
"learning_rate": 2.9523809523809526e-05,
|
| 24724 |
+
"loss": 0.48,
|
| 24725 |
+
"step": 4112
|
| 24726 |
+
},
|
| 24727 |
+
{
|
| 24728 |
+
"epoch": 4.71,
|
| 24729 |
+
"learning_rate": 2.940476190476191e-05,
|
| 24730 |
+
"loss": 0.467,
|
| 24731 |
+
"step": 4113
|
| 24732 |
+
},
|
| 24733 |
+
{
|
| 24734 |
+
"epoch": 4.71,
|
| 24735 |
+
"learning_rate": 2.9285714285714288e-05,
|
| 24736 |
+
"loss": 0.461,
|
| 24737 |
+
"step": 4114
|
| 24738 |
+
},
|
| 24739 |
+
{
|
| 24740 |
+
"epoch": 4.72,
|
| 24741 |
+
"learning_rate": 2.916666666666667e-05,
|
| 24742 |
+
"loss": 0.4524,
|
| 24743 |
+
"step": 4115
|
| 24744 |
+
},
|
| 24745 |
+
{
|
| 24746 |
+
"epoch": 4.72,
|
| 24747 |
+
"learning_rate": 2.9047619047619052e-05,
|
| 24748 |
+
"loss": 0.4447,
|
| 24749 |
+
"step": 4116
|
| 24750 |
+
},
|
| 24751 |
+
{
|
| 24752 |
+
"epoch": 4.72,
|
| 24753 |
+
"learning_rate": 2.8928571428571434e-05,
|
| 24754 |
+
"loss": 0.4877,
|
| 24755 |
+
"step": 4117
|
| 24756 |
+
},
|
| 24757 |
+
{
|
| 24758 |
+
"epoch": 4.72,
|
| 24759 |
+
"learning_rate": 2.880952380952381e-05,
|
| 24760 |
+
"loss": 0.4741,
|
| 24761 |
+
"step": 4118
|
| 24762 |
+
},
|
| 24763 |
+
{
|
| 24764 |
+
"epoch": 4.72,
|
| 24765 |
+
"learning_rate": 2.869047619047619e-05,
|
| 24766 |
+
"loss": 0.4734,
|
| 24767 |
+
"step": 4119
|
| 24768 |
+
},
|
| 24769 |
+
{
|
| 24770 |
+
"epoch": 4.72,
|
| 24771 |
+
"learning_rate": 2.857142857142857e-05,
|
| 24772 |
+
"loss": 0.4531,
|
| 24773 |
+
"step": 4120
|
| 24774 |
+
},
|
| 24775 |
+
{
|
| 24776 |
+
"epoch": 4.72,
|
| 24777 |
+
"learning_rate": 2.8452380952380953e-05,
|
| 24778 |
+
"loss": 0.4933,
|
| 24779 |
+
"step": 4121
|
| 24780 |
+
},
|
| 24781 |
+
{
|
| 24782 |
+
"epoch": 4.72,
|
| 24783 |
+
"learning_rate": 2.8333333333333335e-05,
|
| 24784 |
+
"loss": 0.4508,
|
| 24785 |
+
"step": 4122
|
| 24786 |
+
},
|
| 24787 |
+
{
|
| 24788 |
+
"epoch": 4.72,
|
| 24789 |
+
"learning_rate": 2.8214285714285714e-05,
|
| 24790 |
+
"loss": 0.5029,
|
| 24791 |
+
"step": 4123
|
| 24792 |
+
},
|
| 24793 |
+
{
|
| 24794 |
+
"epoch": 4.73,
|
| 24795 |
+
"learning_rate": 2.8095238095238096e-05,
|
| 24796 |
+
"loss": 0.4443,
|
| 24797 |
+
"step": 4124
|
| 24798 |
+
},
|
| 24799 |
+
{
|
| 24800 |
+
"epoch": 4.73,
|
| 24801 |
+
"learning_rate": 2.797619047619048e-05,
|
| 24802 |
+
"loss": 0.4845,
|
| 24803 |
+
"step": 4125
|
| 24804 |
+
},
|
| 24805 |
+
{
|
| 24806 |
+
"epoch": 4.73,
|
| 24807 |
+
"learning_rate": 2.785714285714286e-05,
|
| 24808 |
+
"loss": 0.4533,
|
| 24809 |
+
"step": 4126
|
| 24810 |
+
},
|
| 24811 |
+
{
|
| 24812 |
+
"epoch": 4.73,
|
| 24813 |
+
"learning_rate": 2.773809523809524e-05,
|
| 24814 |
+
"loss": 0.4965,
|
| 24815 |
+
"step": 4127
|
| 24816 |
+
},
|
| 24817 |
+
{
|
| 24818 |
+
"epoch": 4.73,
|
| 24819 |
+
"learning_rate": 2.7619047619047622e-05,
|
| 24820 |
+
"loss": 0.4847,
|
| 24821 |
+
"step": 4128
|
| 24822 |
+
},
|
| 24823 |
+
{
|
| 24824 |
+
"epoch": 4.73,
|
| 24825 |
+
"learning_rate": 2.7500000000000004e-05,
|
| 24826 |
+
"loss": 0.4714,
|
| 24827 |
+
"step": 4129
|
| 24828 |
+
},
|
| 24829 |
+
{
|
| 24830 |
+
"epoch": 4.73,
|
| 24831 |
+
"learning_rate": 2.7380952380952383e-05,
|
| 24832 |
+
"loss": 0.4182,
|
| 24833 |
+
"step": 4130
|
| 24834 |
+
},
|
| 24835 |
+
{
|
| 24836 |
+
"epoch": 4.73,
|
| 24837 |
+
"learning_rate": 2.7261904761904762e-05,
|
| 24838 |
+
"loss": 0.4487,
|
| 24839 |
+
"step": 4131
|
| 24840 |
+
},
|
| 24841 |
+
{
|
| 24842 |
+
"epoch": 4.73,
|
| 24843 |
+
"learning_rate": 2.714285714285714e-05,
|
| 24844 |
+
"loss": 0.4608,
|
| 24845 |
+
"step": 4132
|
| 24846 |
+
},
|
| 24847 |
+
{
|
| 24848 |
+
"epoch": 4.74,
|
| 24849 |
+
"learning_rate": 2.7023809523809523e-05,
|
| 24850 |
+
"loss": 0.4848,
|
| 24851 |
+
"step": 4133
|
| 24852 |
+
},
|
| 24853 |
+
{
|
| 24854 |
+
"epoch": 4.74,
|
| 24855 |
+
"learning_rate": 2.6904761904761905e-05,
|
| 24856 |
+
"loss": 0.4764,
|
| 24857 |
+
"step": 4134
|
| 24858 |
+
},
|
| 24859 |
+
{
|
| 24860 |
+
"epoch": 4.74,
|
| 24861 |
+
"learning_rate": 2.6785714285714288e-05,
|
| 24862 |
+
"loss": 0.4766,
|
| 24863 |
+
"step": 4135
|
| 24864 |
+
},
|
| 24865 |
+
{
|
| 24866 |
+
"epoch": 4.74,
|
| 24867 |
+
"learning_rate": 2.6666666666666667e-05,
|
| 24868 |
+
"loss": 0.503,
|
| 24869 |
+
"step": 4136
|
| 24870 |
+
},
|
| 24871 |
+
{
|
| 24872 |
+
"epoch": 4.74,
|
| 24873 |
+
"learning_rate": 2.654761904761905e-05,
|
| 24874 |
+
"loss": 0.4535,
|
| 24875 |
+
"step": 4137
|
| 24876 |
+
},
|
| 24877 |
+
{
|
| 24878 |
+
"epoch": 4.74,
|
| 24879 |
+
"learning_rate": 2.642857142857143e-05,
|
| 24880 |
+
"loss": 0.4668,
|
| 24881 |
+
"step": 4138
|
| 24882 |
+
},
|
| 24883 |
+
{
|
| 24884 |
+
"epoch": 4.74,
|
| 24885 |
+
"learning_rate": 2.6309523809523813e-05,
|
| 24886 |
+
"loss": 0.5005,
|
| 24887 |
+
"step": 4139
|
| 24888 |
+
},
|
| 24889 |
+
{
|
| 24890 |
+
"epoch": 4.74,
|
| 24891 |
+
"learning_rate": 2.6190476190476192e-05,
|
| 24892 |
+
"loss": 0.4881,
|
| 24893 |
+
"step": 4140
|
| 24894 |
+
},
|
| 24895 |
+
{
|
| 24896 |
+
"epoch": 4.75,
|
| 24897 |
+
"learning_rate": 2.6071428571428574e-05,
|
| 24898 |
+
"loss": 0.5118,
|
| 24899 |
+
"step": 4141
|
| 24900 |
+
},
|
| 24901 |
+
{
|
| 24902 |
+
"epoch": 4.75,
|
| 24903 |
+
"learning_rate": 2.5952380952380957e-05,
|
| 24904 |
+
"loss": 0.4575,
|
| 24905 |
+
"step": 4142
|
| 24906 |
}
|
| 24907 |
],
|
| 24908 |
"logging_steps": 1,
|
| 24909 |
"max_steps": 4360,
|
| 24910 |
"num_train_epochs": 5,
|
| 24911 |
"save_steps": 218,
|
| 24912 |
+
"total_flos": 8.217836005873523e+19,
|
| 24913 |
"trial_name": null,
|
| 24914 |
"trial_params": null
|
| 24915 |
}
|