Training in progress, step 3924, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 686648325
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14587ec8ef15e37886a4737727c4b32419583a10b8657eec59ec3f5d0af9d393
|
| 3 |
size 686648325
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 343308717
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eae0a9d68de86873b50e361d6fe2e3b9118bc25abba7b90c38631056558f3c63
|
| 3 |
size 343308717
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1de371dbe3727194bf6b8ebc325cd10c4f5680e47a0ccf1d2a2d1e23545ec57
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 4.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -22287,13 +22287,1321 @@
|
|
| 22287 |
"learning_rate": 7.785714285714286e-05,
|
| 22288 |
"loss": 0.4927,
|
| 22289 |
"step": 3706
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22290 |
}
|
| 22291 |
],
|
| 22292 |
"logging_steps": 1,
|
| 22293 |
"max_steps": 4360,
|
| 22294 |
"num_train_epochs": 5,
|
| 22295 |
"save_steps": 218,
|
| 22296 |
-
"total_flos": 7.
|
| 22297 |
"trial_name": null,
|
| 22298 |
"trial_params": null
|
| 22299 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.496374411401357,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 3924,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 22287 |
"learning_rate": 7.785714285714286e-05,
|
| 22288 |
"loss": 0.4927,
|
| 22289 |
"step": 3706
|
| 22290 |
+
},
|
| 22291 |
+
{
|
| 22292 |
+
"epoch": 4.25,
|
| 22293 |
+
"learning_rate": 7.773809523809525e-05,
|
| 22294 |
+
"loss": 0.4862,
|
| 22295 |
+
"step": 3707
|
| 22296 |
+
},
|
| 22297 |
+
{
|
| 22298 |
+
"epoch": 4.25,
|
| 22299 |
+
"learning_rate": 7.761904761904762e-05,
|
| 22300 |
+
"loss": 0.4553,
|
| 22301 |
+
"step": 3708
|
| 22302 |
+
},
|
| 22303 |
+
{
|
| 22304 |
+
"epoch": 4.25,
|
| 22305 |
+
"learning_rate": 7.75e-05,
|
| 22306 |
+
"loss": 0.4681,
|
| 22307 |
+
"step": 3709
|
| 22308 |
+
},
|
| 22309 |
+
{
|
| 22310 |
+
"epoch": 4.25,
|
| 22311 |
+
"learning_rate": 7.738095238095239e-05,
|
| 22312 |
+
"loss": 0.4895,
|
| 22313 |
+
"step": 3710
|
| 22314 |
+
},
|
| 22315 |
+
{
|
| 22316 |
+
"epoch": 4.25,
|
| 22317 |
+
"learning_rate": 7.726190476190476e-05,
|
| 22318 |
+
"loss": 0.4812,
|
| 22319 |
+
"step": 3711
|
| 22320 |
+
},
|
| 22321 |
+
{
|
| 22322 |
+
"epoch": 4.25,
|
| 22323 |
+
"learning_rate": 7.714285714285715e-05,
|
| 22324 |
+
"loss": 0.4969,
|
| 22325 |
+
"step": 3712
|
| 22326 |
+
},
|
| 22327 |
+
{
|
| 22328 |
+
"epoch": 4.25,
|
| 22329 |
+
"learning_rate": 7.702380952380953e-05,
|
| 22330 |
+
"loss": 0.4978,
|
| 22331 |
+
"step": 3713
|
| 22332 |
+
},
|
| 22333 |
+
{
|
| 22334 |
+
"epoch": 4.26,
|
| 22335 |
+
"learning_rate": 7.690476190476192e-05,
|
| 22336 |
+
"loss": 0.495,
|
| 22337 |
+
"step": 3714
|
| 22338 |
+
},
|
| 22339 |
+
{
|
| 22340 |
+
"epoch": 4.26,
|
| 22341 |
+
"learning_rate": 7.67857142857143e-05,
|
| 22342 |
+
"loss": 0.5193,
|
| 22343 |
+
"step": 3715
|
| 22344 |
+
},
|
| 22345 |
+
{
|
| 22346 |
+
"epoch": 4.26,
|
| 22347 |
+
"learning_rate": 7.666666666666667e-05,
|
| 22348 |
+
"loss": 0.4753,
|
| 22349 |
+
"step": 3716
|
| 22350 |
+
},
|
| 22351 |
+
{
|
| 22352 |
+
"epoch": 4.26,
|
| 22353 |
+
"learning_rate": 7.654761904761904e-05,
|
| 22354 |
+
"loss": 0.5228,
|
| 22355 |
+
"step": 3717
|
| 22356 |
+
},
|
| 22357 |
+
{
|
| 22358 |
+
"epoch": 4.26,
|
| 22359 |
+
"learning_rate": 7.642857142857143e-05,
|
| 22360 |
+
"loss": 0.5039,
|
| 22361 |
+
"step": 3718
|
| 22362 |
+
},
|
| 22363 |
+
{
|
| 22364 |
+
"epoch": 4.26,
|
| 22365 |
+
"learning_rate": 7.630952380952381e-05,
|
| 22366 |
+
"loss": 0.4866,
|
| 22367 |
+
"step": 3719
|
| 22368 |
+
},
|
| 22369 |
+
{
|
| 22370 |
+
"epoch": 4.26,
|
| 22371 |
+
"learning_rate": 7.619047619047618e-05,
|
| 22372 |
+
"loss": 0.4981,
|
| 22373 |
+
"step": 3720
|
| 22374 |
+
},
|
| 22375 |
+
{
|
| 22376 |
+
"epoch": 4.26,
|
| 22377 |
+
"learning_rate": 7.607142857142857e-05,
|
| 22378 |
+
"loss": 0.5064,
|
| 22379 |
+
"step": 3721
|
| 22380 |
+
},
|
| 22381 |
+
{
|
| 22382 |
+
"epoch": 4.26,
|
| 22383 |
+
"learning_rate": 7.595238095238095e-05,
|
| 22384 |
+
"loss": 0.4847,
|
| 22385 |
+
"step": 3722
|
| 22386 |
+
},
|
| 22387 |
+
{
|
| 22388 |
+
"epoch": 4.27,
|
| 22389 |
+
"learning_rate": 7.583333333333334e-05,
|
| 22390 |
+
"loss": 0.464,
|
| 22391 |
+
"step": 3723
|
| 22392 |
+
},
|
| 22393 |
+
{
|
| 22394 |
+
"epoch": 4.27,
|
| 22395 |
+
"learning_rate": 7.571428571428571e-05,
|
| 22396 |
+
"loss": 0.4804,
|
| 22397 |
+
"step": 3724
|
| 22398 |
+
},
|
| 22399 |
+
{
|
| 22400 |
+
"epoch": 4.27,
|
| 22401 |
+
"learning_rate": 7.55952380952381e-05,
|
| 22402 |
+
"loss": 0.4724,
|
| 22403 |
+
"step": 3725
|
| 22404 |
+
},
|
| 22405 |
+
{
|
| 22406 |
+
"epoch": 4.27,
|
| 22407 |
+
"learning_rate": 7.547619047619048e-05,
|
| 22408 |
+
"loss": 0.4639,
|
| 22409 |
+
"step": 3726
|
| 22410 |
+
},
|
| 22411 |
+
{
|
| 22412 |
+
"epoch": 4.27,
|
| 22413 |
+
"learning_rate": 7.535714285714285e-05,
|
| 22414 |
+
"loss": 0.488,
|
| 22415 |
+
"step": 3727
|
| 22416 |
+
},
|
| 22417 |
+
{
|
| 22418 |
+
"epoch": 4.27,
|
| 22419 |
+
"learning_rate": 7.523809523809524e-05,
|
| 22420 |
+
"loss": 0.5178,
|
| 22421 |
+
"step": 3728
|
| 22422 |
+
},
|
| 22423 |
+
{
|
| 22424 |
+
"epoch": 4.27,
|
| 22425 |
+
"learning_rate": 7.511904761904762e-05,
|
| 22426 |
+
"loss": 0.4992,
|
| 22427 |
+
"step": 3729
|
| 22428 |
+
},
|
| 22429 |
+
{
|
| 22430 |
+
"epoch": 4.27,
|
| 22431 |
+
"learning_rate": 7.500000000000001e-05,
|
| 22432 |
+
"loss": 0.4945,
|
| 22433 |
+
"step": 3730
|
| 22434 |
+
},
|
| 22435 |
+
{
|
| 22436 |
+
"epoch": 4.28,
|
| 22437 |
+
"learning_rate": 7.488095238095238e-05,
|
| 22438 |
+
"loss": 0.5391,
|
| 22439 |
+
"step": 3731
|
| 22440 |
+
},
|
| 22441 |
+
{
|
| 22442 |
+
"epoch": 4.28,
|
| 22443 |
+
"learning_rate": 7.476190476190477e-05,
|
| 22444 |
+
"loss": 0.4831,
|
| 22445 |
+
"step": 3732
|
| 22446 |
+
},
|
| 22447 |
+
{
|
| 22448 |
+
"epoch": 4.28,
|
| 22449 |
+
"learning_rate": 7.464285714285715e-05,
|
| 22450 |
+
"loss": 0.4684,
|
| 22451 |
+
"step": 3733
|
| 22452 |
+
},
|
| 22453 |
+
{
|
| 22454 |
+
"epoch": 4.28,
|
| 22455 |
+
"learning_rate": 7.452380952380952e-05,
|
| 22456 |
+
"loss": 0.4725,
|
| 22457 |
+
"step": 3734
|
| 22458 |
+
},
|
| 22459 |
+
{
|
| 22460 |
+
"epoch": 4.28,
|
| 22461 |
+
"learning_rate": 7.440476190476191e-05,
|
| 22462 |
+
"loss": 0.5164,
|
| 22463 |
+
"step": 3735
|
| 22464 |
+
},
|
| 22465 |
+
{
|
| 22466 |
+
"epoch": 4.28,
|
| 22467 |
+
"learning_rate": 7.428571428571429e-05,
|
| 22468 |
+
"loss": 0.5313,
|
| 22469 |
+
"step": 3736
|
| 22470 |
+
},
|
| 22471 |
+
{
|
| 22472 |
+
"epoch": 4.28,
|
| 22473 |
+
"learning_rate": 7.416666666666668e-05,
|
| 22474 |
+
"loss": 0.528,
|
| 22475 |
+
"step": 3737
|
| 22476 |
+
},
|
| 22477 |
+
{
|
| 22478 |
+
"epoch": 4.28,
|
| 22479 |
+
"learning_rate": 7.404761904761905e-05,
|
| 22480 |
+
"loss": 0.5147,
|
| 22481 |
+
"step": 3738
|
| 22482 |
+
},
|
| 22483 |
+
{
|
| 22484 |
+
"epoch": 4.28,
|
| 22485 |
+
"learning_rate": 7.392857142857144e-05,
|
| 22486 |
+
"loss": 0.5041,
|
| 22487 |
+
"step": 3739
|
| 22488 |
+
},
|
| 22489 |
+
{
|
| 22490 |
+
"epoch": 4.29,
|
| 22491 |
+
"learning_rate": 7.380952380952382e-05,
|
| 22492 |
+
"loss": 0.4664,
|
| 22493 |
+
"step": 3740
|
| 22494 |
+
},
|
| 22495 |
+
{
|
| 22496 |
+
"epoch": 4.29,
|
| 22497 |
+
"learning_rate": 7.36904761904762e-05,
|
| 22498 |
+
"loss": 0.4658,
|
| 22499 |
+
"step": 3741
|
| 22500 |
+
},
|
| 22501 |
+
{
|
| 22502 |
+
"epoch": 4.29,
|
| 22503 |
+
"learning_rate": 7.357142857142858e-05,
|
| 22504 |
+
"loss": 0.4897,
|
| 22505 |
+
"step": 3742
|
| 22506 |
+
},
|
| 22507 |
+
{
|
| 22508 |
+
"epoch": 4.29,
|
| 22509 |
+
"learning_rate": 7.345238095238096e-05,
|
| 22510 |
+
"loss": 0.4701,
|
| 22511 |
+
"step": 3743
|
| 22512 |
+
},
|
| 22513 |
+
{
|
| 22514 |
+
"epoch": 4.29,
|
| 22515 |
+
"learning_rate": 7.333333333333333e-05,
|
| 22516 |
+
"loss": 0.5174,
|
| 22517 |
+
"step": 3744
|
| 22518 |
+
},
|
| 22519 |
+
{
|
| 22520 |
+
"epoch": 4.29,
|
| 22521 |
+
"learning_rate": 7.321428571428571e-05,
|
| 22522 |
+
"loss": 0.4717,
|
| 22523 |
+
"step": 3745
|
| 22524 |
+
},
|
| 22525 |
+
{
|
| 22526 |
+
"epoch": 4.29,
|
| 22527 |
+
"learning_rate": 7.30952380952381e-05,
|
| 22528 |
+
"loss": 0.4545,
|
| 22529 |
+
"step": 3746
|
| 22530 |
+
},
|
| 22531 |
+
{
|
| 22532 |
+
"epoch": 4.29,
|
| 22533 |
+
"learning_rate": 7.297619047619047e-05,
|
| 22534 |
+
"loss": 0.4933,
|
| 22535 |
+
"step": 3747
|
| 22536 |
+
},
|
| 22537 |
+
{
|
| 22538 |
+
"epoch": 4.29,
|
| 22539 |
+
"learning_rate": 7.285714285714286e-05,
|
| 22540 |
+
"loss": 0.4849,
|
| 22541 |
+
"step": 3748
|
| 22542 |
+
},
|
| 22543 |
+
{
|
| 22544 |
+
"epoch": 4.3,
|
| 22545 |
+
"learning_rate": 7.273809523809524e-05,
|
| 22546 |
+
"loss": 0.5581,
|
| 22547 |
+
"step": 3749
|
| 22548 |
+
},
|
| 22549 |
+
{
|
| 22550 |
+
"epoch": 4.3,
|
| 22551 |
+
"learning_rate": 7.261904761904762e-05,
|
| 22552 |
+
"loss": 0.4706,
|
| 22553 |
+
"step": 3750
|
| 22554 |
+
},
|
| 22555 |
+
{
|
| 22556 |
+
"epoch": 4.3,
|
| 22557 |
+
"learning_rate": 7.25e-05,
|
| 22558 |
+
"loss": 0.4913,
|
| 22559 |
+
"step": 3751
|
| 22560 |
+
},
|
| 22561 |
+
{
|
| 22562 |
+
"epoch": 4.3,
|
| 22563 |
+
"learning_rate": 7.238095238095238e-05,
|
| 22564 |
+
"loss": 0.5214,
|
| 22565 |
+
"step": 3752
|
| 22566 |
+
},
|
| 22567 |
+
{
|
| 22568 |
+
"epoch": 4.3,
|
| 22569 |
+
"learning_rate": 7.226190476190476e-05,
|
| 22570 |
+
"loss": 0.4639,
|
| 22571 |
+
"step": 3753
|
| 22572 |
+
},
|
| 22573 |
+
{
|
| 22574 |
+
"epoch": 4.3,
|
| 22575 |
+
"learning_rate": 7.214285714285714e-05,
|
| 22576 |
+
"loss": 0.4941,
|
| 22577 |
+
"step": 3754
|
| 22578 |
+
},
|
| 22579 |
+
{
|
| 22580 |
+
"epoch": 4.3,
|
| 22581 |
+
"learning_rate": 7.202380952380953e-05,
|
| 22582 |
+
"loss": 0.4989,
|
| 22583 |
+
"step": 3755
|
| 22584 |
+
},
|
| 22585 |
+
{
|
| 22586 |
+
"epoch": 4.3,
|
| 22587 |
+
"learning_rate": 7.19047619047619e-05,
|
| 22588 |
+
"loss": 0.4952,
|
| 22589 |
+
"step": 3756
|
| 22590 |
+
},
|
| 22591 |
+
{
|
| 22592 |
+
"epoch": 4.31,
|
| 22593 |
+
"learning_rate": 7.17857142857143e-05,
|
| 22594 |
+
"loss": 0.5071,
|
| 22595 |
+
"step": 3757
|
| 22596 |
+
},
|
| 22597 |
+
{
|
| 22598 |
+
"epoch": 4.31,
|
| 22599 |
+
"learning_rate": 7.166666666666667e-05,
|
| 22600 |
+
"loss": 0.4683,
|
| 22601 |
+
"step": 3758
|
| 22602 |
+
},
|
| 22603 |
+
{
|
| 22604 |
+
"epoch": 4.31,
|
| 22605 |
+
"learning_rate": 7.154761904761906e-05,
|
| 22606 |
+
"loss": 0.5294,
|
| 22607 |
+
"step": 3759
|
| 22608 |
+
},
|
| 22609 |
+
{
|
| 22610 |
+
"epoch": 4.31,
|
| 22611 |
+
"learning_rate": 7.142857142857143e-05,
|
| 22612 |
+
"loss": 0.4777,
|
| 22613 |
+
"step": 3760
|
| 22614 |
+
},
|
| 22615 |
+
{
|
| 22616 |
+
"epoch": 4.31,
|
| 22617 |
+
"learning_rate": 7.130952380952381e-05,
|
| 22618 |
+
"loss": 0.5133,
|
| 22619 |
+
"step": 3761
|
| 22620 |
+
},
|
| 22621 |
+
{
|
| 22622 |
+
"epoch": 4.31,
|
| 22623 |
+
"learning_rate": 7.11904761904762e-05,
|
| 22624 |
+
"loss": 0.5054,
|
| 22625 |
+
"step": 3762
|
| 22626 |
+
},
|
| 22627 |
+
{
|
| 22628 |
+
"epoch": 4.31,
|
| 22629 |
+
"learning_rate": 7.107142857142857e-05,
|
| 22630 |
+
"loss": 0.4972,
|
| 22631 |
+
"step": 3763
|
| 22632 |
+
},
|
| 22633 |
+
{
|
| 22634 |
+
"epoch": 4.31,
|
| 22635 |
+
"learning_rate": 7.095238095238096e-05,
|
| 22636 |
+
"loss": 0.465,
|
| 22637 |
+
"step": 3764
|
| 22638 |
+
},
|
| 22639 |
+
{
|
| 22640 |
+
"epoch": 4.31,
|
| 22641 |
+
"learning_rate": 7.083333333333334e-05,
|
| 22642 |
+
"loss": 0.496,
|
| 22643 |
+
"step": 3765
|
| 22644 |
+
},
|
| 22645 |
+
{
|
| 22646 |
+
"epoch": 4.32,
|
| 22647 |
+
"learning_rate": 7.071428571428573e-05,
|
| 22648 |
+
"loss": 0.4971,
|
| 22649 |
+
"step": 3766
|
| 22650 |
+
},
|
| 22651 |
+
{
|
| 22652 |
+
"epoch": 4.32,
|
| 22653 |
+
"learning_rate": 7.05952380952381e-05,
|
| 22654 |
+
"loss": 0.5241,
|
| 22655 |
+
"step": 3767
|
| 22656 |
+
},
|
| 22657 |
+
{
|
| 22658 |
+
"epoch": 4.32,
|
| 22659 |
+
"learning_rate": 7.047619047619048e-05,
|
| 22660 |
+
"loss": 0.4993,
|
| 22661 |
+
"step": 3768
|
| 22662 |
+
},
|
| 22663 |
+
{
|
| 22664 |
+
"epoch": 4.32,
|
| 22665 |
+
"learning_rate": 7.035714285714287e-05,
|
| 22666 |
+
"loss": 0.484,
|
| 22667 |
+
"step": 3769
|
| 22668 |
+
},
|
| 22669 |
+
{
|
| 22670 |
+
"epoch": 4.32,
|
| 22671 |
+
"learning_rate": 7.023809523809524e-05,
|
| 22672 |
+
"loss": 0.5002,
|
| 22673 |
+
"step": 3770
|
| 22674 |
+
},
|
| 22675 |
+
{
|
| 22676 |
+
"epoch": 4.32,
|
| 22677 |
+
"learning_rate": 7.011904761904762e-05,
|
| 22678 |
+
"loss": 0.4903,
|
| 22679 |
+
"step": 3771
|
| 22680 |
+
},
|
| 22681 |
+
{
|
| 22682 |
+
"epoch": 4.32,
|
| 22683 |
+
"learning_rate": 7e-05,
|
| 22684 |
+
"loss": 0.5022,
|
| 22685 |
+
"step": 3772
|
| 22686 |
+
},
|
| 22687 |
+
{
|
| 22688 |
+
"epoch": 4.32,
|
| 22689 |
+
"learning_rate": 6.988095238095238e-05,
|
| 22690 |
+
"loss": 0.5184,
|
| 22691 |
+
"step": 3773
|
| 22692 |
+
},
|
| 22693 |
+
{
|
| 22694 |
+
"epoch": 4.32,
|
| 22695 |
+
"learning_rate": 6.976190476190476e-05,
|
| 22696 |
+
"loss": 0.4818,
|
| 22697 |
+
"step": 3774
|
| 22698 |
+
},
|
| 22699 |
+
{
|
| 22700 |
+
"epoch": 4.33,
|
| 22701 |
+
"learning_rate": 6.964285714285715e-05,
|
| 22702 |
+
"loss": 0.4735,
|
| 22703 |
+
"step": 3775
|
| 22704 |
+
},
|
| 22705 |
+
{
|
| 22706 |
+
"epoch": 4.33,
|
| 22707 |
+
"learning_rate": 6.952380952380952e-05,
|
| 22708 |
+
"loss": 0.4851,
|
| 22709 |
+
"step": 3776
|
| 22710 |
+
},
|
| 22711 |
+
{
|
| 22712 |
+
"epoch": 4.33,
|
| 22713 |
+
"learning_rate": 6.940476190476191e-05,
|
| 22714 |
+
"loss": 0.5129,
|
| 22715 |
+
"step": 3777
|
| 22716 |
+
},
|
| 22717 |
+
{
|
| 22718 |
+
"epoch": 4.33,
|
| 22719 |
+
"learning_rate": 6.928571428571429e-05,
|
| 22720 |
+
"loss": 0.4823,
|
| 22721 |
+
"step": 3778
|
| 22722 |
+
},
|
| 22723 |
+
{
|
| 22724 |
+
"epoch": 4.33,
|
| 22725 |
+
"learning_rate": 6.916666666666666e-05,
|
| 22726 |
+
"loss": 0.5267,
|
| 22727 |
+
"step": 3779
|
| 22728 |
+
},
|
| 22729 |
+
{
|
| 22730 |
+
"epoch": 4.33,
|
| 22731 |
+
"learning_rate": 6.904761904761905e-05,
|
| 22732 |
+
"loss": 0.5153,
|
| 22733 |
+
"step": 3780
|
| 22734 |
+
},
|
| 22735 |
+
{
|
| 22736 |
+
"epoch": 4.33,
|
| 22737 |
+
"learning_rate": 6.892857142857143e-05,
|
| 22738 |
+
"loss": 0.4879,
|
| 22739 |
+
"step": 3781
|
| 22740 |
+
},
|
| 22741 |
+
{
|
| 22742 |
+
"epoch": 4.33,
|
| 22743 |
+
"learning_rate": 6.880952380952382e-05,
|
| 22744 |
+
"loss": 0.4793,
|
| 22745 |
+
"step": 3782
|
| 22746 |
+
},
|
| 22747 |
+
{
|
| 22748 |
+
"epoch": 4.33,
|
| 22749 |
+
"learning_rate": 6.869047619047619e-05,
|
| 22750 |
+
"loss": 0.474,
|
| 22751 |
+
"step": 3783
|
| 22752 |
+
},
|
| 22753 |
+
{
|
| 22754 |
+
"epoch": 4.34,
|
| 22755 |
+
"learning_rate": 6.857142857142858e-05,
|
| 22756 |
+
"loss": 0.4695,
|
| 22757 |
+
"step": 3784
|
| 22758 |
+
},
|
| 22759 |
+
{
|
| 22760 |
+
"epoch": 4.34,
|
| 22761 |
+
"learning_rate": 6.845238095238096e-05,
|
| 22762 |
+
"loss": 0.4998,
|
| 22763 |
+
"step": 3785
|
| 22764 |
+
},
|
| 22765 |
+
{
|
| 22766 |
+
"epoch": 4.34,
|
| 22767 |
+
"learning_rate": 6.833333333333333e-05,
|
| 22768 |
+
"loss": 0.5203,
|
| 22769 |
+
"step": 3786
|
| 22770 |
+
},
|
| 22771 |
+
{
|
| 22772 |
+
"epoch": 4.34,
|
| 22773 |
+
"learning_rate": 6.821428571428572e-05,
|
| 22774 |
+
"loss": 0.5157,
|
| 22775 |
+
"step": 3787
|
| 22776 |
+
},
|
| 22777 |
+
{
|
| 22778 |
+
"epoch": 4.34,
|
| 22779 |
+
"learning_rate": 6.80952380952381e-05,
|
| 22780 |
+
"loss": 0.4948,
|
| 22781 |
+
"step": 3788
|
| 22782 |
+
},
|
| 22783 |
+
{
|
| 22784 |
+
"epoch": 4.34,
|
| 22785 |
+
"learning_rate": 6.797619047619049e-05,
|
| 22786 |
+
"loss": 0.4759,
|
| 22787 |
+
"step": 3789
|
| 22788 |
+
},
|
| 22789 |
+
{
|
| 22790 |
+
"epoch": 4.34,
|
| 22791 |
+
"learning_rate": 6.785714285714286e-05,
|
| 22792 |
+
"loss": 0.4802,
|
| 22793 |
+
"step": 3790
|
| 22794 |
+
},
|
| 22795 |
+
{
|
| 22796 |
+
"epoch": 4.34,
|
| 22797 |
+
"learning_rate": 6.773809523809525e-05,
|
| 22798 |
+
"loss": 0.4772,
|
| 22799 |
+
"step": 3791
|
| 22800 |
+
},
|
| 22801 |
+
{
|
| 22802 |
+
"epoch": 4.35,
|
| 22803 |
+
"learning_rate": 6.761904761904763e-05,
|
| 22804 |
+
"loss": 0.538,
|
| 22805 |
+
"step": 3792
|
| 22806 |
+
},
|
| 22807 |
+
{
|
| 22808 |
+
"epoch": 4.35,
|
| 22809 |
+
"learning_rate": 6.750000000000001e-05,
|
| 22810 |
+
"loss": 0.5095,
|
| 22811 |
+
"step": 3793
|
| 22812 |
+
},
|
| 22813 |
+
{
|
| 22814 |
+
"epoch": 4.35,
|
| 22815 |
+
"learning_rate": 6.738095238095239e-05,
|
| 22816 |
+
"loss": 0.4821,
|
| 22817 |
+
"step": 3794
|
| 22818 |
+
},
|
| 22819 |
+
{
|
| 22820 |
+
"epoch": 4.35,
|
| 22821 |
+
"learning_rate": 6.726190476190477e-05,
|
| 22822 |
+
"loss": 0.4435,
|
| 22823 |
+
"step": 3795
|
| 22824 |
+
},
|
| 22825 |
+
{
|
| 22826 |
+
"epoch": 4.35,
|
| 22827 |
+
"learning_rate": 6.714285714285714e-05,
|
| 22828 |
+
"loss": 0.5081,
|
| 22829 |
+
"step": 3796
|
| 22830 |
+
},
|
| 22831 |
+
{
|
| 22832 |
+
"epoch": 4.35,
|
| 22833 |
+
"learning_rate": 6.702380952380952e-05,
|
| 22834 |
+
"loss": 0.4634,
|
| 22835 |
+
"step": 3797
|
| 22836 |
+
},
|
| 22837 |
+
{
|
| 22838 |
+
"epoch": 4.35,
|
| 22839 |
+
"learning_rate": 6.69047619047619e-05,
|
| 22840 |
+
"loss": 0.5468,
|
| 22841 |
+
"step": 3798
|
| 22842 |
+
},
|
| 22843 |
+
{
|
| 22844 |
+
"epoch": 4.35,
|
| 22845 |
+
"learning_rate": 6.678571428571428e-05,
|
| 22846 |
+
"loss": 0.4323,
|
| 22847 |
+
"step": 3799
|
| 22848 |
+
},
|
| 22849 |
+
{
|
| 22850 |
+
"epoch": 4.35,
|
| 22851 |
+
"learning_rate": 6.666666666666667e-05,
|
| 22852 |
+
"loss": 0.4553,
|
| 22853 |
+
"step": 3800
|
| 22854 |
+
},
|
| 22855 |
+
{
|
| 22856 |
+
"epoch": 4.36,
|
| 22857 |
+
"learning_rate": 6.654761904761905e-05,
|
| 22858 |
+
"loss": 0.4626,
|
| 22859 |
+
"step": 3801
|
| 22860 |
+
},
|
| 22861 |
+
{
|
| 22862 |
+
"epoch": 4.36,
|
| 22863 |
+
"learning_rate": 6.642857142857143e-05,
|
| 22864 |
+
"loss": 0.4565,
|
| 22865 |
+
"step": 3802
|
| 22866 |
+
},
|
| 22867 |
+
{
|
| 22868 |
+
"epoch": 4.36,
|
| 22869 |
+
"learning_rate": 6.630952380952381e-05,
|
| 22870 |
+
"loss": 0.463,
|
| 22871 |
+
"step": 3803
|
| 22872 |
+
},
|
| 22873 |
+
{
|
| 22874 |
+
"epoch": 4.36,
|
| 22875 |
+
"learning_rate": 6.619047619047619e-05,
|
| 22876 |
+
"loss": 0.5065,
|
| 22877 |
+
"step": 3804
|
| 22878 |
+
},
|
| 22879 |
+
{
|
| 22880 |
+
"epoch": 4.36,
|
| 22881 |
+
"learning_rate": 6.607142857142857e-05,
|
| 22882 |
+
"loss": 0.4777,
|
| 22883 |
+
"step": 3805
|
| 22884 |
+
},
|
| 22885 |
+
{
|
| 22886 |
+
"epoch": 4.36,
|
| 22887 |
+
"learning_rate": 6.595238095238095e-05,
|
| 22888 |
+
"loss": 0.4464,
|
| 22889 |
+
"step": 3806
|
| 22890 |
+
},
|
| 22891 |
+
{
|
| 22892 |
+
"epoch": 4.36,
|
| 22893 |
+
"learning_rate": 6.583333333333334e-05,
|
| 22894 |
+
"loss": 0.5004,
|
| 22895 |
+
"step": 3807
|
| 22896 |
+
},
|
| 22897 |
+
{
|
| 22898 |
+
"epoch": 4.36,
|
| 22899 |
+
"learning_rate": 6.571428571428571e-05,
|
| 22900 |
+
"loss": 0.4658,
|
| 22901 |
+
"step": 3808
|
| 22902 |
+
},
|
| 22903 |
+
{
|
| 22904 |
+
"epoch": 4.36,
|
| 22905 |
+
"learning_rate": 6.55952380952381e-05,
|
| 22906 |
+
"loss": 0.505,
|
| 22907 |
+
"step": 3809
|
| 22908 |
+
},
|
| 22909 |
+
{
|
| 22910 |
+
"epoch": 4.37,
|
| 22911 |
+
"learning_rate": 6.547619047619048e-05,
|
| 22912 |
+
"loss": 0.5012,
|
| 22913 |
+
"step": 3810
|
| 22914 |
+
},
|
| 22915 |
+
{
|
| 22916 |
+
"epoch": 4.37,
|
| 22917 |
+
"learning_rate": 6.535714285714287e-05,
|
| 22918 |
+
"loss": 0.4676,
|
| 22919 |
+
"step": 3811
|
| 22920 |
+
},
|
| 22921 |
+
{
|
| 22922 |
+
"epoch": 4.37,
|
| 22923 |
+
"learning_rate": 6.523809523809524e-05,
|
| 22924 |
+
"loss": 0.4936,
|
| 22925 |
+
"step": 3812
|
| 22926 |
+
},
|
| 22927 |
+
{
|
| 22928 |
+
"epoch": 4.37,
|
| 22929 |
+
"learning_rate": 6.511904761904762e-05,
|
| 22930 |
+
"loss": 0.4976,
|
| 22931 |
+
"step": 3813
|
| 22932 |
+
},
|
| 22933 |
+
{
|
| 22934 |
+
"epoch": 4.37,
|
| 22935 |
+
"learning_rate": 6.500000000000001e-05,
|
| 22936 |
+
"loss": 0.5024,
|
| 22937 |
+
"step": 3814
|
| 22938 |
+
},
|
| 22939 |
+
{
|
| 22940 |
+
"epoch": 4.37,
|
| 22941 |
+
"learning_rate": 6.488095238095238e-05,
|
| 22942 |
+
"loss": 0.4792,
|
| 22943 |
+
"step": 3815
|
| 22944 |
+
},
|
| 22945 |
+
{
|
| 22946 |
+
"epoch": 4.37,
|
| 22947 |
+
"learning_rate": 6.476190476190477e-05,
|
| 22948 |
+
"loss": 0.4988,
|
| 22949 |
+
"step": 3816
|
| 22950 |
+
},
|
| 22951 |
+
{
|
| 22952 |
+
"epoch": 4.37,
|
| 22953 |
+
"learning_rate": 6.464285714285715e-05,
|
| 22954 |
+
"loss": 0.4771,
|
| 22955 |
+
"step": 3817
|
| 22956 |
+
},
|
| 22957 |
+
{
|
| 22958 |
+
"epoch": 4.37,
|
| 22959 |
+
"learning_rate": 6.452380952380954e-05,
|
| 22960 |
+
"loss": 0.4664,
|
| 22961 |
+
"step": 3818
|
| 22962 |
+
},
|
| 22963 |
+
{
|
| 22964 |
+
"epoch": 4.38,
|
| 22965 |
+
"learning_rate": 6.440476190476191e-05,
|
| 22966 |
+
"loss": 0.4718,
|
| 22967 |
+
"step": 3819
|
| 22968 |
+
},
|
| 22969 |
+
{
|
| 22970 |
+
"epoch": 4.38,
|
| 22971 |
+
"learning_rate": 6.428571428571429e-05,
|
| 22972 |
+
"loss": 0.5136,
|
| 22973 |
+
"step": 3820
|
| 22974 |
+
},
|
| 22975 |
+
{
|
| 22976 |
+
"epoch": 4.38,
|
| 22977 |
+
"learning_rate": 6.416666666666668e-05,
|
| 22978 |
+
"loss": 0.4493,
|
| 22979 |
+
"step": 3821
|
| 22980 |
+
},
|
| 22981 |
+
{
|
| 22982 |
+
"epoch": 4.38,
|
| 22983 |
+
"learning_rate": 6.404761904761904e-05,
|
| 22984 |
+
"loss": 0.5167,
|
| 22985 |
+
"step": 3822
|
| 22986 |
+
},
|
| 22987 |
+
{
|
| 22988 |
+
"epoch": 4.38,
|
| 22989 |
+
"learning_rate": 6.392857142857143e-05,
|
| 22990 |
+
"loss": 0.4989,
|
| 22991 |
+
"step": 3823
|
| 22992 |
+
},
|
| 22993 |
+
{
|
| 22994 |
+
"epoch": 4.38,
|
| 22995 |
+
"learning_rate": 6.38095238095238e-05,
|
| 22996 |
+
"loss": 0.4883,
|
| 22997 |
+
"step": 3824
|
| 22998 |
+
},
|
| 22999 |
+
{
|
| 23000 |
+
"epoch": 4.38,
|
| 23001 |
+
"learning_rate": 6.369047619047619e-05,
|
| 23002 |
+
"loss": 0.4864,
|
| 23003 |
+
"step": 3825
|
| 23004 |
+
},
|
| 23005 |
+
{
|
| 23006 |
+
"epoch": 4.38,
|
| 23007 |
+
"learning_rate": 6.357142857142857e-05,
|
| 23008 |
+
"loss": 0.4658,
|
| 23009 |
+
"step": 3826
|
| 23010 |
+
},
|
| 23011 |
+
{
|
| 23012 |
+
"epoch": 4.39,
|
| 23013 |
+
"learning_rate": 6.345238095238096e-05,
|
| 23014 |
+
"loss": 0.4774,
|
| 23015 |
+
"step": 3827
|
| 23016 |
+
},
|
| 23017 |
+
{
|
| 23018 |
+
"epoch": 4.39,
|
| 23019 |
+
"learning_rate": 6.333333333333333e-05,
|
| 23020 |
+
"loss": 0.4843,
|
| 23021 |
+
"step": 3828
|
| 23022 |
+
},
|
| 23023 |
+
{
|
| 23024 |
+
"epoch": 4.39,
|
| 23025 |
+
"learning_rate": 6.321428571428572e-05,
|
| 23026 |
+
"loss": 0.5333,
|
| 23027 |
+
"step": 3829
|
| 23028 |
+
},
|
| 23029 |
+
{
|
| 23030 |
+
"epoch": 4.39,
|
| 23031 |
+
"learning_rate": 6.30952380952381e-05,
|
| 23032 |
+
"loss": 0.514,
|
| 23033 |
+
"step": 3830
|
| 23034 |
+
},
|
| 23035 |
+
{
|
| 23036 |
+
"epoch": 4.39,
|
| 23037 |
+
"learning_rate": 6.297619047619047e-05,
|
| 23038 |
+
"loss": 0.4522,
|
| 23039 |
+
"step": 3831
|
| 23040 |
+
},
|
| 23041 |
+
{
|
| 23042 |
+
"epoch": 4.39,
|
| 23043 |
+
"learning_rate": 6.285714285714286e-05,
|
| 23044 |
+
"loss": 0.5137,
|
| 23045 |
+
"step": 3832
|
| 23046 |
+
},
|
| 23047 |
+
{
|
| 23048 |
+
"epoch": 4.39,
|
| 23049 |
+
"learning_rate": 6.273809523809524e-05,
|
| 23050 |
+
"loss": 0.5021,
|
| 23051 |
+
"step": 3833
|
| 23052 |
+
},
|
| 23053 |
+
{
|
| 23054 |
+
"epoch": 4.39,
|
| 23055 |
+
"learning_rate": 6.261904761904763e-05,
|
| 23056 |
+
"loss": 0.4956,
|
| 23057 |
+
"step": 3834
|
| 23058 |
+
},
|
| 23059 |
+
{
|
| 23060 |
+
"epoch": 4.39,
|
| 23061 |
+
"learning_rate": 6.25e-05,
|
| 23062 |
+
"loss": 0.5031,
|
| 23063 |
+
"step": 3835
|
| 23064 |
+
},
|
| 23065 |
+
{
|
| 23066 |
+
"epoch": 4.4,
|
| 23067 |
+
"learning_rate": 6.238095238095239e-05,
|
| 23068 |
+
"loss": 0.4967,
|
| 23069 |
+
"step": 3836
|
| 23070 |
+
},
|
| 23071 |
+
{
|
| 23072 |
+
"epoch": 4.4,
|
| 23073 |
+
"learning_rate": 6.226190476190477e-05,
|
| 23074 |
+
"loss": 0.4847,
|
| 23075 |
+
"step": 3837
|
| 23076 |
+
},
|
| 23077 |
+
{
|
| 23078 |
+
"epoch": 4.4,
|
| 23079 |
+
"learning_rate": 6.214285714285714e-05,
|
| 23080 |
+
"loss": 0.4727,
|
| 23081 |
+
"step": 3838
|
| 23082 |
+
},
|
| 23083 |
+
{
|
| 23084 |
+
"epoch": 4.4,
|
| 23085 |
+
"learning_rate": 6.202380952380953e-05,
|
| 23086 |
+
"loss": 0.4918,
|
| 23087 |
+
"step": 3839
|
| 23088 |
+
},
|
| 23089 |
+
{
|
| 23090 |
+
"epoch": 4.4,
|
| 23091 |
+
"learning_rate": 6.19047619047619e-05,
|
| 23092 |
+
"loss": 0.4962,
|
| 23093 |
+
"step": 3840
|
| 23094 |
+
},
|
| 23095 |
+
{
|
| 23096 |
+
"epoch": 4.4,
|
| 23097 |
+
"learning_rate": 6.17857142857143e-05,
|
| 23098 |
+
"loss": 0.5163,
|
| 23099 |
+
"step": 3841
|
| 23100 |
+
},
|
| 23101 |
+
{
|
| 23102 |
+
"epoch": 4.4,
|
| 23103 |
+
"learning_rate": 6.166666666666667e-05,
|
| 23104 |
+
"loss": 0.4912,
|
| 23105 |
+
"step": 3842
|
| 23106 |
+
},
|
| 23107 |
+
{
|
| 23108 |
+
"epoch": 4.4,
|
| 23109 |
+
"learning_rate": 6.154761904761906e-05,
|
| 23110 |
+
"loss": 0.5001,
|
| 23111 |
+
"step": 3843
|
| 23112 |
+
},
|
| 23113 |
+
{
|
| 23114 |
+
"epoch": 4.4,
|
| 23115 |
+
"learning_rate": 6.142857142857143e-05,
|
| 23116 |
+
"loss": 0.5095,
|
| 23117 |
+
"step": 3844
|
| 23118 |
+
},
|
| 23119 |
+
{
|
| 23120 |
+
"epoch": 4.41,
|
| 23121 |
+
"learning_rate": 6.130952380952381e-05,
|
| 23122 |
+
"loss": 0.5131,
|
| 23123 |
+
"step": 3845
|
| 23124 |
+
},
|
| 23125 |
+
{
|
| 23126 |
+
"epoch": 4.41,
|
| 23127 |
+
"learning_rate": 6.11904761904762e-05,
|
| 23128 |
+
"loss": 0.4981,
|
| 23129 |
+
"step": 3846
|
| 23130 |
+
},
|
| 23131 |
+
{
|
| 23132 |
+
"epoch": 4.41,
|
| 23133 |
+
"learning_rate": 6.107142857142857e-05,
|
| 23134 |
+
"loss": 0.4512,
|
| 23135 |
+
"step": 3847
|
| 23136 |
+
},
|
| 23137 |
+
{
|
| 23138 |
+
"epoch": 4.41,
|
| 23139 |
+
"learning_rate": 6.0952380952380964e-05,
|
| 23140 |
+
"loss": 0.5116,
|
| 23141 |
+
"step": 3848
|
| 23142 |
+
},
|
| 23143 |
+
{
|
| 23144 |
+
"epoch": 4.41,
|
| 23145 |
+
"learning_rate": 6.083333333333333e-05,
|
| 23146 |
+
"loss": 0.4796,
|
| 23147 |
+
"step": 3849
|
| 23148 |
+
},
|
| 23149 |
+
{
|
| 23150 |
+
"epoch": 4.41,
|
| 23151 |
+
"learning_rate": 6.0714285714285715e-05,
|
| 23152 |
+
"loss": 0.4558,
|
| 23153 |
+
"step": 3850
|
| 23154 |
+
},
|
| 23155 |
+
{
|
| 23156 |
+
"epoch": 4.41,
|
| 23157 |
+
"learning_rate": 6.05952380952381e-05,
|
| 23158 |
+
"loss": 0.4864,
|
| 23159 |
+
"step": 3851
|
| 23160 |
+
},
|
| 23161 |
+
{
|
| 23162 |
+
"epoch": 4.41,
|
| 23163 |
+
"learning_rate": 6.047619047619047e-05,
|
| 23164 |
+
"loss": 0.5157,
|
| 23165 |
+
"step": 3852
|
| 23166 |
+
},
|
| 23167 |
+
{
|
| 23168 |
+
"epoch": 4.42,
|
| 23169 |
+
"learning_rate": 6.0357142857142855e-05,
|
| 23170 |
+
"loss": 0.4847,
|
| 23171 |
+
"step": 3853
|
| 23172 |
+
},
|
| 23173 |
+
{
|
| 23174 |
+
"epoch": 4.42,
|
| 23175 |
+
"learning_rate": 6.023809523809524e-05,
|
| 23176 |
+
"loss": 0.5039,
|
| 23177 |
+
"step": 3854
|
| 23178 |
+
},
|
| 23179 |
+
{
|
| 23180 |
+
"epoch": 4.42,
|
| 23181 |
+
"learning_rate": 6.011904761904762e-05,
|
| 23182 |
+
"loss": 0.4917,
|
| 23183 |
+
"step": 3855
|
| 23184 |
+
},
|
| 23185 |
+
{
|
| 23186 |
+
"epoch": 4.42,
|
| 23187 |
+
"learning_rate": 6e-05,
|
| 23188 |
+
"loss": 0.5199,
|
| 23189 |
+
"step": 3856
|
| 23190 |
+
},
|
| 23191 |
+
{
|
| 23192 |
+
"epoch": 4.42,
|
| 23193 |
+
"learning_rate": 5.9880952380952384e-05,
|
| 23194 |
+
"loss": 0.4998,
|
| 23195 |
+
"step": 3857
|
| 23196 |
+
},
|
| 23197 |
+
{
|
| 23198 |
+
"epoch": 4.42,
|
| 23199 |
+
"learning_rate": 5.9761904761904766e-05,
|
| 23200 |
+
"loss": 0.4889,
|
| 23201 |
+
"step": 3858
|
| 23202 |
+
},
|
| 23203 |
+
{
|
| 23204 |
+
"epoch": 4.42,
|
| 23205 |
+
"learning_rate": 5.964285714285714e-05,
|
| 23206 |
+
"loss": 0.4447,
|
| 23207 |
+
"step": 3859
|
| 23208 |
+
},
|
| 23209 |
+
{
|
| 23210 |
+
"epoch": 4.42,
|
| 23211 |
+
"learning_rate": 5.9523809523809524e-05,
|
| 23212 |
+
"loss": 0.4823,
|
| 23213 |
+
"step": 3860
|
| 23214 |
+
},
|
| 23215 |
+
{
|
| 23216 |
+
"epoch": 4.42,
|
| 23217 |
+
"learning_rate": 5.9404761904761906e-05,
|
| 23218 |
+
"loss": 0.5173,
|
| 23219 |
+
"step": 3861
|
| 23220 |
+
},
|
| 23221 |
+
{
|
| 23222 |
+
"epoch": 4.43,
|
| 23223 |
+
"learning_rate": 5.928571428571429e-05,
|
| 23224 |
+
"loss": 0.4829,
|
| 23225 |
+
"step": 3862
|
| 23226 |
+
},
|
| 23227 |
+
{
|
| 23228 |
+
"epoch": 4.43,
|
| 23229 |
+
"learning_rate": 5.916666666666667e-05,
|
| 23230 |
+
"loss": 0.4828,
|
| 23231 |
+
"step": 3863
|
| 23232 |
+
},
|
| 23233 |
+
{
|
| 23234 |
+
"epoch": 4.43,
|
| 23235 |
+
"learning_rate": 5.904761904761905e-05,
|
| 23236 |
+
"loss": 0.4745,
|
| 23237 |
+
"step": 3864
|
| 23238 |
+
},
|
| 23239 |
+
{
|
| 23240 |
+
"epoch": 4.43,
|
| 23241 |
+
"learning_rate": 5.8928571428571435e-05,
|
| 23242 |
+
"loss": 0.4908,
|
| 23243 |
+
"step": 3865
|
| 23244 |
+
},
|
| 23245 |
+
{
|
| 23246 |
+
"epoch": 4.43,
|
| 23247 |
+
"learning_rate": 5.880952380952382e-05,
|
| 23248 |
+
"loss": 0.4774,
|
| 23249 |
+
"step": 3866
|
| 23250 |
+
},
|
| 23251 |
+
{
|
| 23252 |
+
"epoch": 4.43,
|
| 23253 |
+
"learning_rate": 5.869047619047619e-05,
|
| 23254 |
+
"loss": 0.5145,
|
| 23255 |
+
"step": 3867
|
| 23256 |
+
},
|
| 23257 |
+
{
|
| 23258 |
+
"epoch": 4.43,
|
| 23259 |
+
"learning_rate": 5.8571428571428575e-05,
|
| 23260 |
+
"loss": 0.5319,
|
| 23261 |
+
"step": 3868
|
| 23262 |
+
},
|
| 23263 |
+
{
|
| 23264 |
+
"epoch": 4.43,
|
| 23265 |
+
"learning_rate": 5.845238095238096e-05,
|
| 23266 |
+
"loss": 0.4895,
|
| 23267 |
+
"step": 3869
|
| 23268 |
+
},
|
| 23269 |
+
{
|
| 23270 |
+
"epoch": 4.43,
|
| 23271 |
+
"learning_rate": 5.833333333333334e-05,
|
| 23272 |
+
"loss": 0.4687,
|
| 23273 |
+
"step": 3870
|
| 23274 |
+
},
|
| 23275 |
+
{
|
| 23276 |
+
"epoch": 4.44,
|
| 23277 |
+
"learning_rate": 5.821428571428572e-05,
|
| 23278 |
+
"loss": 0.508,
|
| 23279 |
+
"step": 3871
|
| 23280 |
+
},
|
| 23281 |
+
{
|
| 23282 |
+
"epoch": 4.44,
|
| 23283 |
+
"learning_rate": 5.8095238095238104e-05,
|
| 23284 |
+
"loss": 0.4673,
|
| 23285 |
+
"step": 3872
|
| 23286 |
+
},
|
| 23287 |
+
{
|
| 23288 |
+
"epoch": 4.44,
|
| 23289 |
+
"learning_rate": 5.7976190476190486e-05,
|
| 23290 |
+
"loss": 0.5358,
|
| 23291 |
+
"step": 3873
|
| 23292 |
+
},
|
| 23293 |
+
{
|
| 23294 |
+
"epoch": 4.44,
|
| 23295 |
+
"learning_rate": 5.785714285714287e-05,
|
| 23296 |
+
"loss": 0.4783,
|
| 23297 |
+
"step": 3874
|
| 23298 |
+
},
|
| 23299 |
+
{
|
| 23300 |
+
"epoch": 4.44,
|
| 23301 |
+
"learning_rate": 5.773809523809524e-05,
|
| 23302 |
+
"loss": 0.4975,
|
| 23303 |
+
"step": 3875
|
| 23304 |
+
},
|
| 23305 |
+
{
|
| 23306 |
+
"epoch": 4.44,
|
| 23307 |
+
"learning_rate": 5.761904761904762e-05,
|
| 23308 |
+
"loss": 0.4829,
|
| 23309 |
+
"step": 3876
|
| 23310 |
+
},
|
| 23311 |
+
{
|
| 23312 |
+
"epoch": 4.44,
|
| 23313 |
+
"learning_rate": 5.7499999999999995e-05,
|
| 23314 |
+
"loss": 0.4757,
|
| 23315 |
+
"step": 3877
|
| 23316 |
+
},
|
| 23317 |
+
{
|
| 23318 |
+
"epoch": 4.44,
|
| 23319 |
+
"learning_rate": 5.738095238095238e-05,
|
| 23320 |
+
"loss": 0.5187,
|
| 23321 |
+
"step": 3878
|
| 23322 |
+
},
|
| 23323 |
+
{
|
| 23324 |
+
"epoch": 4.44,
|
| 23325 |
+
"learning_rate": 5.726190476190476e-05,
|
| 23326 |
+
"loss": 0.4838,
|
| 23327 |
+
"step": 3879
|
| 23328 |
+
},
|
| 23329 |
+
{
|
| 23330 |
+
"epoch": 4.45,
|
| 23331 |
+
"learning_rate": 5.714285714285714e-05,
|
| 23332 |
+
"loss": 0.5086,
|
| 23333 |
+
"step": 3880
|
| 23334 |
+
},
|
| 23335 |
+
{
|
| 23336 |
+
"epoch": 4.45,
|
| 23337 |
+
"learning_rate": 5.7023809523809524e-05,
|
| 23338 |
+
"loss": 0.4457,
|
| 23339 |
+
"step": 3881
|
| 23340 |
+
},
|
| 23341 |
+
{
|
| 23342 |
+
"epoch": 4.45,
|
| 23343 |
+
"learning_rate": 5.6904761904761906e-05,
|
| 23344 |
+
"loss": 0.4311,
|
| 23345 |
+
"step": 3882
|
| 23346 |
+
},
|
| 23347 |
+
{
|
| 23348 |
+
"epoch": 4.45,
|
| 23349 |
+
"learning_rate": 5.678571428571429e-05,
|
| 23350 |
+
"loss": 0.513,
|
| 23351 |
+
"step": 3883
|
| 23352 |
+
},
|
| 23353 |
+
{
|
| 23354 |
+
"epoch": 4.45,
|
| 23355 |
+
"learning_rate": 5.666666666666667e-05,
|
| 23356 |
+
"loss": 0.4748,
|
| 23357 |
+
"step": 3884
|
| 23358 |
+
},
|
| 23359 |
+
{
|
| 23360 |
+
"epoch": 4.45,
|
| 23361 |
+
"learning_rate": 5.6547619047619046e-05,
|
| 23362 |
+
"loss": 0.4875,
|
| 23363 |
+
"step": 3885
|
| 23364 |
+
},
|
| 23365 |
+
{
|
| 23366 |
+
"epoch": 4.45,
|
| 23367 |
+
"learning_rate": 5.642857142857143e-05,
|
| 23368 |
+
"loss": 0.4985,
|
| 23369 |
+
"step": 3886
|
| 23370 |
+
},
|
| 23371 |
+
{
|
| 23372 |
+
"epoch": 4.45,
|
| 23373 |
+
"learning_rate": 5.630952380952381e-05,
|
| 23374 |
+
"loss": 0.4911,
|
| 23375 |
+
"step": 3887
|
| 23376 |
+
},
|
| 23377 |
+
{
|
| 23378 |
+
"epoch": 4.46,
|
| 23379 |
+
"learning_rate": 5.619047619047619e-05,
|
| 23380 |
+
"loss": 0.5089,
|
| 23381 |
+
"step": 3888
|
| 23382 |
+
},
|
| 23383 |
+
{
|
| 23384 |
+
"epoch": 4.46,
|
| 23385 |
+
"learning_rate": 5.6071428571428575e-05,
|
| 23386 |
+
"loss": 0.4819,
|
| 23387 |
+
"step": 3889
|
| 23388 |
+
},
|
| 23389 |
+
{
|
| 23390 |
+
"epoch": 4.46,
|
| 23391 |
+
"learning_rate": 5.595238095238096e-05,
|
| 23392 |
+
"loss": 0.4462,
|
| 23393 |
+
"step": 3890
|
| 23394 |
+
},
|
| 23395 |
+
{
|
| 23396 |
+
"epoch": 4.46,
|
| 23397 |
+
"learning_rate": 5.583333333333334e-05,
|
| 23398 |
+
"loss": 0.4827,
|
| 23399 |
+
"step": 3891
|
| 23400 |
+
},
|
| 23401 |
+
{
|
| 23402 |
+
"epoch": 4.46,
|
| 23403 |
+
"learning_rate": 5.571428571428572e-05,
|
| 23404 |
+
"loss": 0.5031,
|
| 23405 |
+
"step": 3892
|
| 23406 |
+
},
|
| 23407 |
+
{
|
| 23408 |
+
"epoch": 4.46,
|
| 23409 |
+
"learning_rate": 5.55952380952381e-05,
|
| 23410 |
+
"loss": 0.477,
|
| 23411 |
+
"step": 3893
|
| 23412 |
+
},
|
| 23413 |
+
{
|
| 23414 |
+
"epoch": 4.46,
|
| 23415 |
+
"learning_rate": 5.547619047619048e-05,
|
| 23416 |
+
"loss": 0.4916,
|
| 23417 |
+
"step": 3894
|
| 23418 |
+
},
|
| 23419 |
+
{
|
| 23420 |
+
"epoch": 4.46,
|
| 23421 |
+
"learning_rate": 5.535714285714286e-05,
|
| 23422 |
+
"loss": 0.4476,
|
| 23423 |
+
"step": 3895
|
| 23424 |
+
},
|
| 23425 |
+
{
|
| 23426 |
+
"epoch": 4.46,
|
| 23427 |
+
"learning_rate": 5.5238095238095244e-05,
|
| 23428 |
+
"loss": 0.4824,
|
| 23429 |
+
"step": 3896
|
| 23430 |
+
},
|
| 23431 |
+
{
|
| 23432 |
+
"epoch": 4.47,
|
| 23433 |
+
"learning_rate": 5.5119047619047626e-05,
|
| 23434 |
+
"loss": 0.498,
|
| 23435 |
+
"step": 3897
|
| 23436 |
+
},
|
| 23437 |
+
{
|
| 23438 |
+
"epoch": 4.47,
|
| 23439 |
+
"learning_rate": 5.500000000000001e-05,
|
| 23440 |
+
"loss": 0.4842,
|
| 23441 |
+
"step": 3898
|
| 23442 |
+
},
|
| 23443 |
+
{
|
| 23444 |
+
"epoch": 4.47,
|
| 23445 |
+
"learning_rate": 5.488095238095239e-05,
|
| 23446 |
+
"loss": 0.4743,
|
| 23447 |
+
"step": 3899
|
| 23448 |
+
},
|
| 23449 |
+
{
|
| 23450 |
+
"epoch": 4.47,
|
| 23451 |
+
"learning_rate": 5.4761904761904766e-05,
|
| 23452 |
+
"loss": 0.5251,
|
| 23453 |
+
"step": 3900
|
| 23454 |
+
},
|
| 23455 |
+
{
|
| 23456 |
+
"epoch": 4.47,
|
| 23457 |
+
"learning_rate": 5.464285714285714e-05,
|
| 23458 |
+
"loss": 0.4494,
|
| 23459 |
+
"step": 3901
|
| 23460 |
+
},
|
| 23461 |
+
{
|
| 23462 |
+
"epoch": 4.47,
|
| 23463 |
+
"learning_rate": 5.4523809523809524e-05,
|
| 23464 |
+
"loss": 0.4495,
|
| 23465 |
+
"step": 3902
|
| 23466 |
+
},
|
| 23467 |
+
{
|
| 23468 |
+
"epoch": 4.47,
|
| 23469 |
+
"learning_rate": 5.44047619047619e-05,
|
| 23470 |
+
"loss": 0.4959,
|
| 23471 |
+
"step": 3903
|
| 23472 |
+
},
|
| 23473 |
+
{
|
| 23474 |
+
"epoch": 4.47,
|
| 23475 |
+
"learning_rate": 5.428571428571428e-05,
|
| 23476 |
+
"loss": 0.5122,
|
| 23477 |
+
"step": 3904
|
| 23478 |
+
},
|
| 23479 |
+
{
|
| 23480 |
+
"epoch": 4.47,
|
| 23481 |
+
"learning_rate": 5.4166666666666664e-05,
|
| 23482 |
+
"loss": 0.5037,
|
| 23483 |
+
"step": 3905
|
| 23484 |
+
},
|
| 23485 |
+
{
|
| 23486 |
+
"epoch": 4.48,
|
| 23487 |
+
"learning_rate": 5.4047619047619046e-05,
|
| 23488 |
+
"loss": 0.495,
|
| 23489 |
+
"step": 3906
|
| 23490 |
+
},
|
| 23491 |
+
{
|
| 23492 |
+
"epoch": 4.48,
|
| 23493 |
+
"learning_rate": 5.392857142857143e-05,
|
| 23494 |
+
"loss": 0.4642,
|
| 23495 |
+
"step": 3907
|
| 23496 |
+
},
|
| 23497 |
+
{
|
| 23498 |
+
"epoch": 4.48,
|
| 23499 |
+
"learning_rate": 5.380952380952381e-05,
|
| 23500 |
+
"loss": 0.5068,
|
| 23501 |
+
"step": 3908
|
| 23502 |
+
},
|
| 23503 |
+
{
|
| 23504 |
+
"epoch": 4.48,
|
| 23505 |
+
"learning_rate": 5.369047619047619e-05,
|
| 23506 |
+
"loss": 0.4434,
|
| 23507 |
+
"step": 3909
|
| 23508 |
+
},
|
| 23509 |
+
{
|
| 23510 |
+
"epoch": 4.48,
|
| 23511 |
+
"learning_rate": 5.3571428571428575e-05,
|
| 23512 |
+
"loss": 0.5078,
|
| 23513 |
+
"step": 3910
|
| 23514 |
+
},
|
| 23515 |
+
{
|
| 23516 |
+
"epoch": 4.48,
|
| 23517 |
+
"learning_rate": 5.345238095238095e-05,
|
| 23518 |
+
"loss": 0.5156,
|
| 23519 |
+
"step": 3911
|
| 23520 |
+
},
|
| 23521 |
+
{
|
| 23522 |
+
"epoch": 4.48,
|
| 23523 |
+
"learning_rate": 5.333333333333333e-05,
|
| 23524 |
+
"loss": 0.5199,
|
| 23525 |
+
"step": 3912
|
| 23526 |
+
},
|
| 23527 |
+
{
|
| 23528 |
+
"epoch": 4.48,
|
| 23529 |
+
"learning_rate": 5.3214285714285715e-05,
|
| 23530 |
+
"loss": 0.4746,
|
| 23531 |
+
"step": 3913
|
| 23532 |
+
},
|
| 23533 |
+
{
|
| 23534 |
+
"epoch": 4.48,
|
| 23535 |
+
"learning_rate": 5.30952380952381e-05,
|
| 23536 |
+
"loss": 0.4824,
|
| 23537 |
+
"step": 3914
|
| 23538 |
+
},
|
| 23539 |
+
{
|
| 23540 |
+
"epoch": 4.49,
|
| 23541 |
+
"learning_rate": 5.297619047619048e-05,
|
| 23542 |
+
"loss": 0.4967,
|
| 23543 |
+
"step": 3915
|
| 23544 |
+
},
|
| 23545 |
+
{
|
| 23546 |
+
"epoch": 4.49,
|
| 23547 |
+
"learning_rate": 5.285714285714286e-05,
|
| 23548 |
+
"loss": 0.4773,
|
| 23549 |
+
"step": 3916
|
| 23550 |
+
},
|
| 23551 |
+
{
|
| 23552 |
+
"epoch": 4.49,
|
| 23553 |
+
"learning_rate": 5.2738095238095244e-05,
|
| 23554 |
+
"loss": 0.4552,
|
| 23555 |
+
"step": 3917
|
| 23556 |
+
},
|
| 23557 |
+
{
|
| 23558 |
+
"epoch": 4.49,
|
| 23559 |
+
"learning_rate": 5.261904761904763e-05,
|
| 23560 |
+
"loss": 0.4484,
|
| 23561 |
+
"step": 3918
|
| 23562 |
+
},
|
| 23563 |
+
{
|
| 23564 |
+
"epoch": 4.49,
|
| 23565 |
+
"learning_rate": 5.25e-05,
|
| 23566 |
+
"loss": 0.481,
|
| 23567 |
+
"step": 3919
|
| 23568 |
+
},
|
| 23569 |
+
{
|
| 23570 |
+
"epoch": 4.49,
|
| 23571 |
+
"learning_rate": 5.2380952380952384e-05,
|
| 23572 |
+
"loss": 0.5655,
|
| 23573 |
+
"step": 3920
|
| 23574 |
+
},
|
| 23575 |
+
{
|
| 23576 |
+
"epoch": 4.49,
|
| 23577 |
+
"learning_rate": 5.226190476190477e-05,
|
| 23578 |
+
"loss": 0.4743,
|
| 23579 |
+
"step": 3921
|
| 23580 |
+
},
|
| 23581 |
+
{
|
| 23582 |
+
"epoch": 4.49,
|
| 23583 |
+
"learning_rate": 5.214285714285715e-05,
|
| 23584 |
+
"loss": 0.527,
|
| 23585 |
+
"step": 3922
|
| 23586 |
+
},
|
| 23587 |
+
{
|
| 23588 |
+
"epoch": 4.5,
|
| 23589 |
+
"learning_rate": 5.202380952380953e-05,
|
| 23590 |
+
"loss": 0.4942,
|
| 23591 |
+
"step": 3923
|
| 23592 |
+
},
|
| 23593 |
+
{
|
| 23594 |
+
"epoch": 4.5,
|
| 23595 |
+
"learning_rate": 5.1904761904761913e-05,
|
| 23596 |
+
"loss": 0.4955,
|
| 23597 |
+
"step": 3924
|
| 23598 |
}
|
| 23599 |
],
|
| 23600 |
"logging_steps": 1,
|
| 23601 |
"max_steps": 4360,
|
| 23602 |
"num_train_epochs": 5,
|
| 23603 |
"save_steps": 218,
|
| 23604 |
+
"total_flos": 7.785318321353864e+19,
|
| 23605 |
"trial_name": null,
|
| 23606 |
"trial_params": null
|
| 23607 |
}
|