Training in progress, step 3706, checkpoint
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2090ce2e860226eaaa187c68bd01a85888beca6256482a926003d18601839509
 size 686648325
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cd628ed3b54fe9c40941ce872df27daaf83fbd9953ad639c9d4e8aa5bb712a5e
 size 343308717
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1dd247f5d8a4bdaa46e9a22dea4b3ff7e3cc6bbd0eca5a0dcd56fe15ceba641a
 size 14575
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9c95ecdf6284750a225c636b9aa6b47fef4dd3031c032f43a656e22d84c6e044
 size 627
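The four diffs above touch Git LFS pointer files rather than the binaries themselves: each pointer carries only the spec version, the blob's `oid sha256:`, and its `size` in bytes, which is why a checkpoint update shows up as a one-line hash change. As an illustration (not part of this commit), a minimal Python sketch that verifies a locally downloaded file against the oid and size recorded in its pointer; the local path `last-checkpoint/optimizer.pt` is an assumed download location:

# Illustrative only: verify a downloaded LFS object against its pointer.
import hashlib
from pathlib import Path

def verify_lfs_file(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the local file matches the pointer's sha256 oid and size."""
    p = Path(path)
    if p.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with p.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size come from the optimizer.pt pointer in this commit;
# the local path is an assumption about where the file was fetched.
print(verify_lfs_file(
    "last-checkpoint/optimizer.pt",
    "2090ce2e860226eaaa187c68bd01a85888beca6256482a926003d18601839509",
    686648325,
))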
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 4.246575832990171,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 3706,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -20968,13 +20968,1332 @@
       "learning_rate": 0.00010380952380952383,
       "loss": 0.557,
       "step": 3488
+    },
+    { "epoch": 4.0, "learning_rate": 0.0001036904761904762, "loss": 0.5342, "step": 3489 },
+    { "epoch": 4.0, "learning_rate": 0.00010357142857142859, "loss": 0.5588, "step": 3490 },
+    { "epoch": 4.0, "eval_acc_mean": 0.4121963679790497, "eval_auc_roc_mean": 0.5698418617248535, "eval_f1_mean": 0.39474475383758545, "eval_loss": 0.5699463486671448, "eval_runtime": 12.0358, "eval_samples_per_second": 19.442, "eval_steps_per_second": 2.493, "step": 3490 },
+    { "epoch": 4.0, "learning_rate": 0.00010345238095238097, "loss": 0.5445, "step": 3491 },
+    { "epoch": 4.0, "learning_rate": 0.00010333333333333334, "loss": 0.511, "step": 3492 },
+    { "epoch": 4.0, "learning_rate": 0.00010321428571428573, "loss": 0.529, "step": 3493 },
+    { "epoch": 4.0, "learning_rate": 0.0001030952380952381, "loss": 0.4984, "step": 3494 },
+    { "epoch": 4.0, "learning_rate": 0.00010297619047619047, "loss": 0.5091, "step": 3495 },
+    { "epoch": 4.01, "learning_rate": 0.00010285714285714286, "loss": 0.4909, "step": 3496 },
+    { "epoch": 4.01, "learning_rate": 0.00010273809523809523, "loss": 0.5214, "step": 3497 },
+    { "epoch": 4.01, "learning_rate": 0.00010261904761904761, "loss": 0.4987, "step": 3498 },
+    { "epoch": 4.01, "learning_rate": 0.0001025, "loss": 0.4957, "step": 3499 },
+    { "epoch": 4.01, "learning_rate": 0.00010238095238095237, "loss": 0.5145, "step": 3500 },
+    { "epoch": 4.01, "learning_rate": 0.00010226190476190476, "loss": 0.5234, "step": 3501 },
+    { "epoch": 4.01, "learning_rate": 0.00010214285714285714, "loss": 0.5162, "step": 3502 },
+    { "epoch": 4.01, "learning_rate": 0.00010202380952380953, "loss": 0.535, "step": 3503 },
+    { "epoch": 4.02, "learning_rate": 0.0001019047619047619, "loss": 0.5255, "step": 3504 },
+    { "epoch": 4.02, "learning_rate": 0.00010178571428571428, "loss": 0.496, "step": 3505 },
+    { "epoch": 4.02, "learning_rate": 0.00010166666666666667, "loss": 0.5316, "step": 3506 },
+    { "epoch": 4.02, "learning_rate": 0.00010154761904761904, "loss": 0.4778, "step": 3507 },
+    { "epoch": 4.02, "learning_rate": 0.00010142857142857143, "loss": 0.5046, "step": 3508 },
+    { "epoch": 4.02, "learning_rate": 0.00010130952380952381, "loss": 0.5147, "step": 3509 },
+    { "epoch": 4.02, "learning_rate": 0.0001011904761904762, "loss": 0.4914, "step": 3510 },
+    { "epoch": 4.02, "learning_rate": 0.00010107142857142857, "loss": 0.4769, "step": 3511 },
+    { "epoch": 4.02, "learning_rate": 0.00010095238095238096, "loss": 0.5151, "step": 3512 },
+    { "epoch": 4.03, "learning_rate": 0.00010083333333333334, "loss": 0.5079, "step": 3513 },
+    { "epoch": 4.03, "learning_rate": 0.00010071428571428571, "loss": 0.4521, "step": 3514 },
+    { "epoch": 4.03, "learning_rate": 0.0001005952380952381, "loss": 0.5031, "step": 3515 },
+    { "epoch": 4.03, "learning_rate": 0.00010047619047619048, "loss": 0.497, "step": 3516 },
+    { "epoch": 4.03, "learning_rate": 0.00010035714285714286, "loss": 0.4858, "step": 3517 },
+    { "epoch": 4.03, "learning_rate": 0.00010023809523809524, "loss": 0.5095, "step": 3518 },
+    { "epoch": 4.03, "learning_rate": 0.00010011904761904763, "loss": 0.5003, "step": 3519 },
+    { "epoch": 4.03, "learning_rate": 0.0001, "loss": 0.4674, "step": 3520 },
+    { "epoch": 4.03, "learning_rate": 9.988095238095238e-05, "loss": 0.4816, "step": 3521 },
+    { "epoch": 4.04, "learning_rate": 9.976190476190477e-05, "loss": 0.521, "step": 3522 },
+    { "epoch": 4.04, "learning_rate": 9.964285714285714e-05, "loss": 0.4867, "step": 3523 },
+    { "epoch": 4.04, "learning_rate": 9.952380952380953e-05, "loss": 0.5163, "step": 3524 },
+    { "epoch": 4.04, "learning_rate": 9.940476190476191e-05, "loss": 0.5152, "step": 3525 },
+    { "epoch": 4.04, "learning_rate": 9.92857142857143e-05, "loss": 0.4835, "step": 3526 },
+    { "epoch": 4.04, "learning_rate": 9.916666666666667e-05, "loss": 0.5044, "step": 3527 },
+    { "epoch": 4.04, "learning_rate": 9.904761904761905e-05, "loss": 0.4332, "step": 3528 },
+    { "epoch": 4.04, "learning_rate": 9.892857142857144e-05, "loss": 0.5206, "step": 3529 },
+    { "epoch": 4.04, "learning_rate": 9.880952380952381e-05, "loss": 0.5136, "step": 3530 },
+    { "epoch": 4.05, "learning_rate": 9.86904761904762e-05, "loss": 0.4979, "step": 3531 },
+    { "epoch": 4.05, "learning_rate": 9.857142857142858e-05, "loss": 0.5041, "step": 3532 },
+    { "epoch": 4.05, "learning_rate": 9.845238095238097e-05, "loss": 0.498, "step": 3533 },
+    { "epoch": 4.05, "learning_rate": 9.833333333333333e-05, "loss": 0.504, "step": 3534 },
+    { "epoch": 4.05, "learning_rate": 9.821428571428572e-05, "loss": 0.5087, "step": 3535 },
+    { "epoch": 4.05, "learning_rate": 9.80952380952381e-05, "loss": 0.5319, "step": 3536 },
+    { "epoch": 4.05, "learning_rate": 9.797619047619048e-05, "loss": 0.521, "step": 3537 },
+    { "epoch": 4.05, "learning_rate": 9.785714285714286e-05, "loss": 0.5053, "step": 3538 },
+    { "epoch": 4.06, "learning_rate": 9.773809523809523e-05, "loss": 0.5204, "step": 3539 },
+    { "epoch": 4.06, "learning_rate": 9.761904761904762e-05, "loss": 0.4697, "step": 3540 },
+    { "epoch": 4.06, "learning_rate": 9.75e-05, "loss": 0.5153, "step": 3541 },
+    { "epoch": 4.06, "learning_rate": 9.738095238095239e-05, "loss": 0.4875, "step": 3542 },
+    { "epoch": 4.06, "learning_rate": 9.726190476190476e-05, "loss": 0.4891, "step": 3543 },
+    { "epoch": 4.06, "learning_rate": 9.714285714285715e-05, "loss": 0.506, "step": 3544 },
+    { "epoch": 4.06, "learning_rate": 9.702380952380953e-05, "loss": 0.4904, "step": 3545 },
+    { "epoch": 4.06, "learning_rate": 9.69047619047619e-05, "loss": 0.5128, "step": 3546 },
+    { "epoch": 4.06, "learning_rate": 9.678571428571429e-05, "loss": 0.5315, "step": 3547 },
+    { "epoch": 4.07, "learning_rate": 9.666666666666667e-05, "loss": 0.5083, "step": 3548 },
+    { "epoch": 4.07, "learning_rate": 9.654761904761906e-05, "loss": 0.4887, "step": 3549 },
+    { "epoch": 4.07, "learning_rate": 9.642857142857143e-05, "loss": 0.4595, "step": 3550 },
+    { "epoch": 4.07, "learning_rate": 9.630952380952382e-05, "loss": 0.4934, "step": 3551 },
+    { "epoch": 4.07, "learning_rate": 9.61904761904762e-05, "loss": 0.5037, "step": 3552 },
+    { "epoch": 4.07, "learning_rate": 9.607142857142859e-05, "loss": 0.5226, "step": 3553 },
+    { "epoch": 4.07, "learning_rate": 9.595238095238096e-05, "loss": 0.5311, "step": 3554 },
+    { "epoch": 4.07, "learning_rate": 9.583333333333334e-05, "loss": 0.4992, "step": 3555 },
+    { "epoch": 4.07, "learning_rate": 9.571428571428573e-05, "loss": 0.4919, "step": 3556 },
+    { "epoch": 4.08, "learning_rate": 9.55952380952381e-05, "loss": 0.4886, "step": 3557 },
+    { "epoch": 4.08, "learning_rate": 9.547619047619049e-05, "loss": 0.4577, "step": 3558 },
+    { "epoch": 4.08, "learning_rate": 9.535714285714287e-05, "loss": 0.4612, "step": 3559 },
+    { "epoch": 4.08, "learning_rate": 9.523809523809524e-05, "loss": 0.5224, "step": 3560 },
+    { "epoch": 4.08, "learning_rate": 9.511904761904762e-05, "loss": 0.5176, "step": 3561 },
+    { "epoch": 4.08, "learning_rate": 9.5e-05, "loss": 0.4691, "step": 3562 },
+    { "epoch": 4.08, "learning_rate": 9.488095238095238e-05, "loss": 0.4984, "step": 3563 },
+    { "epoch": 4.08, "learning_rate": 9.476190476190476e-05, "loss": 0.498, "step": 3564 },
+    { "epoch": 4.09, "learning_rate": 9.464285714285715e-05, "loss": 0.5303, "step": 3565 },
+    { "epoch": 4.09, "learning_rate": 9.452380952380952e-05, "loss": 0.4714, "step": 3566 },
+    { "epoch": 4.09, "learning_rate": 9.440476190476191e-05, "loss": 0.5061, "step": 3567 },
+    { "epoch": 4.09, "learning_rate": 9.428571428571429e-05, "loss": 0.4912, "step": 3568 },
+    { "epoch": 4.09, "learning_rate": 9.416666666666667e-05, "loss": 0.5054, "step": 3569 },
+    { "epoch": 4.09, "learning_rate": 9.404761904761905e-05, "loss": 0.5089, "step": 3570 },
+    { "epoch": 4.09, "learning_rate": 9.392857142857144e-05, "loss": 0.4452, "step": 3571 },
+    { "epoch": 4.09, "learning_rate": 9.380952380952381e-05, "loss": 0.5264, "step": 3572 },
+    { "epoch": 4.09, "learning_rate": 9.369047619047619e-05, "loss": 0.5329, "step": 3573 },
+    { "epoch": 4.1, "learning_rate": 9.357142857142858e-05, "loss": 0.5371, "step": 3574 },
+    { "epoch": 4.1, "learning_rate": 9.345238095238095e-05, "loss": 0.4759, "step": 3575 },
+    { "epoch": 4.1, "learning_rate": 9.333333333333334e-05, "loss": 0.4954, "step": 3576 },
+    { "epoch": 4.1, "learning_rate": 9.321428571428572e-05, "loss": 0.4905, "step": 3577 },
+    { "epoch": 4.1, "learning_rate": 9.309523809523811e-05, "loss": 0.4869, "step": 3578 },
+    { "epoch": 4.1, "learning_rate": 9.297619047619048e-05, "loss": 0.5463, "step": 3579 },
+    { "epoch": 4.1, "learning_rate": 9.285714285714286e-05, "loss": 0.5014, "step": 3580 },
+    { "epoch": 4.1, "learning_rate": 9.273809523809525e-05, "loss": 0.4822, "step": 3581 },
+    { "epoch": 4.1, "learning_rate": 9.261904761904762e-05, "loss": 0.5571, "step": 3582 },
+    { "epoch": 4.11, "learning_rate": 9.250000000000001e-05, "loss": 0.5228, "step": 3583 },
+    { "epoch": 4.11, "learning_rate": 9.238095238095239e-05, "loss": 0.5198, "step": 3584 },
+    { "epoch": 4.11, "learning_rate": 9.226190476190478e-05, "loss": 0.4839, "step": 3585 },
+    { "epoch": 4.11, "learning_rate": 9.214285714285714e-05, "loss": 0.5157, "step": 3586 },
+    { "epoch": 4.11, "learning_rate": 9.202380952380953e-05, "loss": 0.5031, "step": 3587 },
+    { "epoch": 4.11, "learning_rate": 9.19047619047619e-05, "loss": 0.4746, "step": 3588 },
+    { "epoch": 4.11, "learning_rate": 9.178571428571429e-05, "loss": 0.4965, "step": 3589 },
+    { "epoch": 4.11, "learning_rate": 9.166666666666667e-05, "loss": 0.479, "step": 3590 },
+    { "epoch": 4.11, "learning_rate": 9.154761904761904e-05, "loss": 0.4987, "step": 3591 },
+    { "epoch": 4.12, "learning_rate": 9.142857142857143e-05, "loss": 0.5126, "step": 3592 },
+    { "epoch": 4.12, "learning_rate": 9.130952380952381e-05, "loss": 0.4937, "step": 3593 },
+    { "epoch": 4.12, "learning_rate": 9.11904761904762e-05, "loss": 0.4646, "step": 3594 },
+    { "epoch": 4.12, "learning_rate": 9.107142857142857e-05, "loss": 0.4674, "step": 3595 },
+    { "epoch": 4.12, "learning_rate": 9.095238095238096e-05, "loss": 0.4817, "step": 3596 },
+    { "epoch": 4.12, "learning_rate": 9.083333333333334e-05, "loss": 0.4938, "step": 3597 },
+    { "epoch": 4.12, "learning_rate": 9.071428571428571e-05, "loss": 0.5279, "step": 3598 },
+    { "epoch": 4.12, "learning_rate": 9.05952380952381e-05, "loss": 0.492, "step": 3599 },
+    { "epoch": 4.13, "learning_rate": 9.047619047619048e-05, "loss": 0.4842, "step": 3600 },
+    { "epoch": 4.13, "learning_rate": 9.035714285714287e-05, "loss": 0.4823, "step": 3601 },
+    { "epoch": 4.13, "learning_rate": 9.023809523809524e-05, "loss": 0.5287, "step": 3602 },
+    { "epoch": 4.13, "learning_rate": 9.011904761904763e-05, "loss": 0.4568, "step": 3603 },
+    { "epoch": 4.13, "learning_rate": 9e-05, "loss": 0.5084, "step": 3604 },
+    { "epoch": 4.13, "learning_rate": 8.988095238095238e-05, "loss": 0.5163, "step": 3605 },
+    { "epoch": 4.13, "learning_rate": 8.976190476190477e-05, "loss": 0.5282, "step": 3606 },
+    { "epoch": 4.13, "learning_rate": 8.964285714285715e-05, "loss": 0.5009, "step": 3607 },
+    { "epoch": 4.13, "learning_rate": 8.952380952380953e-05, "loss": 0.4814, "step": 3608 },
+    { "epoch": 4.14, "learning_rate": 8.940476190476191e-05, "loss": 0.4787, "step": 3609 },
+    { "epoch": 4.14, "learning_rate": 8.92857142857143e-05, "loss": 0.4657, "step": 3610 },
+    { "epoch": 4.14, "learning_rate": 8.916666666666667e-05, "loss": 0.5148, "step": 3611 },
+    { "epoch": 4.14, "learning_rate": 8.904761904761905e-05, "loss": 0.4891, "step": 3612 },
+    { "epoch": 4.14, "learning_rate": 8.892857142857143e-05, "loss": 0.4695, "step": 3613 },
+    { "epoch": 4.14, "learning_rate": 8.880952380952381e-05, "loss": 0.4819, "step": 3614 },
+    { "epoch": 4.14, "learning_rate": 8.869047619047619e-05, "loss": 0.5085, "step": 3615 },
+    { "epoch": 4.14, "learning_rate": 8.857142857142857e-05, "loss": 0.4938, "step": 3616 },
+    { "epoch": 4.14, "learning_rate": 8.845238095238095e-05, "loss": 0.5016, "step": 3617 },
+    { "epoch": 4.15, "learning_rate": 8.833333333333333e-05, "loss": 0.4972, "step": 3618 },
+    { "epoch": 4.15, "learning_rate": 8.821428571428572e-05, "loss": 0.5241, "step": 3619 },
+    { "epoch": 4.15, "learning_rate": 8.80952380952381e-05, "loss": 0.4742, "step": 3620 },
+    { "epoch": 4.15, "learning_rate": 8.797619047619048e-05, "loss": 0.4778, "step": 3621 },
+    { "epoch": 4.15, "learning_rate": 8.785714285714286e-05, "loss": 0.4716, "step": 3622 },
+    { "epoch": 4.15, "learning_rate": 8.773809523809525e-05, "loss": 0.4971, "step": 3623 },
+    { "epoch": 4.15, "learning_rate": 8.761904761904762e-05, "loss": 0.5073, "step": 3624 },
+    { "epoch": 4.15, "learning_rate": 8.75e-05, "loss": 0.5088, "step": 3625 },
+    { "epoch": 4.15, "learning_rate": 8.738095238095239e-05, "loss": 0.4696, "step": 3626 },
+    { "epoch": 4.16, "learning_rate": 8.726190476190476e-05, "loss": 0.5243, "step": 3627 },
+    { "epoch": 4.16, "learning_rate": 8.714285714285715e-05, "loss": 0.5033, "step": 3628 },
+    { "epoch": 4.16, "learning_rate": 8.702380952380953e-05, "loss": 0.4826, "step": 3629 },
+    { "epoch": 4.16, "learning_rate": 8.690476190476192e-05, "loss": 0.4811, "step": 3630 },
+    { "epoch": 4.16, "learning_rate": 8.678571428571429e-05, "loss": 0.4872, "step": 3631 },
+    { "epoch": 4.16, "learning_rate": 8.666666666666667e-05, "loss": 0.5267, "step": 3632 },
+    { "epoch": 4.16, "learning_rate": 8.654761904761906e-05, "loss": 0.4907, "step": 3633 },
+    { "epoch": 4.16, "learning_rate": 8.642857142857143e-05, "loss": 0.5416, "step": 3634 },
+    { "epoch": 4.17, "learning_rate": 8.630952380952382e-05, "loss": 0.4687, "step": 3635 },
+    { "epoch": 4.17, "learning_rate": 8.61904761904762e-05, "loss": 0.4934, "step": 3636 },
+    { "epoch": 4.17, "learning_rate": 8.607142857142859e-05, "loss": 0.4849, "step": 3637 },
+    { "epoch": 4.17, "learning_rate": 8.595238095238096e-05, "loss": 0.4877, "step": 3638 },
+    { "epoch": 4.17, "learning_rate": 8.583333333333334e-05, "loss": 0.4928, "step": 3639 },
+    { "epoch": 4.17, "learning_rate": 8.571428571428571e-05, "loss": 0.4926, "step": 3640 },
+    { "epoch": 4.17, "learning_rate": 8.55952380952381e-05, "loss": 0.5076, "step": 3641 },
+    { "epoch": 4.17, "learning_rate": 8.547619047619048e-05, "loss": 0.5427, "step": 3642 },
+    { "epoch": 4.17, "learning_rate": 8.535714285714285e-05, "loss": 0.4762, "step": 3643 },
+    { "epoch": 4.18, "learning_rate": 8.523809523809524e-05, "loss": 0.5221, "step": 3644 },
+    { "epoch": 4.18, "learning_rate": 8.511904761904762e-05, "loss": 0.4789, "step": 3645 },
+    { "epoch": 4.18, "learning_rate": 8.5e-05, "loss": 0.5083, "step": 3646 },
+    { "epoch": 4.18, "learning_rate": 8.488095238095238e-05, "loss": 0.4838, "step": 3647 },
+    { "epoch": 4.18, "learning_rate": 8.476190476190477e-05, "loss": 0.5083, "step": 3648 },
+    { "epoch": 4.18, "learning_rate": 8.464285714285715e-05, "loss": 0.5057, "step": 3649 },
+    { "epoch": 4.18, "learning_rate": 8.452380952380952e-05, "loss": 0.4862, "step": 3650 },
+    { "epoch": 4.18, "learning_rate": 8.440476190476191e-05, "loss": 0.5255, "step": 3651 },
+    { "epoch": 4.18, "learning_rate": 8.428571428571429e-05, "loss": 0.4879, "step": 3652 },
+    { "epoch": 4.19, "learning_rate": 8.416666666666668e-05, "loss": 0.516, "step": 3653 },
+    { "epoch": 4.19, "learning_rate": 8.404761904761905e-05, "loss": 0.461, "step": 3654 },
+    { "epoch": 4.19, "learning_rate": 8.392857142857144e-05, "loss": 0.5188, "step": 3655 },
+    { "epoch": 4.19, "learning_rate": 8.380952380952382e-05, "loss": 0.5012, "step": 3656 },
+    { "epoch": 4.19, "learning_rate": 8.369047619047619e-05, "loss": 0.4586, "step": 3657 },
+    { "epoch": 4.19, "learning_rate": 8.357142857142858e-05, "loss": 0.5212, "step": 3658 },
+    { "epoch": 4.19, "learning_rate": 8.345238095238096e-05, "loss": 0.4953, "step": 3659 },
+    { "epoch": 4.19, "learning_rate": 8.333333333333334e-05, "loss": 0.4853, "step": 3660 },
+    { "epoch": 4.2, "learning_rate": 8.321428571428572e-05, "loss": 0.5209, "step": 3661 },
+    { "epoch": 4.2, "learning_rate": 8.309523809523811e-05, "loss": 0.5341, "step": 3662 },
+    { "epoch": 4.2, "learning_rate": 8.297619047619048e-05, "loss": 0.5104, "step": 3663 },
+    { "epoch": 4.2, "learning_rate": 8.285714285714287e-05, "loss": 0.5039, "step": 3664 },
+    { "epoch": 4.2, "learning_rate": 8.273809523809524e-05, "loss": 0.5249, "step": 3665 },
+    { "epoch": 4.2, "learning_rate": 8.261904761904762e-05, "loss": 0.4707, "step": 3666 },
+    { "epoch": 4.2, "learning_rate": 8.25e-05, "loss": 0.459, "step": 3667 },
+    { "epoch": 4.2, "learning_rate": 8.238095238095238e-05, "loss": 0.5521, "step": 3668 },
+    { "epoch": 4.2, "learning_rate": 8.226190476190476e-05, "loss": 0.4723, "step": 3669 },
+    { "epoch": 4.21, "learning_rate": 8.214285714285714e-05, "loss": 0.5046, "step": 3670 },
+    { "epoch": 4.21, "learning_rate": 8.202380952380953e-05, "loss": 0.4584, "step": 3671 },
+    { "epoch": 4.21, "learning_rate": 8.19047619047619e-05, "loss": 0.4937, "step": 3672 },
+    { "epoch": 4.21, "learning_rate": 8.178571428571429e-05, "loss": 0.5288, "step": 3673 },
+    { "epoch": 4.21, "learning_rate": 8.166666666666667e-05, "loss": 0.5189, "step": 3674 },
+    { "epoch": 4.21, "learning_rate": 8.154761904761904e-05, "loss": 0.4438, "step": 3675 },
+    { "epoch": 4.21, "learning_rate": 8.142857142857143e-05, "loss": 0.5089, "step": 3676 },
+    { "epoch": 4.21, "learning_rate": 8.130952380952381e-05, "loss": 0.4966, "step": 3677 },
+    { "epoch": 4.21, "learning_rate": 8.11904761904762e-05, "loss": 0.5458, "step": 3678 },
+    { "epoch": 4.22, "learning_rate": 8.107142857142857e-05, "loss": 0.4742, "step": 3679 },
+    { "epoch": 4.22, "learning_rate": 8.095238095238096e-05, "loss": 0.5023, "step": 3680 },
+    { "epoch": 4.22, "learning_rate": 8.083333333333334e-05, "loss": 0.5127, "step": 3681 },
+    { "epoch": 4.22, "learning_rate": 8.071428571428573e-05, "loss": 0.461, "step": 3682 },
+    { "epoch": 4.22, "learning_rate": 8.05952380952381e-05, "loss": 0.5361, "step": 3683 },
+    { "epoch": 4.22, "learning_rate": 8.047619047619048e-05, "loss": 0.4853, "step": 3684 },
+    { "epoch": 4.22, "learning_rate": 8.035714285714287e-05, "loss": 0.5161, "step": 3685 },
+    { "epoch": 4.22, "learning_rate": 8.023809523809524e-05, "loss": 0.4814, "step": 3686 },
+    { "epoch": 4.22, "learning_rate": 8.011904761904763e-05, "loss": 0.4775, "step": 3687 },
+    { "epoch": 4.23, "learning_rate": 8e-05, "loss": 0.508, "step": 3688 },
+    { "epoch": 4.23, "learning_rate": 7.98809523809524e-05, "loss": 0.4966, "step": 3689 },
+    { "epoch": 4.23, "learning_rate": 7.976190476190477e-05, "loss": 0.5172, "step": 3690 },
+    { "epoch": 4.23, "learning_rate": 7.964285714285715e-05, "loss": 0.5043, "step": 3691 },
+    { "epoch": 4.23, "learning_rate": 7.952380952380952e-05, "loss": 0.5058, "step": 3692 },
+    { "epoch": 4.23, "learning_rate": 7.94047619047619e-05, "loss": 0.5461, "step": 3693 },
+    { "epoch": 4.23, "learning_rate": 7.928571428571429e-05, "loss": 0.4991, "step": 3694 },
+    { "epoch": 4.23, "learning_rate": 7.916666666666666e-05, "loss": 0.5003, "step": 3695 },
+    { "epoch": 4.24, "learning_rate": 7.904761904761905e-05, "loss": 0.4472, "step": 3696 },
+    { "epoch": 4.24, "learning_rate": 7.892857142857143e-05, "loss": 0.5547, "step": 3697 },
+    { "epoch": 4.24, "learning_rate": 7.880952380952382e-05, "loss": 0.4685, "step": 3698 },
+    { "epoch": 4.24, "learning_rate": 7.869047619047619e-05, "loss": 0.4957, "step": 3699 },
+    { "epoch": 4.24, "learning_rate": 7.857142857142858e-05, "loss": 0.4596, "step": 3700 },
+    { "epoch": 4.24, "learning_rate": 7.845238095238096e-05, "loss": 0.4686, "step": 3701 },
+    { "epoch": 4.24, "learning_rate": 7.833333333333333e-05, "loss": 0.506, "step": 3702 },
+    { "epoch": 4.24, "learning_rate": 7.821428571428572e-05, "loss": 0.5167, "step": 3703 },
+    { "epoch": 4.24, "learning_rate": 7.80952380952381e-05, "loss": 0.5209, "step": 3704 },
+    { "epoch": 4.25, "learning_rate": 7.797619047619048e-05, "loss": 0.4838, "step": 3705 },
+    {
+      "epoch": 4.25,
+      "learning_rate": 7.785714285714286e-05,
+      "loss": 0.4927,
+      "step": 3706
     }
   ],
   "logging_steps": 1,
   "max_steps": 4360,
   "num_train_epochs": 5,
   "save_steps": 218,
-  "total_flos":
+  "total_flos": 7.352800636834205e+19,
   "trial_name": null,
   "trial_params": null
 }
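The appended records above are the trainer's per-step training log (logging_steps is 1), plus one eval record logged at step 3490; with save_steps at 218, step 3706 is the 17th checkpoint (17 x 218 = 3706) toward max_steps 4360. As an illustration (not part of this commit), a minimal sketch that reads the updated trainer_state.json, assumed to be downloaded locally, and splits its log records (stored under the trainer's `log_history` key) into training losses and eval results:

# Illustrative only: inspect the updated trainer state after downloading it.
import json

with open("last-checkpoint/trainer_state.json") as f:  # assumed local path
    state = json.load(f)

print(state["epoch"], state["global_step"])  # 4.246575832990171 3706

# Training records carry "loss"; eval records carry "eval_*" keys instead.
train_records = [r for r in state["log_history"] if "loss" in r]
eval_records = [r for r in state["log_history"] if "eval_loss" in r]

print(len(train_records), "training records,", len(eval_records), "eval records")
print("last logged loss:", train_records[-1]["loss"], "at step", train_records[-1]["step"])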