Training in progress, step 15000, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea926cda4e2153ae9b3b99a90f0761f08add2b7956f78f82e2159b9b8673309c
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f303445e834fb20d37824cc6f433da014fb723158a8056ecc6248d64dcd2d8b7
|
| 3 |
size 173247691
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9d7917deb54577f19850c270710b76996f00940cff953270cc2bd4c9d93203b
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 18.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -102908,6 +102908,2106 @@
|
|
| 102908 |
"learning_rate": 1.2803609399935274e-06,
|
| 102909 |
"loss": 0.6691170334815979,
|
| 102910 |
"step": 14700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102911 |
}
|
| 102912 |
],
|
| 102913 |
"logging_steps": 1,
|
|
@@ -102927,7 +105027,7 @@
|
|
| 102927 |
"attributes": {}
|
| 102928 |
}
|
| 102929 |
},
|
| 102930 |
-
"total_flos": 4.
|
| 102931 |
"train_batch_size": 8,
|
| 102932 |
"trial_name": null,
|
| 102933 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 18.404907975460123,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 102908 |
"learning_rate": 1.2803609399935274e-06,
|
| 102909 |
"loss": 0.6691170334815979,
|
| 102910 |
"step": 14700
|
| 102911 |
+
},
|
| 102912 |
+
{
|
| 102913 |
+
"epoch": 18.03803680981595,
|
| 102914 |
+
"grad_norm": 0.2619655132293701,
|
| 102915 |
+
"learning_rate": 1.2787757609464296e-06,
|
| 102916 |
+
"loss": 0.22049419581890106,
|
| 102917 |
+
"step": 14701
|
| 102918 |
+
},
|
| 102919 |
+
{
|
| 102920 |
+
"epoch": 18.03926380368098,
|
| 102921 |
+
"grad_norm": 0.2851826250553131,
|
| 102922 |
+
"learning_rate": 1.277191538034228e-06,
|
| 102923 |
+
"loss": 0.6860067248344421,
|
| 102924 |
+
"step": 14702
|
| 102925 |
+
},
|
| 102926 |
+
{
|
| 102927 |
+
"epoch": 18.040490797546013,
|
| 102928 |
+
"grad_norm": 0.2619082033634186,
|
| 102929 |
+
"learning_rate": 1.2756082713207745e-06,
|
| 102930 |
+
"loss": 0.5653128623962402,
|
| 102931 |
+
"step": 14703
|
| 102932 |
+
},
|
| 102933 |
+
{
|
| 102934 |
+
"epoch": 18.041717791411042,
|
| 102935 |
+
"grad_norm": 0.3042795658111572,
|
| 102936 |
+
"learning_rate": 1.27402596086989e-06,
|
| 102937 |
+
"loss": 0.6747358441352844,
|
| 102938 |
+
"step": 14704
|
| 102939 |
+
},
|
| 102940 |
+
{
|
| 102941 |
+
"epoch": 18.042944785276074,
|
| 102942 |
+
"grad_norm": 0.2690955102443695,
|
| 102943 |
+
"learning_rate": 1.2724446067453489e-06,
|
| 102944 |
+
"loss": 0.8403098583221436,
|
| 102945 |
+
"step": 14705
|
| 102946 |
+
},
|
| 102947 |
+
{
|
| 102948 |
+
"epoch": 18.044171779141106,
|
| 102949 |
+
"grad_norm": 0.2661285102367401,
|
| 102950 |
+
"learning_rate": 1.2708642090108942e-06,
|
| 102951 |
+
"loss": 0.6148532629013062,
|
| 102952 |
+
"step": 14706
|
| 102953 |
+
},
|
| 102954 |
+
{
|
| 102955 |
+
"epoch": 18.045398773006134,
|
| 102956 |
+
"grad_norm": 0.2536851465702057,
|
| 102957 |
+
"learning_rate": 1.269284767730225e-06,
|
| 102958 |
+
"loss": 0.5828821063041687,
|
| 102959 |
+
"step": 14707
|
| 102960 |
+
},
|
| 102961 |
+
{
|
| 102962 |
+
"epoch": 18.046625766871166,
|
| 102963 |
+
"grad_norm": 0.27755022048950195,
|
| 102964 |
+
"learning_rate": 1.2677062829670017e-06,
|
| 102965 |
+
"loss": 0.7345338463783264,
|
| 102966 |
+
"step": 14708
|
| 102967 |
+
},
|
| 102968 |
+
{
|
| 102969 |
+
"epoch": 18.047852760736195,
|
| 102970 |
+
"grad_norm": 0.26204586029052734,
|
| 102971 |
+
"learning_rate": 1.266128754784854e-06,
|
| 102972 |
+
"loss": 0.6025913953781128,
|
| 102973 |
+
"step": 14709
|
| 102974 |
+
},
|
| 102975 |
+
{
|
| 102976 |
+
"epoch": 18.049079754601227,
|
| 102977 |
+
"grad_norm": 0.26670968532562256,
|
| 102978 |
+
"learning_rate": 1.264552183247364e-06,
|
| 102979 |
+
"loss": 0.5462385416030884,
|
| 102980 |
+
"step": 14710
|
| 102981 |
+
},
|
| 102982 |
+
{
|
| 102983 |
+
"epoch": 18.05030674846626,
|
| 102984 |
+
"grad_norm": 0.25775453448295593,
|
| 102985 |
+
"learning_rate": 1.262976568418084e-06,
|
| 102986 |
+
"loss": 0.5145770907402039,
|
| 102987 |
+
"step": 14711
|
| 102988 |
+
},
|
| 102989 |
+
{
|
| 102990 |
+
"epoch": 18.051533742331287,
|
| 102991 |
+
"grad_norm": 0.292965292930603,
|
| 102992 |
+
"learning_rate": 1.2614019103605102e-06,
|
| 102993 |
+
"loss": 0.49756181240081787,
|
| 102994 |
+
"step": 14712
|
| 102995 |
+
},
|
| 102996 |
+
{
|
| 102997 |
+
"epoch": 18.05276073619632,
|
| 102998 |
+
"grad_norm": 0.26325327157974243,
|
| 102999 |
+
"learning_rate": 1.2598282091381252e-06,
|
| 103000 |
+
"loss": 0.6994046568870544,
|
| 103001 |
+
"step": 14713
|
| 103002 |
+
},
|
| 103003 |
+
{
|
| 103004 |
+
"epoch": 18.05398773006135,
|
| 103005 |
+
"grad_norm": 0.25802725553512573,
|
| 103006 |
+
"learning_rate": 1.2582554648143562e-06,
|
| 103007 |
+
"loss": 0.4465128183364868,
|
| 103008 |
+
"step": 14714
|
| 103009 |
+
},
|
| 103010 |
+
{
|
| 103011 |
+
"epoch": 18.05521472392638,
|
| 103012 |
+
"grad_norm": 0.2557304799556732,
|
| 103013 |
+
"learning_rate": 1.2566836774525997e-06,
|
| 103014 |
+
"loss": 0.504986584186554,
|
| 103015 |
+
"step": 14715
|
| 103016 |
+
},
|
| 103017 |
+
{
|
| 103018 |
+
"epoch": 18.05644171779141,
|
| 103019 |
+
"grad_norm": 0.2756461203098297,
|
| 103020 |
+
"learning_rate": 1.255112847116202e-06,
|
| 103021 |
+
"loss": 0.659092903137207,
|
| 103022 |
+
"step": 14716
|
| 103023 |
+
},
|
| 103024 |
+
{
|
| 103025 |
+
"epoch": 18.05766871165644,
|
| 103026 |
+
"grad_norm": 0.32308894395828247,
|
| 103027 |
+
"learning_rate": 1.2535429738684822e-06,
|
| 103028 |
+
"loss": 0.6876434087753296,
|
| 103029 |
+
"step": 14717
|
| 103030 |
+
},
|
| 103031 |
+
{
|
| 103032 |
+
"epoch": 18.058895705521472,
|
| 103033 |
+
"grad_norm": 0.24270571768283844,
|
| 103034 |
+
"learning_rate": 1.2519740577727173e-06,
|
| 103035 |
+
"loss": 0.5285595655441284,
|
| 103036 |
+
"step": 14718
|
| 103037 |
+
},
|
| 103038 |
+
{
|
| 103039 |
+
"epoch": 18.060122699386504,
|
| 103040 |
+
"grad_norm": 0.2521442174911499,
|
| 103041 |
+
"learning_rate": 1.2504060988921485e-06,
|
| 103042 |
+
"loss": 0.6927993297576904,
|
| 103043 |
+
"step": 14719
|
| 103044 |
+
},
|
| 103045 |
+
{
|
| 103046 |
+
"epoch": 18.061349693251532,
|
| 103047 |
+
"grad_norm": 0.2553957998752594,
|
| 103048 |
+
"learning_rate": 1.2488390972899778e-06,
|
| 103049 |
+
"loss": 0.5332204103469849,
|
| 103050 |
+
"step": 14720
|
| 103051 |
+
},
|
| 103052 |
+
{
|
| 103053 |
+
"epoch": 18.062576687116565,
|
| 103054 |
+
"grad_norm": 0.2955595850944519,
|
| 103055 |
+
"learning_rate": 1.2472730530293574e-06,
|
| 103056 |
+
"loss": 0.7910521030426025,
|
| 103057 |
+
"step": 14721
|
| 103058 |
+
},
|
| 103059 |
+
{
|
| 103060 |
+
"epoch": 18.063803680981597,
|
| 103061 |
+
"grad_norm": 0.24877744913101196,
|
| 103062 |
+
"learning_rate": 1.2457079661734178e-06,
|
| 103063 |
+
"loss": 0.6661491394042969,
|
| 103064 |
+
"step": 14722
|
| 103065 |
+
},
|
| 103066 |
+
{
|
| 103067 |
+
"epoch": 18.065030674846625,
|
| 103068 |
+
"grad_norm": 0.2780570983886719,
|
| 103069 |
+
"learning_rate": 1.2441438367852388e-06,
|
| 103070 |
+
"loss": 0.5603436827659607,
|
| 103071 |
+
"step": 14723
|
| 103072 |
+
},
|
| 103073 |
+
{
|
| 103074 |
+
"epoch": 18.066257668711657,
|
| 103075 |
+
"grad_norm": 0.3148570656776428,
|
| 103076 |
+
"learning_rate": 1.2425806649278671e-06,
|
| 103077 |
+
"loss": 0.6911613941192627,
|
| 103078 |
+
"step": 14724
|
| 103079 |
+
},
|
| 103080 |
+
{
|
| 103081 |
+
"epoch": 18.067484662576685,
|
| 103082 |
+
"grad_norm": 0.26755058765411377,
|
| 103083 |
+
"learning_rate": 1.2410184506643164e-06,
|
| 103084 |
+
"loss": 0.7695028781890869,
|
| 103085 |
+
"step": 14725
|
| 103086 |
+
},
|
| 103087 |
+
{
|
| 103088 |
+
"epoch": 18.068711656441717,
|
| 103089 |
+
"grad_norm": 0.241511270403862,
|
| 103090 |
+
"learning_rate": 1.2394571940575446e-06,
|
| 103091 |
+
"loss": 0.5810757875442505,
|
| 103092 |
+
"step": 14726
|
| 103093 |
+
},
|
| 103094 |
+
{
|
| 103095 |
+
"epoch": 18.06993865030675,
|
| 103096 |
+
"grad_norm": 0.256823867559433,
|
| 103097 |
+
"learning_rate": 1.2378968951704845e-06,
|
| 103098 |
+
"loss": 0.44509875774383545,
|
| 103099 |
+
"step": 14727
|
| 103100 |
+
},
|
| 103101 |
+
{
|
| 103102 |
+
"epoch": 18.071165644171778,
|
| 103103 |
+
"grad_norm": 0.25903940200805664,
|
| 103104 |
+
"learning_rate": 1.2363375540660304e-06,
|
| 103105 |
+
"loss": 0.6648673415184021,
|
| 103106 |
+
"step": 14728
|
| 103107 |
+
},
|
| 103108 |
+
{
|
| 103109 |
+
"epoch": 18.07239263803681,
|
| 103110 |
+
"grad_norm": 0.26043501496315,
|
| 103111 |
+
"learning_rate": 1.234779170807035e-06,
|
| 103112 |
+
"loss": 0.5573819875717163,
|
| 103113 |
+
"step": 14729
|
| 103114 |
+
},
|
| 103115 |
+
{
|
| 103116 |
+
"epoch": 18.073619631901842,
|
| 103117 |
+
"grad_norm": 0.26453620195388794,
|
| 103118 |
+
"learning_rate": 1.2332217454563116e-06,
|
| 103119 |
+
"loss": 0.47202008962631226,
|
| 103120 |
+
"step": 14730
|
| 103121 |
+
},
|
| 103122 |
+
{
|
| 103123 |
+
"epoch": 18.07484662576687,
|
| 103124 |
+
"grad_norm": 0.30493998527526855,
|
| 103125 |
+
"learning_rate": 1.2316652780766324e-06,
|
| 103126 |
+
"loss": 0.4513840079307556,
|
| 103127 |
+
"step": 14731
|
| 103128 |
+
},
|
| 103129 |
+
{
|
| 103130 |
+
"epoch": 18.076073619631902,
|
| 103131 |
+
"grad_norm": 0.25202369689941406,
|
| 103132 |
+
"learning_rate": 1.2301097687307361e-06,
|
| 103133 |
+
"loss": 0.6360188722610474,
|
| 103134 |
+
"step": 14732
|
| 103135 |
+
},
|
| 103136 |
+
{
|
| 103137 |
+
"epoch": 18.07730061349693,
|
| 103138 |
+
"grad_norm": 0.2519301474094391,
|
| 103139 |
+
"learning_rate": 1.2285552174813225e-06,
|
| 103140 |
+
"loss": 0.37584954500198364,
|
| 103141 |
+
"step": 14733
|
| 103142 |
+
},
|
| 103143 |
+
{
|
| 103144 |
+
"epoch": 18.078527607361963,
|
| 103145 |
+
"grad_norm": 0.2923086881637573,
|
| 103146 |
+
"learning_rate": 1.227001624391047e-06,
|
| 103147 |
+
"loss": 0.5420209169387817,
|
| 103148 |
+
"step": 14734
|
| 103149 |
+
},
|
| 103150 |
+
{
|
| 103151 |
+
"epoch": 18.079754601226995,
|
| 103152 |
+
"grad_norm": 0.25460705161094666,
|
| 103153 |
+
"learning_rate": 1.2254489895225347e-06,
|
| 103154 |
+
"loss": 0.6598200798034668,
|
| 103155 |
+
"step": 14735
|
| 103156 |
+
},
|
| 103157 |
+
{
|
| 103158 |
+
"epoch": 18.080981595092023,
|
| 103159 |
+
"grad_norm": 0.24892649054527283,
|
| 103160 |
+
"learning_rate": 1.2238973129383685e-06,
|
| 103161 |
+
"loss": 0.6341156959533691,
|
| 103162 |
+
"step": 14736
|
| 103163 |
+
},
|
| 103164 |
+
{
|
| 103165 |
+
"epoch": 18.082208588957055,
|
| 103166 |
+
"grad_norm": 0.2726600170135498,
|
| 103167 |
+
"learning_rate": 1.2223465947010877e-06,
|
| 103168 |
+
"loss": 0.6655759811401367,
|
| 103169 |
+
"step": 14737
|
| 103170 |
+
},
|
| 103171 |
+
{
|
| 103172 |
+
"epoch": 18.083435582822087,
|
| 103173 |
+
"grad_norm": 0.26307088136672974,
|
| 103174 |
+
"learning_rate": 1.2207968348732e-06,
|
| 103175 |
+
"loss": 0.5710554122924805,
|
| 103176 |
+
"step": 14738
|
| 103177 |
+
},
|
| 103178 |
+
{
|
| 103179 |
+
"epoch": 18.084662576687116,
|
| 103180 |
+
"grad_norm": 0.2601437270641327,
|
| 103181 |
+
"learning_rate": 1.2192480335171758e-06,
|
| 103182 |
+
"loss": 0.5255711078643799,
|
| 103183 |
+
"step": 14739
|
| 103184 |
+
},
|
| 103185 |
+
{
|
| 103186 |
+
"epoch": 18.085889570552148,
|
| 103187 |
+
"grad_norm": 0.24033832550048828,
|
| 103188 |
+
"learning_rate": 1.2177001906954338e-06,
|
| 103189 |
+
"loss": 0.496280699968338,
|
| 103190 |
+
"step": 14740
|
| 103191 |
+
},
|
| 103192 |
+
{
|
| 103193 |
+
"epoch": 18.08711656441718,
|
| 103194 |
+
"grad_norm": 0.29315364360809326,
|
| 103195 |
+
"learning_rate": 1.216153306470369e-06,
|
| 103196 |
+
"loss": 0.6240687370300293,
|
| 103197 |
+
"step": 14741
|
| 103198 |
+
},
|
| 103199 |
+
{
|
| 103200 |
+
"epoch": 18.088343558282208,
|
| 103201 |
+
"grad_norm": 0.23308129608631134,
|
| 103202 |
+
"learning_rate": 1.2146073809043312e-06,
|
| 103203 |
+
"loss": 0.470639705657959,
|
| 103204 |
+
"step": 14742
|
| 103205 |
+
},
|
| 103206 |
+
{
|
| 103207 |
+
"epoch": 18.08957055214724,
|
| 103208 |
+
"grad_norm": 0.2676543593406677,
|
| 103209 |
+
"learning_rate": 1.2130624140596291e-06,
|
| 103210 |
+
"loss": 0.4614896774291992,
|
| 103211 |
+
"step": 14743
|
| 103212 |
+
},
|
| 103213 |
+
{
|
| 103214 |
+
"epoch": 18.09079754601227,
|
| 103215 |
+
"grad_norm": 0.28502893447875977,
|
| 103216 |
+
"learning_rate": 1.2115184059985463e-06,
|
| 103217 |
+
"loss": 0.6204589605331421,
|
| 103218 |
+
"step": 14744
|
| 103219 |
+
},
|
| 103220 |
+
{
|
| 103221 |
+
"epoch": 18.0920245398773,
|
| 103222 |
+
"grad_norm": 0.2745014727115631,
|
| 103223 |
+
"learning_rate": 1.2099753567833022e-06,
|
| 103224 |
+
"loss": 0.6177423000335693,
|
| 103225 |
+
"step": 14745
|
| 103226 |
+
},
|
| 103227 |
+
{
|
| 103228 |
+
"epoch": 18.093251533742333,
|
| 103229 |
+
"grad_norm": 0.266347199678421,
|
| 103230 |
+
"learning_rate": 1.2084332664761027e-06,
|
| 103231 |
+
"loss": 0.5986963510513306,
|
| 103232 |
+
"step": 14746
|
| 103233 |
+
},
|
| 103234 |
+
{
|
| 103235 |
+
"epoch": 18.09447852760736,
|
| 103236 |
+
"grad_norm": 0.297341525554657,
|
| 103237 |
+
"learning_rate": 1.206892135139101e-06,
|
| 103238 |
+
"loss": 0.6357883214950562,
|
| 103239 |
+
"step": 14747
|
| 103240 |
+
},
|
| 103241 |
+
{
|
| 103242 |
+
"epoch": 18.095705521472393,
|
| 103243 |
+
"grad_norm": 0.26656341552734375,
|
| 103244 |
+
"learning_rate": 1.2053519628344168e-06,
|
| 103245 |
+
"loss": 0.6391650438308716,
|
| 103246 |
+
"step": 14748
|
| 103247 |
+
},
|
| 103248 |
+
{
|
| 103249 |
+
"epoch": 18.096932515337425,
|
| 103250 |
+
"grad_norm": 0.3141368329524994,
|
| 103251 |
+
"learning_rate": 1.2038127496241364e-06,
|
| 103252 |
+
"loss": 0.7222796678543091,
|
| 103253 |
+
"step": 14749
|
| 103254 |
+
},
|
| 103255 |
+
{
|
| 103256 |
+
"epoch": 18.098159509202453,
|
| 103257 |
+
"grad_norm": 0.29429808259010315,
|
| 103258 |
+
"learning_rate": 1.2022744955702908e-06,
|
| 103259 |
+
"loss": 0.6423506140708923,
|
| 103260 |
+
"step": 14750
|
| 103261 |
+
},
|
| 103262 |
+
{
|
| 103263 |
+
"epoch": 18.099386503067485,
|
| 103264 |
+
"grad_norm": 0.31961479783058167,
|
| 103265 |
+
"learning_rate": 1.200737200734886e-06,
|
| 103266 |
+
"loss": 0.5356523394584656,
|
| 103267 |
+
"step": 14751
|
| 103268 |
+
},
|
| 103269 |
+
{
|
| 103270 |
+
"epoch": 18.100613496932514,
|
| 103271 |
+
"grad_norm": 0.27962884306907654,
|
| 103272 |
+
"learning_rate": 1.1992008651798892e-06,
|
| 103273 |
+
"loss": 0.6289323568344116,
|
| 103274 |
+
"step": 14752
|
| 103275 |
+
},
|
| 103276 |
+
{
|
| 103277 |
+
"epoch": 18.101840490797546,
|
| 103278 |
+
"grad_norm": 0.28391116857528687,
|
| 103279 |
+
"learning_rate": 1.19766548896722e-06,
|
| 103280 |
+
"loss": 0.7367600202560425,
|
| 103281 |
+
"step": 14753
|
| 103282 |
+
},
|
| 103283 |
+
{
|
| 103284 |
+
"epoch": 18.103067484662578,
|
| 103285 |
+
"grad_norm": 0.255130410194397,
|
| 103286 |
+
"learning_rate": 1.1961310721587737e-06,
|
| 103287 |
+
"loss": 0.6962535977363586,
|
| 103288 |
+
"step": 14754
|
| 103289 |
+
},
|
| 103290 |
+
{
|
| 103291 |
+
"epoch": 18.104294478527606,
|
| 103292 |
+
"grad_norm": 0.2768807113170624,
|
| 103293 |
+
"learning_rate": 1.1945976148163924e-06,
|
| 103294 |
+
"loss": 0.29734355211257935,
|
| 103295 |
+
"step": 14755
|
| 103296 |
+
},
|
| 103297 |
+
{
|
| 103298 |
+
"epoch": 18.10552147239264,
|
| 103299 |
+
"grad_norm": 0.2612367570400238,
|
| 103300 |
+
"learning_rate": 1.193065117001882e-06,
|
| 103301 |
+
"loss": 0.6541072726249695,
|
| 103302 |
+
"step": 14756
|
| 103303 |
+
},
|
| 103304 |
+
{
|
| 103305 |
+
"epoch": 18.10674846625767,
|
| 103306 |
+
"grad_norm": 0.2620174288749695,
|
| 103307 |
+
"learning_rate": 1.1915335787770155e-06,
|
| 103308 |
+
"loss": 0.34555530548095703,
|
| 103309 |
+
"step": 14757
|
| 103310 |
+
},
|
| 103311 |
+
{
|
| 103312 |
+
"epoch": 18.1079754601227,
|
| 103313 |
+
"grad_norm": 0.24455462396144867,
|
| 103314 |
+
"learning_rate": 1.1900030002035352e-06,
|
| 103315 |
+
"loss": 0.5935930013656616,
|
| 103316 |
+
"step": 14758
|
| 103317 |
+
},
|
| 103318 |
+
{
|
| 103319 |
+
"epoch": 18.10920245398773,
|
| 103320 |
+
"grad_norm": 0.2739318609237671,
|
| 103321 |
+
"learning_rate": 1.1884733813431193e-06,
|
| 103322 |
+
"loss": 0.6299898624420166,
|
| 103323 |
+
"step": 14759
|
| 103324 |
+
},
|
| 103325 |
+
{
|
| 103326 |
+
"epoch": 18.11042944785276,
|
| 103327 |
+
"grad_norm": 0.25147828459739685,
|
| 103328 |
+
"learning_rate": 1.1869447222574299e-06,
|
| 103329 |
+
"loss": 0.6016464829444885,
|
| 103330 |
+
"step": 14760
|
| 103331 |
+
},
|
| 103332 |
+
{
|
| 103333 |
+
"epoch": 18.11165644171779,
|
| 103334 |
+
"grad_norm": 0.3216879665851593,
|
| 103335 |
+
"learning_rate": 1.1854170230080813e-06,
|
| 103336 |
+
"loss": 0.5907288193702698,
|
| 103337 |
+
"step": 14761
|
| 103338 |
+
},
|
| 103339 |
+
{
|
| 103340 |
+
"epoch": 18.112883435582823,
|
| 103341 |
+
"grad_norm": 0.28898489475250244,
|
| 103342 |
+
"learning_rate": 1.1838902836566524e-06,
|
| 103343 |
+
"loss": 0.8385666608810425,
|
| 103344 |
+
"step": 14762
|
| 103345 |
+
},
|
| 103346 |
+
{
|
| 103347 |
+
"epoch": 18.11411042944785,
|
| 103348 |
+
"grad_norm": 0.2882368862628937,
|
| 103349 |
+
"learning_rate": 1.1823645042646852e-06,
|
| 103350 |
+
"loss": 0.7034389972686768,
|
| 103351 |
+
"step": 14763
|
| 103352 |
+
},
|
| 103353 |
+
{
|
| 103354 |
+
"epoch": 18.115337423312884,
|
| 103355 |
+
"grad_norm": 0.27602750062942505,
|
| 103356 |
+
"learning_rate": 1.1808396848936699e-06,
|
| 103357 |
+
"loss": 0.5460613369941711,
|
| 103358 |
+
"step": 14764
|
| 103359 |
+
},
|
| 103360 |
+
{
|
| 103361 |
+
"epoch": 18.116564417177916,
|
| 103362 |
+
"grad_norm": 0.27741363644599915,
|
| 103363 |
+
"learning_rate": 1.1793158256050708e-06,
|
| 103364 |
+
"loss": 0.6697900295257568,
|
| 103365 |
+
"step": 14765
|
| 103366 |
+
},
|
| 103367 |
+
{
|
| 103368 |
+
"epoch": 18.117791411042944,
|
| 103369 |
+
"grad_norm": 0.27683332562446594,
|
| 103370 |
+
"learning_rate": 1.1777929264603138e-06,
|
| 103371 |
+
"loss": 0.7239335775375366,
|
| 103372 |
+
"step": 14766
|
| 103373 |
+
},
|
| 103374 |
+
{
|
| 103375 |
+
"epoch": 18.119018404907976,
|
| 103376 |
+
"grad_norm": 0.2802925109863281,
|
| 103377 |
+
"learning_rate": 1.1762709875207806e-06,
|
| 103378 |
+
"loss": 0.5545239448547363,
|
| 103379 |
+
"step": 14767
|
| 103380 |
+
},
|
| 103381 |
+
{
|
| 103382 |
+
"epoch": 18.120245398773005,
|
| 103383 |
+
"grad_norm": 0.26545843482017517,
|
| 103384 |
+
"learning_rate": 1.174750008847819e-06,
|
| 103385 |
+
"loss": 0.6048265695571899,
|
| 103386 |
+
"step": 14768
|
| 103387 |
+
},
|
| 103388 |
+
{
|
| 103389 |
+
"epoch": 18.121472392638037,
|
| 103390 |
+
"grad_norm": 0.26717478036880493,
|
| 103391 |
+
"learning_rate": 1.1732299905027306e-06,
|
| 103392 |
+
"loss": 0.610572338104248,
|
| 103393 |
+
"step": 14769
|
| 103394 |
+
},
|
| 103395 |
+
{
|
| 103396 |
+
"epoch": 18.12269938650307,
|
| 103397 |
+
"grad_norm": 0.27197328209877014,
|
| 103398 |
+
"learning_rate": 1.1717109325467852e-06,
|
| 103399 |
+
"loss": 0.3873680531978607,
|
| 103400 |
+
"step": 14770
|
| 103401 |
+
},
|
| 103402 |
+
{
|
| 103403 |
+
"epoch": 18.123926380368097,
|
| 103404 |
+
"grad_norm": 0.24926748871803284,
|
| 103405 |
+
"learning_rate": 1.1701928350412117e-06,
|
| 103406 |
+
"loss": 0.5061421394348145,
|
| 103407 |
+
"step": 14771
|
| 103408 |
+
},
|
| 103409 |
+
{
|
| 103410 |
+
"epoch": 18.12515337423313,
|
| 103411 |
+
"grad_norm": 0.4047524034976959,
|
| 103412 |
+
"learning_rate": 1.1686756980472001e-06,
|
| 103413 |
+
"loss": 0.5245703458786011,
|
| 103414 |
+
"step": 14772
|
| 103415 |
+
},
|
| 103416 |
+
{
|
| 103417 |
+
"epoch": 18.12638036809816,
|
| 103418 |
+
"grad_norm": 0.24831652641296387,
|
| 103419 |
+
"learning_rate": 1.1671595216259047e-06,
|
| 103420 |
+
"loss": 0.6340184807777405,
|
| 103421 |
+
"step": 14773
|
| 103422 |
+
},
|
| 103423 |
+
{
|
| 103424 |
+
"epoch": 18.12760736196319,
|
| 103425 |
+
"grad_norm": 0.3011031448841095,
|
| 103426 |
+
"learning_rate": 1.1656443058384313e-06,
|
| 103427 |
+
"loss": 0.5802903175354004,
|
| 103428 |
+
"step": 14774
|
| 103429 |
+
},
|
| 103430 |
+
{
|
| 103431 |
+
"epoch": 18.12883435582822,
|
| 103432 |
+
"grad_norm": 0.25693610310554504,
|
| 103433 |
+
"learning_rate": 1.1641300507458597e-06,
|
| 103434 |
+
"loss": 0.48606646060943604,
|
| 103435 |
+
"step": 14775
|
| 103436 |
+
},
|
| 103437 |
+
{
|
| 103438 |
+
"epoch": 18.13006134969325,
|
| 103439 |
+
"grad_norm": 0.25373196601867676,
|
| 103440 |
+
"learning_rate": 1.1626167564092237e-06,
|
| 103441 |
+
"loss": 0.5785506367683411,
|
| 103442 |
+
"step": 14776
|
| 103443 |
+
},
|
| 103444 |
+
{
|
| 103445 |
+
"epoch": 18.131288343558282,
|
| 103446 |
+
"grad_norm": 0.25822749733924866,
|
| 103447 |
+
"learning_rate": 1.1611044228895224e-06,
|
| 103448 |
+
"loss": 0.660150408744812,
|
| 103449 |
+
"step": 14777
|
| 103450 |
+
},
|
| 103451 |
+
{
|
| 103452 |
+
"epoch": 18.132515337423314,
|
| 103453 |
+
"grad_norm": 0.3991084694862366,
|
| 103454 |
+
"learning_rate": 1.1595930502477121e-06,
|
| 103455 |
+
"loss": 0.5788047313690186,
|
| 103456 |
+
"step": 14778
|
| 103457 |
+
},
|
| 103458 |
+
{
|
| 103459 |
+
"epoch": 18.133742331288342,
|
| 103460 |
+
"grad_norm": 0.269821435213089,
|
| 103461 |
+
"learning_rate": 1.1580826385447024e-06,
|
| 103462 |
+
"loss": 0.5926434993743896,
|
| 103463 |
+
"step": 14779
|
| 103464 |
+
},
|
| 103465 |
+
{
|
| 103466 |
+
"epoch": 18.134969325153374,
|
| 103467 |
+
"grad_norm": 0.2682454586029053,
|
| 103468 |
+
"learning_rate": 1.1565731878413865e-06,
|
| 103469 |
+
"loss": 0.4119260609149933,
|
| 103470 |
+
"step": 14780
|
| 103471 |
+
},
|
| 103472 |
+
{
|
| 103473 |
+
"epoch": 18.136196319018406,
|
| 103474 |
+
"grad_norm": 0.2811979353427887,
|
| 103475 |
+
"learning_rate": 1.1550646981986047e-06,
|
| 103476 |
+
"loss": 0.6638317108154297,
|
| 103477 |
+
"step": 14781
|
| 103478 |
+
},
|
| 103479 |
+
{
|
| 103480 |
+
"epoch": 18.137423312883435,
|
| 103481 |
+
"grad_norm": 0.2945413589477539,
|
| 103482 |
+
"learning_rate": 1.153557169677158e-06,
|
| 103483 |
+
"loss": 0.8222766518592834,
|
| 103484 |
+
"step": 14782
|
| 103485 |
+
},
|
| 103486 |
+
{
|
| 103487 |
+
"epoch": 18.138650306748467,
|
| 103488 |
+
"grad_norm": 0.25061318278312683,
|
| 103489 |
+
"learning_rate": 1.1520506023378064e-06,
|
| 103490 |
+
"loss": 0.5434099435806274,
|
| 103491 |
+
"step": 14783
|
| 103492 |
+
},
|
| 103493 |
+
{
|
| 103494 |
+
"epoch": 18.139877300613495,
|
| 103495 |
+
"grad_norm": 0.29450666904449463,
|
| 103496 |
+
"learning_rate": 1.150544996241279e-06,
|
| 103497 |
+
"loss": 0.6502367258071899,
|
| 103498 |
+
"step": 14784
|
| 103499 |
+
},
|
| 103500 |
+
{
|
| 103501 |
+
"epoch": 18.141104294478527,
|
| 103502 |
+
"grad_norm": 0.26420921087265015,
|
| 103503 |
+
"learning_rate": 1.1490403514482607e-06,
|
| 103504 |
+
"loss": 0.5811017751693726,
|
| 103505 |
+
"step": 14785
|
| 103506 |
+
},
|
| 103507 |
+
{
|
| 103508 |
+
"epoch": 18.14233128834356,
|
| 103509 |
+
"grad_norm": 0.23802681267261505,
|
| 103510 |
+
"learning_rate": 1.1475366680194028e-06,
|
| 103511 |
+
"loss": 0.4077605605125427,
|
| 103512 |
+
"step": 14786
|
| 103513 |
+
},
|
| 103514 |
+
{
|
| 103515 |
+
"epoch": 18.143558282208588,
|
| 103516 |
+
"grad_norm": 0.28373652696609497,
|
| 103517 |
+
"learning_rate": 1.1460339460153152e-06,
|
| 103518 |
+
"loss": 0.6054940223693848,
|
| 103519 |
+
"step": 14787
|
| 103520 |
+
},
|
| 103521 |
+
{
|
| 103522 |
+
"epoch": 18.14478527607362,
|
| 103523 |
+
"grad_norm": 0.2780472934246063,
|
| 103524 |
+
"learning_rate": 1.1445321854965608e-06,
|
| 103525 |
+
"loss": 0.655227541923523,
|
| 103526 |
+
"step": 14788
|
| 103527 |
+
},
|
| 103528 |
+
{
|
| 103529 |
+
"epoch": 18.14601226993865,
|
| 103530 |
+
"grad_norm": 0.2564193606376648,
|
| 103531 |
+
"learning_rate": 1.1430313865236769e-06,
|
| 103532 |
+
"loss": 0.5690796375274658,
|
| 103533 |
+
"step": 14789
|
| 103534 |
+
},
|
| 103535 |
+
{
|
| 103536 |
+
"epoch": 18.14723926380368,
|
| 103537 |
+
"grad_norm": 0.29904335737228394,
|
| 103538 |
+
"learning_rate": 1.141531549157157e-06,
|
| 103539 |
+
"loss": 0.5125435590744019,
|
| 103540 |
+
"step": 14790
|
| 103541 |
+
},
|
| 103542 |
+
{
|
| 103543 |
+
"epoch": 18.148466257668712,
|
| 103544 |
+
"grad_norm": 0.27841755747795105,
|
| 103545 |
+
"learning_rate": 1.140032673457453e-06,
|
| 103546 |
+
"loss": 0.7519367933273315,
|
| 103547 |
+
"step": 14791
|
| 103548 |
+
},
|
| 103549 |
+
{
|
| 103550 |
+
"epoch": 18.14969325153374,
|
| 103551 |
+
"grad_norm": 0.24487988650798798,
|
| 103552 |
+
"learning_rate": 1.1385347594849854e-06,
|
| 103553 |
+
"loss": 0.6062588095664978,
|
| 103554 |
+
"step": 14792
|
| 103555 |
+
},
|
| 103556 |
+
{
|
| 103557 |
+
"epoch": 18.150920245398773,
|
| 103558 |
+
"grad_norm": 0.24204793572425842,
|
| 103559 |
+
"learning_rate": 1.1370378073001204e-06,
|
| 103560 |
+
"loss": 0.4515277147293091,
|
| 103561 |
+
"step": 14793
|
| 103562 |
+
},
|
| 103563 |
+
{
|
| 103564 |
+
"epoch": 18.152147239263805,
|
| 103565 |
+
"grad_norm": 0.28201863169670105,
|
| 103566 |
+
"learning_rate": 1.1355418169632038e-06,
|
| 103567 |
+
"loss": 0.4382992088794708,
|
| 103568 |
+
"step": 14794
|
| 103569 |
+
},
|
| 103570 |
+
{
|
| 103571 |
+
"epoch": 18.153374233128833,
|
| 103572 |
+
"grad_norm": 0.2509777843952179,
|
| 103573 |
+
"learning_rate": 1.1340467885345347e-06,
|
| 103574 |
+
"loss": 0.4443310499191284,
|
| 103575 |
+
"step": 14795
|
| 103576 |
+
},
|
| 103577 |
+
{
|
| 103578 |
+
"epoch": 18.154601226993865,
|
| 103579 |
+
"grad_norm": 0.28569814562797546,
|
| 103580 |
+
"learning_rate": 1.1325527220743703e-06,
|
| 103581 |
+
"loss": 0.5561038255691528,
|
| 103582 |
+
"step": 14796
|
| 103583 |
+
},
|
| 103584 |
+
{
|
| 103585 |
+
"epoch": 18.155828220858897,
|
| 103586 |
+
"grad_norm": 0.3020141124725342,
|
| 103587 |
+
"learning_rate": 1.131059617642935e-06,
|
| 103588 |
+
"loss": 0.7362170219421387,
|
| 103589 |
+
"step": 14797
|
| 103590 |
+
},
|
| 103591 |
+
{
|
| 103592 |
+
"epoch": 18.157055214723925,
|
| 103593 |
+
"grad_norm": 0.29274046421051025,
|
| 103594 |
+
"learning_rate": 1.129567475300408e-06,
|
| 103595 |
+
"loss": 0.5618748664855957,
|
| 103596 |
+
"step": 14798
|
| 103597 |
+
},
|
| 103598 |
+
{
|
| 103599 |
+
"epoch": 18.158282208588957,
|
| 103600 |
+
"grad_norm": 0.33272233605384827,
|
| 103601 |
+
"learning_rate": 1.1280762951069361e-06,
|
| 103602 |
+
"loss": 0.5052449703216553,
|
| 103603 |
+
"step": 14799
|
| 103604 |
+
},
|
| 103605 |
+
{
|
| 103606 |
+
"epoch": 18.15950920245399,
|
| 103607 |
+
"grad_norm": 0.3003717362880707,
|
| 103608 |
+
"learning_rate": 1.1265860771226238e-06,
|
| 103609 |
+
"loss": 0.7986615896224976,
|
| 103610 |
+
"step": 14800
|
| 103611 |
+
},
|
| 103612 |
+
{
|
| 103613 |
+
"epoch": 18.160736196319018,
|
| 103614 |
+
"grad_norm": 0.2630554735660553,
|
| 103615 |
+
"learning_rate": 1.1250968214075398e-06,
|
| 103616 |
+
"loss": 0.4393942356109619,
|
| 103617 |
+
"step": 14801
|
| 103618 |
+
},
|
| 103619 |
+
{
|
| 103620 |
+
"epoch": 18.16196319018405,
|
| 103621 |
+
"grad_norm": 0.26669883728027344,
|
| 103622 |
+
"learning_rate": 1.123608528021708e-06,
|
| 103623 |
+
"loss": 0.6282778382301331,
|
| 103624 |
+
"step": 14802
|
| 103625 |
+
},
|
| 103626 |
+
{
|
| 103627 |
+
"epoch": 18.16319018404908,
|
| 103628 |
+
"grad_norm": 0.24756713211536407,
|
| 103629 |
+
"learning_rate": 1.1221211970251166e-06,
|
| 103630 |
+
"loss": 0.6005612015724182,
|
| 103631 |
+
"step": 14803
|
| 103632 |
+
},
|
| 103633 |
+
{
|
| 103634 |
+
"epoch": 18.16441717791411,
|
| 103635 |
+
"grad_norm": 0.2812489867210388,
|
| 103636 |
+
"learning_rate": 1.1206348284777207e-06,
|
| 103637 |
+
"loss": 0.4900221824645996,
|
| 103638 |
+
"step": 14804
|
| 103639 |
+
},
|
| 103640 |
+
{
|
| 103641 |
+
"epoch": 18.165644171779142,
|
| 103642 |
+
"grad_norm": 0.28689664602279663,
|
| 103643 |
+
"learning_rate": 1.1191494224394277e-06,
|
| 103644 |
+
"loss": 0.6676018238067627,
|
| 103645 |
+
"step": 14805
|
| 103646 |
+
},
|
| 103647 |
+
{
|
| 103648 |
+
"epoch": 18.16687116564417,
|
| 103649 |
+
"grad_norm": 0.24836260080337524,
|
| 103650 |
+
"learning_rate": 1.117664978970112e-06,
|
| 103651 |
+
"loss": 0.707294225692749,
|
| 103652 |
+
"step": 14806
|
| 103653 |
+
},
|
| 103654 |
+
{
|
| 103655 |
+
"epoch": 18.168098159509203,
|
| 103656 |
+
"grad_norm": 0.27456435561180115,
|
| 103657 |
+
"learning_rate": 1.1161814981296064e-06,
|
| 103658 |
+
"loss": 0.5701701641082764,
|
| 103659 |
+
"step": 14807
|
| 103660 |
+
},
|
| 103661 |
+
{
|
| 103662 |
+
"epoch": 18.169325153374235,
|
| 103663 |
+
"grad_norm": 0.2573767602443695,
|
| 103664 |
+
"learning_rate": 1.1146989799777046e-06,
|
| 103665 |
+
"loss": 0.5654751062393188,
|
| 103666 |
+
"step": 14808
|
| 103667 |
+
},
|
| 103668 |
+
{
|
| 103669 |
+
"epoch": 18.170552147239263,
|
| 103670 |
+
"grad_norm": 0.3141152858734131,
|
| 103671 |
+
"learning_rate": 1.1132174245741646e-06,
|
| 103672 |
+
"loss": 0.5964987277984619,
|
| 103673 |
+
"step": 14809
|
| 103674 |
+
},
|
| 103675 |
+
{
|
| 103676 |
+
"epoch": 18.171779141104295,
|
| 103677 |
+
"grad_norm": 0.2930687963962555,
|
| 103678 |
+
"learning_rate": 1.1117368319787046e-06,
|
| 103679 |
+
"loss": 0.3554658889770508,
|
| 103680 |
+
"step": 14810
|
| 103681 |
+
},
|
| 103682 |
+
{
|
| 103683 |
+
"epoch": 18.173006134969324,
|
| 103684 |
+
"grad_norm": 0.27443426847457886,
|
| 103685 |
+
"learning_rate": 1.1102572022509972e-06,
|
| 103686 |
+
"loss": 0.6657389402389526,
|
| 103687 |
+
"step": 14811
|
| 103688 |
+
},
|
| 103689 |
+
{
|
| 103690 |
+
"epoch": 18.174233128834356,
|
| 103691 |
+
"grad_norm": 0.25543758273124695,
|
| 103692 |
+
"learning_rate": 1.1087785354506942e-06,
|
| 103693 |
+
"loss": 0.8102816343307495,
|
| 103694 |
+
"step": 14812
|
| 103695 |
+
},
|
| 103696 |
+
{
|
| 103697 |
+
"epoch": 18.175460122699388,
|
| 103698 |
+
"grad_norm": 0.25440800189971924,
|
| 103699 |
+
"learning_rate": 1.1073008316373812e-06,
|
| 103700 |
+
"loss": 0.6219803094863892,
|
| 103701 |
+
"step": 14813
|
| 103702 |
+
},
|
| 103703 |
+
{
|
| 103704 |
+
"epoch": 18.176687116564416,
|
| 103705 |
+
"grad_norm": 0.2546125650405884,
|
| 103706 |
+
"learning_rate": 1.1058240908706302e-06,
|
| 103707 |
+
"loss": 0.6112823486328125,
|
| 103708 |
+
"step": 14814
|
| 103709 |
+
},
|
| 103710 |
+
{
|
| 103711 |
+
"epoch": 18.177914110429448,
|
| 103712 |
+
"grad_norm": 0.24623389542102814,
|
| 103713 |
+
"learning_rate": 1.1043483132099602e-06,
|
| 103714 |
+
"loss": 0.4982820153236389,
|
| 103715 |
+
"step": 14815
|
| 103716 |
+
},
|
| 103717 |
+
{
|
| 103718 |
+
"epoch": 18.17914110429448,
|
| 103719 |
+
"grad_norm": 0.23903052508831024,
|
| 103720 |
+
"learning_rate": 1.102873498714857e-06,
|
| 103721 |
+
"loss": 0.4568873643875122,
|
| 103722 |
+
"step": 14816
|
| 103723 |
+
},
|
| 103724 |
+
{
|
| 103725 |
+
"epoch": 18.18036809815951,
|
| 103726 |
+
"grad_norm": 0.24269966781139374,
|
| 103727 |
+
"learning_rate": 1.101399647444773e-06,
|
| 103728 |
+
"loss": 0.47848859429359436,
|
| 103729 |
+
"step": 14817
|
| 103730 |
+
},
|
| 103731 |
+
{
|
| 103732 |
+
"epoch": 18.18159509202454,
|
| 103733 |
+
"grad_norm": 0.2939677834510803,
|
| 103734 |
+
"learning_rate": 1.0999267594591023e-06,
|
| 103735 |
+
"loss": 0.5587877035140991,
|
| 103736 |
+
"step": 14818
|
| 103737 |
+
},
|
| 103738 |
+
{
|
| 103739 |
+
"epoch": 18.18282208588957,
|
| 103740 |
+
"grad_norm": 0.285671204328537,
|
| 103741 |
+
"learning_rate": 1.098454834817217e-06,
|
| 103742 |
+
"loss": 0.5312268137931824,
|
| 103743 |
+
"step": 14819
|
| 103744 |
+
},
|
| 103745 |
+
{
|
| 103746 |
+
"epoch": 18.1840490797546,
|
| 103747 |
+
"grad_norm": 0.25661855936050415,
|
| 103748 |
+
"learning_rate": 1.09698387357845e-06,
|
| 103749 |
+
"loss": 0.4996369481086731,
|
| 103750 |
+
"step": 14820
|
| 103751 |
+
},
|
| 103752 |
+
{
|
| 103753 |
+
"epoch": 18.185276073619633,
|
| 103754 |
+
"grad_norm": 0.28520864248275757,
|
| 103755 |
+
"learning_rate": 1.09551387580209e-06,
|
| 103756 |
+
"loss": 0.6270918846130371,
|
| 103757 |
+
"step": 14821
|
| 103758 |
+
},
|
| 103759 |
+
{
|
| 103760 |
+
"epoch": 18.18650306748466,
|
| 103761 |
+
"grad_norm": 0.29174116253852844,
|
| 103762 |
+
"learning_rate": 1.0940448415473897e-06,
|
| 103763 |
+
"loss": 0.6711344718933105,
|
| 103764 |
+
"step": 14822
|
| 103765 |
+
},
|
| 103766 |
+
{
|
| 103767 |
+
"epoch": 18.187730061349694,
|
| 103768 |
+
"grad_norm": 0.2522742450237274,
|
| 103769 |
+
"learning_rate": 1.0925767708735545e-06,
|
| 103770 |
+
"loss": 0.48907315731048584,
|
| 103771 |
+
"step": 14823
|
| 103772 |
+
},
|
| 103773 |
+
{
|
| 103774 |
+
"epoch": 18.188957055214726,
|
| 103775 |
+
"grad_norm": 0.2279265969991684,
|
| 103776 |
+
"learning_rate": 1.0911096638397678e-06,
|
| 103777 |
+
"loss": 0.4598676860332489,
|
| 103778 |
+
"step": 14824
|
| 103779 |
+
},
|
| 103780 |
+
{
|
| 103781 |
+
"epoch": 18.190184049079754,
|
| 103782 |
+
"grad_norm": 0.2627038359642029,
|
| 103783 |
+
"learning_rate": 1.0896435205051597e-06,
|
| 103784 |
+
"loss": 0.4075767993927002,
|
| 103785 |
+
"step": 14825
|
| 103786 |
+
},
|
| 103787 |
+
{
|
| 103788 |
+
"epoch": 18.191411042944786,
|
| 103789 |
+
"grad_norm": 0.2452666014432907,
|
| 103790 |
+
"learning_rate": 1.0881783409288278e-06,
|
| 103791 |
+
"loss": 0.4360887408256531,
|
| 103792 |
+
"step": 14826
|
| 103793 |
+
},
|
| 103794 |
+
{
|
| 103795 |
+
"epoch": 18.192638036809814,
|
| 103796 |
+
"grad_norm": 0.2526918053627014,
|
| 103797 |
+
"learning_rate": 1.0867141251698271e-06,
|
| 103798 |
+
"loss": 0.5169374346733093,
|
| 103799 |
+
"step": 14827
|
| 103800 |
+
},
|
| 103801 |
+
{
|
| 103802 |
+
"epoch": 18.193865030674846,
|
| 103803 |
+
"grad_norm": 0.2482978254556656,
|
| 103804 |
+
"learning_rate": 1.0852508732871774e-06,
|
| 103805 |
+
"loss": 0.540313720703125,
|
| 103806 |
+
"step": 14828
|
| 103807 |
+
},
|
| 103808 |
+
{
|
| 103809 |
+
"epoch": 18.19509202453988,
|
| 103810 |
+
"grad_norm": 0.2726093530654907,
|
| 103811 |
+
"learning_rate": 1.0837885853398566e-06,
|
| 103812 |
+
"loss": 0.7479716539382935,
|
| 103813 |
+
"step": 14829
|
| 103814 |
+
},
|
| 103815 |
+
{
|
| 103816 |
+
"epoch": 18.196319018404907,
|
| 103817 |
+
"grad_norm": 0.265191912651062,
|
| 103818 |
+
"learning_rate": 1.0823272613868091e-06,
|
| 103819 |
+
"loss": 0.7461106777191162,
|
| 103820 |
+
"step": 14830
|
| 103821 |
+
},
|
| 103822 |
+
{
|
| 103823 |
+
"epoch": 18.19754601226994,
|
| 103824 |
+
"grad_norm": 0.2606388032436371,
|
| 103825 |
+
"learning_rate": 1.0808669014869349e-06,
|
| 103826 |
+
"loss": 0.5220180749893188,
|
| 103827 |
+
"step": 14831
|
| 103828 |
+
},
|
| 103829 |
+
{
|
| 103830 |
+
"epoch": 18.19877300613497,
|
| 103831 |
+
"grad_norm": 0.27313604950904846,
|
| 103832 |
+
"learning_rate": 1.0794075056990955e-06,
|
| 103833 |
+
"loss": 0.5687035322189331,
|
| 103834 |
+
"step": 14832
|
| 103835 |
+
},
|
| 103836 |
+
{
|
| 103837 |
+
"epoch": 18.2,
|
| 103838 |
+
"grad_norm": 0.2549886703491211,
|
| 103839 |
+
"learning_rate": 1.077949074082113e-06,
|
| 103840 |
+
"loss": 0.5669887065887451,
|
| 103841 |
+
"step": 14833
|
| 103842 |
+
},
|
| 103843 |
+
{
|
| 103844 |
+
"epoch": 18.20122699386503,
|
| 103845 |
+
"grad_norm": 0.2542164921760559,
|
| 103846 |
+
"learning_rate": 1.0764916066947794e-06,
|
| 103847 |
+
"loss": 0.6500406265258789,
|
| 103848 |
+
"step": 14834
|
| 103849 |
+
},
|
| 103850 |
+
{
|
| 103851 |
+
"epoch": 18.20245398773006,
|
| 103852 |
+
"grad_norm": 0.265130877494812,
|
| 103853 |
+
"learning_rate": 1.075035103595834e-06,
|
| 103854 |
+
"loss": 0.7309906482696533,
|
| 103855 |
+
"step": 14835
|
| 103856 |
+
},
|
| 103857 |
+
{
|
| 103858 |
+
"epoch": 18.20368098159509,
|
| 103859 |
+
"grad_norm": 0.27156496047973633,
|
| 103860 |
+
"learning_rate": 1.0735795648439906e-06,
|
| 103861 |
+
"loss": 0.500552773475647,
|
| 103862 |
+
"step": 14836
|
| 103863 |
+
},
|
| 103864 |
+
{
|
| 103865 |
+
"epoch": 18.204907975460124,
|
| 103866 |
+
"grad_norm": 0.24534833431243896,
|
| 103867 |
+
"learning_rate": 1.0721249904979136e-06,
|
| 103868 |
+
"loss": 0.6094156503677368,
|
| 103869 |
+
"step": 14837
|
| 103870 |
+
},
|
| 103871 |
+
{
|
| 103872 |
+
"epoch": 18.206134969325152,
|
| 103873 |
+
"grad_norm": 0.3039546012878418,
|
| 103874 |
+
"learning_rate": 1.0706713806162337e-06,
|
| 103875 |
+
"loss": 0.48269039392471313,
|
| 103876 |
+
"step": 14838
|
| 103877 |
+
},
|
| 103878 |
+
{
|
| 103879 |
+
"epoch": 18.207361963190184,
|
| 103880 |
+
"grad_norm": 0.268101304769516,
|
| 103881 |
+
"learning_rate": 1.0692187352575405e-06,
|
| 103882 |
+
"loss": 0.5692702531814575,
|
| 103883 |
+
"step": 14839
|
| 103884 |
+
},
|
| 103885 |
+
{
|
| 103886 |
+
"epoch": 18.208588957055216,
|
| 103887 |
+
"grad_norm": 0.27422213554382324,
|
| 103888 |
+
"learning_rate": 1.067767054480387e-06,
|
| 103889 |
+
"loss": 0.786743700504303,
|
| 103890 |
+
"step": 14840
|
| 103891 |
+
},
|
| 103892 |
+
{
|
| 103893 |
+
"epoch": 18.209815950920245,
|
| 103894 |
+
"grad_norm": 0.26179584860801697,
|
| 103895 |
+
"learning_rate": 1.06631633834329e-06,
|
| 103896 |
+
"loss": 0.48063573241233826,
|
| 103897 |
+
"step": 14841
|
| 103898 |
+
},
|
| 103899 |
+
{
|
| 103900 |
+
"epoch": 18.211042944785277,
|
| 103901 |
+
"grad_norm": 0.29538387060165405,
|
| 103902 |
+
"learning_rate": 1.0648665869047198e-06,
|
| 103903 |
+
"loss": 0.5721991062164307,
|
| 103904 |
+
"step": 14842
|
| 103905 |
+
},
|
| 103906 |
+
{
|
| 103907 |
+
"epoch": 18.212269938650305,
|
| 103908 |
+
"grad_norm": 0.29689571261405945,
|
| 103909 |
+
"learning_rate": 1.0634178002231098e-06,
|
| 103910 |
+
"loss": 0.563075602054596,
|
| 103911 |
+
"step": 14843
|
| 103912 |
+
},
|
| 103913 |
+
{
|
| 103914 |
+
"epoch": 18.213496932515337,
|
| 103915 |
+
"grad_norm": 0.24677696824073792,
|
| 103916 |
+
"learning_rate": 1.0619699783568583e-06,
|
| 103917 |
+
"loss": 0.6379353404045105,
|
| 103918 |
+
"step": 14844
|
| 103919 |
+
},
|
| 103920 |
+
{
|
| 103921 |
+
"epoch": 18.21472392638037,
|
| 103922 |
+
"grad_norm": 0.27494364976882935,
|
| 103923 |
+
"learning_rate": 1.0605231213643262e-06,
|
| 103924 |
+
"loss": 0.739030122756958,
|
| 103925 |
+
"step": 14845
|
| 103926 |
+
},
|
| 103927 |
+
{
|
| 103928 |
+
"epoch": 18.215950920245398,
|
| 103929 |
+
"grad_norm": 0.27808767557144165,
|
| 103930 |
+
"learning_rate": 1.0590772293038259e-06,
|
| 103931 |
+
"loss": 0.5933088064193726,
|
| 103932 |
+
"step": 14846
|
| 103933 |
+
},
|
| 103934 |
+
{
|
| 103935 |
+
"epoch": 18.21717791411043,
|
| 103936 |
+
"grad_norm": 0.27176904678344727,
|
| 103937 |
+
"learning_rate": 1.0576323022336438e-06,
|
| 103938 |
+
"loss": 0.6576582193374634,
|
| 103939 |
+
"step": 14847
|
| 103940 |
+
},
|
| 103941 |
+
{
|
| 103942 |
+
"epoch": 18.21840490797546,
|
| 103943 |
+
"grad_norm": 0.26141417026519775,
|
| 103944 |
+
"learning_rate": 1.0561883402120166e-06,
|
| 103945 |
+
"loss": 0.4209238290786743,
|
| 103946 |
+
"step": 14848
|
| 103947 |
+
},
|
| 103948 |
+
{
|
| 103949 |
+
"epoch": 18.21963190184049,
|
| 103950 |
+
"grad_norm": 0.25454291701316833,
|
| 103951 |
+
"learning_rate": 1.0547453432971505e-06,
|
| 103952 |
+
"loss": 0.5694785118103027,
|
| 103953 |
+
"step": 14849
|
| 103954 |
+
},
|
| 103955 |
+
{
|
| 103956 |
+
"epoch": 18.220858895705522,
|
| 103957 |
+
"grad_norm": 0.25230881571769714,
|
| 103958 |
+
"learning_rate": 1.0533033115472047e-06,
|
| 103959 |
+
"loss": 0.6020994186401367,
|
| 103960 |
+
"step": 14850
|
| 103961 |
+
},
|
| 103962 |
+
{
|
| 103963 |
+
"epoch": 18.22208588957055,
|
| 103964 |
+
"grad_norm": 0.28071144223213196,
|
| 103965 |
+
"learning_rate": 1.0518622450203019e-06,
|
| 103966 |
+
"loss": 0.6659685373306274,
|
| 103967 |
+
"step": 14851
|
| 103968 |
+
},
|
| 103969 |
+
{
|
| 103970 |
+
"epoch": 18.223312883435582,
|
| 103971 |
+
"grad_norm": 0.288470983505249,
|
| 103972 |
+
"learning_rate": 1.0504221437745321e-06,
|
| 103973 |
+
"loss": 0.627869725227356,
|
| 103974 |
+
"step": 14852
|
| 103975 |
+
},
|
| 103976 |
+
{
|
| 103977 |
+
"epoch": 18.224539877300614,
|
| 103978 |
+
"grad_norm": 0.27747583389282227,
|
| 103979 |
+
"learning_rate": 1.0489830078679374e-06,
|
| 103980 |
+
"loss": 0.5762244462966919,
|
| 103981 |
+
"step": 14853
|
| 103982 |
+
},
|
| 103983 |
+
{
|
| 103984 |
+
"epoch": 18.225766871165643,
|
| 103985 |
+
"grad_norm": 0.25105953216552734,
|
| 103986 |
+
"learning_rate": 1.0475448373585273e-06,
|
| 103987 |
+
"loss": 0.5180539488792419,
|
| 103988 |
+
"step": 14854
|
| 103989 |
+
},
|
| 103990 |
+
{
|
| 103991 |
+
"epoch": 18.226993865030675,
|
| 103992 |
+
"grad_norm": 0.27241671085357666,
|
| 103993 |
+
"learning_rate": 1.0461076323042745e-06,
|
| 103994 |
+
"loss": 0.44019559025764465,
|
| 103995 |
+
"step": 14855
|
| 103996 |
+
},
|
| 103997 |
+
{
|
| 103998 |
+
"epoch": 18.228220858895707,
|
| 103999 |
+
"grad_norm": 0.2730450928211212,
|
| 104000 |
+
"learning_rate": 1.0446713927630997e-06,
|
| 104001 |
+
"loss": 0.7107751369476318,
|
| 104002 |
+
"step": 14856
|
| 104003 |
+
},
|
| 104004 |
+
{
|
| 104005 |
+
"epoch": 18.229447852760735,
|
| 104006 |
+
"grad_norm": 0.24653923511505127,
|
| 104007 |
+
"learning_rate": 1.0432361187929007e-06,
|
| 104008 |
+
"loss": 0.606518030166626,
|
| 104009 |
+
"step": 14857
|
| 104010 |
+
},
|
| 104011 |
+
{
|
| 104012 |
+
"epoch": 18.230674846625767,
|
| 104013 |
+
"grad_norm": 0.31581035256385803,
|
| 104014 |
+
"learning_rate": 1.0418018104515259e-06,
|
| 104015 |
+
"loss": 0.668636679649353,
|
| 104016 |
+
"step": 14858
|
| 104017 |
+
},
|
| 104018 |
+
{
|
| 104019 |
+
"epoch": 18.2319018404908,
|
| 104020 |
+
"grad_norm": 0.24100524187088013,
|
| 104021 |
+
"learning_rate": 1.0403684677967873e-06,
|
| 104022 |
+
"loss": 0.4480138421058655,
|
| 104023 |
+
"step": 14859
|
| 104024 |
+
},
|
| 104025 |
+
{
|
| 104026 |
+
"epoch": 18.233128834355828,
|
| 104027 |
+
"grad_norm": 0.2734662890434265,
|
| 104028 |
+
"learning_rate": 1.0389360908864665e-06,
|
| 104029 |
+
"loss": 0.5586706399917603,
|
| 104030 |
+
"step": 14860
|
| 104031 |
+
},
|
| 104032 |
+
{
|
| 104033 |
+
"epoch": 18.23435582822086,
|
| 104034 |
+
"grad_norm": 0.2974356412887573,
|
| 104035 |
+
"learning_rate": 1.0375046797782866e-06,
|
| 104036 |
+
"loss": 0.5544067025184631,
|
| 104037 |
+
"step": 14861
|
| 104038 |
+
},
|
| 104039 |
+
{
|
| 104040 |
+
"epoch": 18.235582822085888,
|
| 104041 |
+
"grad_norm": 0.2644416093826294,
|
| 104042 |
+
"learning_rate": 1.0360742345299517e-06,
|
| 104043 |
+
"loss": 0.5195169448852539,
|
| 104044 |
+
"step": 14862
|
| 104045 |
+
},
|
| 104046 |
+
{
|
| 104047 |
+
"epoch": 18.23680981595092,
|
| 104048 |
+
"grad_norm": 0.26911020278930664,
|
| 104049 |
+
"learning_rate": 1.0346447551991184e-06,
|
| 104050 |
+
"loss": 0.6369551420211792,
|
| 104051 |
+
"step": 14863
|
| 104052 |
+
},
|
| 104053 |
+
{
|
| 104054 |
+
"epoch": 18.238036809815952,
|
| 104055 |
+
"grad_norm": 0.25652408599853516,
|
| 104056 |
+
"learning_rate": 1.0332162418434016e-06,
|
| 104057 |
+
"loss": 0.41512924432754517,
|
| 104058 |
+
"step": 14864
|
| 104059 |
+
},
|
| 104060 |
+
{
|
| 104061 |
+
"epoch": 18.23926380368098,
|
| 104062 |
+
"grad_norm": 0.24807386100292206,
|
| 104063 |
+
"learning_rate": 1.0317886945203858e-06,
|
| 104064 |
+
"loss": 0.5505387783050537,
|
| 104065 |
+
"step": 14865
|
| 104066 |
+
},
|
| 104067 |
+
{
|
| 104068 |
+
"epoch": 18.240490797546013,
|
| 104069 |
+
"grad_norm": 0.27318164706230164,
|
| 104070 |
+
"learning_rate": 1.0303621132876084e-06,
|
| 104071 |
+
"loss": 0.5980280637741089,
|
| 104072 |
+
"step": 14866
|
| 104073 |
+
},
|
| 104074 |
+
{
|
| 104075 |
+
"epoch": 18.241717791411045,
|
| 104076 |
+
"grad_norm": 0.2699911296367645,
|
| 104077 |
+
"learning_rate": 1.0289364982025678e-06,
|
| 104078 |
+
"loss": 0.7036045789718628,
|
| 104079 |
+
"step": 14867
|
| 104080 |
+
},
|
| 104081 |
+
{
|
| 104082 |
+
"epoch": 18.242944785276073,
|
| 104083 |
+
"grad_norm": 0.28117480874061584,
|
| 104084 |
+
"learning_rate": 1.0275118493227265e-06,
|
| 104085 |
+
"loss": 0.4618714451789856,
|
| 104086 |
+
"step": 14868
|
| 104087 |
+
},
|
| 104088 |
+
{
|
| 104089 |
+
"epoch": 18.244171779141105,
|
| 104090 |
+
"grad_norm": 0.2638676166534424,
|
| 104091 |
+
"learning_rate": 1.026088166705516e-06,
|
| 104092 |
+
"loss": 0.4010728597640991,
|
| 104093 |
+
"step": 14869
|
| 104094 |
+
},
|
| 104095 |
+
{
|
| 104096 |
+
"epoch": 18.245398773006134,
|
| 104097 |
+
"grad_norm": 0.21647456288337708,
|
| 104098 |
+
"learning_rate": 1.0246654504083158e-06,
|
| 104099 |
+
"loss": 0.38217219710350037,
|
| 104100 |
+
"step": 14870
|
| 104101 |
+
},
|
| 104102 |
+
{
|
| 104103 |
+
"epoch": 18.246625766871166,
|
| 104104 |
+
"grad_norm": 0.28368106484413147,
|
| 104105 |
+
"learning_rate": 1.0232437004884688e-06,
|
| 104106 |
+
"loss": 0.7266005873680115,
|
| 104107 |
+
"step": 14871
|
| 104108 |
+
},
|
| 104109 |
+
{
|
| 104110 |
+
"epoch": 18.247852760736198,
|
| 104111 |
+
"grad_norm": 0.2680337429046631,
|
| 104112 |
+
"learning_rate": 1.0218229170032845e-06,
|
| 104113 |
+
"loss": 0.6680964827537537,
|
| 104114 |
+
"step": 14872
|
| 104115 |
+
},
|
| 104116 |
+
{
|
| 104117 |
+
"epoch": 18.249079754601226,
|
| 104118 |
+
"grad_norm": 0.2281462699174881,
|
| 104119 |
+
"learning_rate": 1.0204031000100312e-06,
|
| 104120 |
+
"loss": 0.6115928888320923,
|
| 104121 |
+
"step": 14873
|
| 104122 |
+
},
|
| 104123 |
+
{
|
| 104124 |
+
"epoch": 18.250306748466258,
|
| 104125 |
+
"grad_norm": 0.2603338956832886,
|
| 104126 |
+
"learning_rate": 1.0189842495659407e-06,
|
| 104127 |
+
"loss": 0.5300279855728149,
|
| 104128 |
+
"step": 14874
|
| 104129 |
+
},
|
| 104130 |
+
{
|
| 104131 |
+
"epoch": 18.25153374233129,
|
| 104132 |
+
"grad_norm": 0.2658950984477997,
|
| 104133 |
+
"learning_rate": 1.0175663657281954e-06,
|
| 104134 |
+
"loss": 0.6190230846405029,
|
| 104135 |
+
"step": 14875
|
| 104136 |
+
},
|
| 104137 |
+
{
|
| 104138 |
+
"epoch": 18.25276073619632,
|
| 104139 |
+
"grad_norm": 0.2600836157798767,
|
| 104140 |
+
"learning_rate": 1.016149448553949e-06,
|
| 104141 |
+
"loss": 0.7614256143569946,
|
| 104142 |
+
"step": 14876
|
| 104143 |
+
},
|
| 104144 |
+
{
|
| 104145 |
+
"epoch": 18.25398773006135,
|
| 104146 |
+
"grad_norm": 0.26288658380508423,
|
| 104147 |
+
"learning_rate": 1.0147334981003177e-06,
|
| 104148 |
+
"loss": 0.651297926902771,
|
| 104149 |
+
"step": 14877
|
| 104150 |
+
},
|
| 104151 |
+
{
|
| 104152 |
+
"epoch": 18.25521472392638,
|
| 104153 |
+
"grad_norm": 0.283282995223999,
|
| 104154 |
+
"learning_rate": 1.0133185144243662e-06,
|
| 104155 |
+
"loss": 0.5358506441116333,
|
| 104156 |
+
"step": 14878
|
| 104157 |
+
},
|
| 104158 |
+
{
|
| 104159 |
+
"epoch": 18.25644171779141,
|
| 104160 |
+
"grad_norm": 0.2618766129016876,
|
| 104161 |
+
"learning_rate": 1.0119044975831383e-06,
|
| 104162 |
+
"loss": 0.4243462085723877,
|
| 104163 |
+
"step": 14879
|
| 104164 |
+
},
|
| 104165 |
+
{
|
| 104166 |
+
"epoch": 18.257668711656443,
|
| 104167 |
+
"grad_norm": 0.24442726373672485,
|
| 104168 |
+
"learning_rate": 1.0104914476336214e-06,
|
| 104169 |
+
"loss": 0.6007345914840698,
|
| 104170 |
+
"step": 14880
|
| 104171 |
+
},
|
| 104172 |
+
{
|
| 104173 |
+
"epoch": 18.25889570552147,
|
| 104174 |
+
"grad_norm": 0.27434179186820984,
|
| 104175 |
+
"learning_rate": 1.0090793646327756e-06,
|
| 104176 |
+
"loss": 0.558912992477417,
|
| 104177 |
+
"step": 14881
|
| 104178 |
+
},
|
| 104179 |
+
{
|
| 104180 |
+
"epoch": 18.260122699386503,
|
| 104181 |
+
"grad_norm": 0.28145796060562134,
|
| 104182 |
+
"learning_rate": 1.0076682486375138e-06,
|
| 104183 |
+
"loss": 0.6267221570014954,
|
| 104184 |
+
"step": 14882
|
| 104185 |
+
},
|
| 104186 |
+
{
|
| 104187 |
+
"epoch": 18.261349693251535,
|
| 104188 |
+
"grad_norm": 0.28936028480529785,
|
| 104189 |
+
"learning_rate": 1.0062580997047155e-06,
|
| 104190 |
+
"loss": 0.5278307795524597,
|
| 104191 |
+
"step": 14883
|
| 104192 |
+
},
|
| 104193 |
+
{
|
| 104194 |
+
"epoch": 18.262576687116564,
|
| 104195 |
+
"grad_norm": 0.2782147228717804,
|
| 104196 |
+
"learning_rate": 1.0048489178912269e-06,
|
| 104197 |
+
"loss": 0.6753562688827515,
|
| 104198 |
+
"step": 14884
|
| 104199 |
+
},
|
| 104200 |
+
{
|
| 104201 |
+
"epoch": 18.263803680981596,
|
| 104202 |
+
"grad_norm": 0.2909688651561737,
|
| 104203 |
+
"learning_rate": 1.0034407032538384e-06,
|
| 104204 |
+
"loss": 0.548197865486145,
|
| 104205 |
+
"step": 14885
|
| 104206 |
+
},
|
| 104207 |
+
{
|
| 104208 |
+
"epoch": 18.265030674846624,
|
| 104209 |
+
"grad_norm": 0.2500540018081665,
|
| 104210 |
+
"learning_rate": 1.0020334558493134e-06,
|
| 104211 |
+
"loss": 0.5557026863098145,
|
| 104212 |
+
"step": 14886
|
| 104213 |
+
},
|
| 104214 |
+
{
|
| 104215 |
+
"epoch": 18.266257668711656,
|
| 104216 |
+
"grad_norm": 0.25284066796302795,
|
| 104217 |
+
"learning_rate": 1.0006271757343782e-06,
|
| 104218 |
+
"loss": 0.44580674171447754,
|
| 104219 |
+
"step": 14887
|
| 104220 |
+
},
|
| 104221 |
+
{
|
| 104222 |
+
"epoch": 18.26748466257669,
|
| 104223 |
+
"grad_norm": 0.2701283395290375,
|
| 104224 |
+
"learning_rate": 9.9922186296571e-07,
|
| 104225 |
+
"loss": 0.5255430936813354,
|
| 104226 |
+
"step": 14888
|
| 104227 |
+
},
|
| 104228 |
+
{
|
| 104229 |
+
"epoch": 18.268711656441717,
|
| 104230 |
+
"grad_norm": 0.25274714827537537,
|
| 104231 |
+
"learning_rate": 9.97817517599961e-07,
|
| 104232 |
+
"loss": 0.5965268611907959,
|
| 104233 |
+
"step": 14889
|
| 104234 |
+
},
|
| 104235 |
+
{
|
| 104236 |
+
"epoch": 18.26993865030675,
|
| 104237 |
+
"grad_norm": 0.24520404636859894,
|
| 104238 |
+
"learning_rate": 9.964141396937243e-07,
|
| 104239 |
+
"loss": 0.47859692573547363,
|
| 104240 |
+
"step": 14890
|
| 104241 |
+
},
|
| 104242 |
+
{
|
| 104243 |
+
"epoch": 18.27116564417178,
|
| 104244 |
+
"grad_norm": 0.24645228683948517,
|
| 104245 |
+
"learning_rate": 9.950117293035771e-07,
|
| 104246 |
+
"loss": 0.45457926392555237,
|
| 104247 |
+
"step": 14891
|
| 104248 |
+
},
|
| 104249 |
+
{
|
| 104250 |
+
"epoch": 18.27239263803681,
|
| 104251 |
+
"grad_norm": 0.2678927183151245,
|
| 104252 |
+
"learning_rate": 9.936102864860435e-07,
|
| 104253 |
+
"loss": 0.5717335343360901,
|
| 104254 |
+
"step": 14892
|
| 104255 |
+
},
|
| 104256 |
+
{
|
| 104257 |
+
"epoch": 18.27361963190184,
|
| 104258 |
+
"grad_norm": 0.2451895922422409,
|
| 104259 |
+
"learning_rate": 9.92209811297612e-07,
|
| 104260 |
+
"loss": 0.628893256187439,
|
| 104261 |
+
"step": 14893
|
| 104262 |
+
},
|
| 104263 |
+
{
|
| 104264 |
+
"epoch": 18.27484662576687,
|
| 104265 |
+
"grad_norm": 0.27695536613464355,
|
| 104266 |
+
"learning_rate": 9.908103037947286e-07,
|
| 104267 |
+
"loss": 0.7185591459274292,
|
| 104268 |
+
"step": 14894
|
| 104269 |
+
},
|
| 104270 |
+
{
|
| 104271 |
+
"epoch": 18.2760736196319,
|
| 104272 |
+
"grad_norm": 0.23462247848510742,
|
| 104273 |
+
"learning_rate": 9.89411764033807e-07,
|
| 104274 |
+
"loss": 0.5936027765274048,
|
| 104275 |
+
"step": 14895
|
| 104276 |
+
},
|
| 104277 |
+
{
|
| 104278 |
+
"epoch": 18.277300613496934,
|
| 104279 |
+
"grad_norm": 0.27535203099250793,
|
| 104280 |
+
"learning_rate": 9.880141920712155e-07,
|
| 104281 |
+
"loss": 0.6924750804901123,
|
| 104282 |
+
"step": 14896
|
| 104283 |
+
},
|
| 104284 |
+
{
|
| 104285 |
+
"epoch": 18.278527607361962,
|
| 104286 |
+
"grad_norm": 0.25745025277137756,
|
| 104287 |
+
"learning_rate": 9.866175879632871e-07,
|
| 104288 |
+
"loss": 0.6263197660446167,
|
| 104289 |
+
"step": 14897
|
| 104290 |
+
},
|
| 104291 |
+
{
|
| 104292 |
+
"epoch": 18.279754601226994,
|
| 104293 |
+
"grad_norm": 0.2636398375034332,
|
| 104294 |
+
"learning_rate": 9.852219517663186e-07,
|
| 104295 |
+
"loss": 0.7402126789093018,
|
| 104296 |
+
"step": 14898
|
| 104297 |
+
},
|
| 104298 |
+
{
|
| 104299 |
+
"epoch": 18.280981595092026,
|
| 104300 |
+
"grad_norm": 0.25433310866355896,
|
| 104301 |
+
"learning_rate": 9.838272835365564e-07,
|
| 104302 |
+
"loss": 0.5887166261672974,
|
| 104303 |
+
"step": 14899
|
| 104304 |
+
},
|
| 104305 |
+
{
|
| 104306 |
+
"epoch": 18.282208588957054,
|
| 104307 |
+
"grad_norm": 0.25136280059814453,
|
| 104308 |
+
"learning_rate": 9.824335833302222e-07,
|
| 104309 |
+
"loss": 0.5705921649932861,
|
| 104310 |
+
"step": 14900
|
| 104311 |
+
},
|
| 104312 |
+
{
|
| 104313 |
+
"epoch": 18.283435582822086,
|
| 104314 |
+
"grad_norm": 0.2858131229877472,
|
| 104315 |
+
"learning_rate": 9.810408512034908e-07,
|
| 104316 |
+
"loss": 0.5064578652381897,
|
| 104317 |
+
"step": 14901
|
| 104318 |
+
},
|
| 104319 |
+
{
|
| 104320 |
+
"epoch": 18.284662576687115,
|
| 104321 |
+
"grad_norm": 0.2772005498409271,
|
| 104322 |
+
"learning_rate": 9.796490872124975e-07,
|
| 104323 |
+
"loss": 0.7065432071685791,
|
| 104324 |
+
"step": 14902
|
| 104325 |
+
},
|
| 104326 |
+
{
|
| 104327 |
+
"epoch": 18.285889570552147,
|
| 104328 |
+
"grad_norm": 0.26755502820014954,
|
| 104329 |
+
"learning_rate": 9.78258291413345e-07,
|
| 104330 |
+
"loss": 0.6741496324539185,
|
| 104331 |
+
"step": 14903
|
| 104332 |
+
},
|
| 104333 |
+
{
|
| 104334 |
+
"epoch": 18.28711656441718,
|
| 104335 |
+
"grad_norm": 0.25398534536361694,
|
| 104336 |
+
"learning_rate": 9.768684638620824e-07,
|
| 104337 |
+
"loss": 0.6294674873352051,
|
| 104338 |
+
"step": 14904
|
| 104339 |
+
},
|
| 104340 |
+
{
|
| 104341 |
+
"epoch": 18.288343558282207,
|
| 104342 |
+
"grad_norm": 0.2427646517753601,
|
| 104343 |
+
"learning_rate": 9.754796046147402e-07,
|
| 104344 |
+
"loss": 0.6190599799156189,
|
| 104345 |
+
"step": 14905
|
| 104346 |
+
},
|
| 104347 |
+
{
|
| 104348 |
+
"epoch": 18.28957055214724,
|
| 104349 |
+
"grad_norm": 0.21861590445041656,
|
| 104350 |
+
"learning_rate": 9.740917137272932e-07,
|
| 104351 |
+
"loss": 0.469300240278244,
|
| 104352 |
+
"step": 14906
|
| 104353 |
+
},
|
| 104354 |
+
{
|
| 104355 |
+
"epoch": 18.29079754601227,
|
| 104356 |
+
"grad_norm": 0.27587732672691345,
|
| 104357 |
+
"learning_rate": 9.727047912556853e-07,
|
| 104358 |
+
"loss": 0.5593298077583313,
|
| 104359 |
+
"step": 14907
|
| 104360 |
+
},
|
| 104361 |
+
{
|
| 104362 |
+
"epoch": 18.2920245398773,
|
| 104363 |
+
"grad_norm": 0.2588701546192169,
|
| 104364 |
+
"learning_rate": 9.71318837255822e-07,
|
| 104365 |
+
"loss": 0.5040408968925476,
|
| 104366 |
+
"step": 14908
|
| 104367 |
+
},
|
| 104368 |
+
{
|
| 104369 |
+
"epoch": 18.293251533742332,
|
| 104370 |
+
"grad_norm": 0.2517450749874115,
|
| 104371 |
+
"learning_rate": 9.69933851783561e-07,
|
| 104372 |
+
"loss": 0.5501172542572021,
|
| 104373 |
+
"step": 14909
|
| 104374 |
+
},
|
| 104375 |
+
{
|
| 104376 |
+
"epoch": 18.29447852760736,
|
| 104377 |
+
"grad_norm": 0.2505769729614258,
|
| 104378 |
+
"learning_rate": 9.685498348947303e-07,
|
| 104379 |
+
"loss": 0.6196407079696655,
|
| 104380 |
+
"step": 14910
|
| 104381 |
+
},
|
| 104382 |
+
{
|
| 104383 |
+
"epoch": 18.295705521472392,
|
| 104384 |
+
"grad_norm": 0.22502915561199188,
|
| 104385 |
+
"learning_rate": 9.671667866451156e-07,
|
| 104386 |
+
"loss": 0.3413742184638977,
|
| 104387 |
+
"step": 14911
|
| 104388 |
+
},
|
| 104389 |
+
{
|
| 104390 |
+
"epoch": 18.296932515337424,
|
| 104391 |
+
"grad_norm": 0.2648622989654541,
|
| 104392 |
+
"learning_rate": 9.65784707090467e-07,
|
| 104393 |
+
"loss": 0.6112321615219116,
|
| 104394 |
+
"step": 14912
|
| 104395 |
+
},
|
| 104396 |
+
{
|
| 104397 |
+
"epoch": 18.298159509202453,
|
| 104398 |
+
"grad_norm": 0.28468289971351624,
|
| 104399 |
+
"learning_rate": 9.644035962864866e-07,
|
| 104400 |
+
"loss": 0.6045237183570862,
|
| 104401 |
+
"step": 14913
|
| 104402 |
+
},
|
| 104403 |
+
{
|
| 104404 |
+
"epoch": 18.299386503067485,
|
| 104405 |
+
"grad_norm": 0.27909916639328003,
|
| 104406 |
+
"learning_rate": 9.630234542888472e-07,
|
| 104407 |
+
"loss": 0.5847504138946533,
|
| 104408 |
+
"step": 14914
|
| 104409 |
+
},
|
| 104410 |
+
{
|
| 104411 |
+
"epoch": 18.300613496932517,
|
| 104412 |
+
"grad_norm": 0.28523364663124084,
|
| 104413 |
+
"learning_rate": 9.616442811531735e-07,
|
| 104414 |
+
"loss": 0.46721869707107544,
|
| 104415 |
+
"step": 14915
|
| 104416 |
+
},
|
| 104417 |
+
{
|
| 104418 |
+
"epoch": 18.301840490797545,
|
| 104419 |
+
"grad_norm": 0.2266593724489212,
|
| 104420 |
+
"learning_rate": 9.602660769350624e-07,
|
| 104421 |
+
"loss": 0.5114679336547852,
|
| 104422 |
+
"step": 14916
|
| 104423 |
+
},
|
| 104424 |
+
{
|
| 104425 |
+
"epoch": 18.303067484662577,
|
| 104426 |
+
"grad_norm": 0.29576271772384644,
|
| 104427 |
+
"learning_rate": 9.588888416900616e-07,
|
| 104428 |
+
"loss": 0.44320952892303467,
|
| 104429 |
+
"step": 14917
|
| 104430 |
+
},
|
| 104431 |
+
{
|
| 104432 |
+
"epoch": 18.30429447852761,
|
| 104433 |
+
"grad_norm": 0.2592233419418335,
|
| 104434 |
+
"learning_rate": 9.575125754736874e-07,
|
| 104435 |
+
"loss": 0.4973076581954956,
|
| 104436 |
+
"step": 14918
|
| 104437 |
+
},
|
| 104438 |
+
{
|
| 104439 |
+
"epoch": 18.305521472392638,
|
| 104440 |
+
"grad_norm": 0.2702350914478302,
|
| 104441 |
+
"learning_rate": 9.56137278341407e-07,
|
| 104442 |
+
"loss": 0.44164231419563293,
|
| 104443 |
+
"step": 14919
|
| 104444 |
+
},
|
| 104445 |
+
{
|
| 104446 |
+
"epoch": 18.30674846625767,
|
| 104447 |
+
"grad_norm": 0.30028244853019714,
|
| 104448 |
+
"learning_rate": 9.547629503486561e-07,
|
| 104449 |
+
"loss": 0.42793703079223633,
|
| 104450 |
+
"step": 14920
|
| 104451 |
+
},
|
| 104452 |
+
{
|
| 104453 |
+
"epoch": 18.307975460122698,
|
| 104454 |
+
"grad_norm": 0.26055872440338135,
|
| 104455 |
+
"learning_rate": 9.533895915508323e-07,
|
| 104456 |
+
"loss": 0.5761421918869019,
|
| 104457 |
+
"step": 14921
|
| 104458 |
+
},
|
| 104459 |
+
{
|
| 104460 |
+
"epoch": 18.30920245398773,
|
| 104461 |
+
"grad_norm": 0.2877582013607025,
|
| 104462 |
+
"learning_rate": 9.520172020032913e-07,
|
| 104463 |
+
"loss": 0.6294504404067993,
|
| 104464 |
+
"step": 14922
|
| 104465 |
+
},
|
| 104466 |
+
{
|
| 104467 |
+
"epoch": 18.310429447852762,
|
| 104468 |
+
"grad_norm": 0.2876107692718506,
|
| 104469 |
+
"learning_rate": 9.506457817613529e-07,
|
| 104470 |
+
"loss": 0.5706079006195068,
|
| 104471 |
+
"step": 14923
|
| 104472 |
+
},
|
| 104473 |
+
{
|
| 104474 |
+
"epoch": 18.31165644171779,
|
| 104475 |
+
"grad_norm": 0.19851307570934296,
|
| 104476 |
+
"learning_rate": 9.492753308802893e-07,
|
| 104477 |
+
"loss": 0.16998180747032166,
|
| 104478 |
+
"step": 14924
|
| 104479 |
+
},
|
| 104480 |
+
{
|
| 104481 |
+
"epoch": 18.312883435582823,
|
| 104482 |
+
"grad_norm": 0.26523008942604065,
|
| 104483 |
+
"learning_rate": 9.479058494153425e-07,
|
| 104484 |
+
"loss": 0.7710438370704651,
|
| 104485 |
+
"step": 14925
|
| 104486 |
+
},
|
| 104487 |
+
{
|
| 104488 |
+
"epoch": 18.314110429447855,
|
| 104489 |
+
"grad_norm": 0.29215750098228455,
|
| 104490 |
+
"learning_rate": 9.465373374217102e-07,
|
| 104491 |
+
"loss": 0.5719816088676453,
|
| 104492 |
+
"step": 14926
|
| 104493 |
+
},
|
| 104494 |
+
{
|
| 104495 |
+
"epoch": 18.315337423312883,
|
| 104496 |
+
"grad_norm": 0.2619645595550537,
|
| 104497 |
+
"learning_rate": 9.45169794954559e-07,
|
| 104498 |
+
"loss": 0.48013782501220703,
|
| 104499 |
+
"step": 14927
|
| 104500 |
+
},
|
| 104501 |
+
{
|
| 104502 |
+
"epoch": 18.316564417177915,
|
| 104503 |
+
"grad_norm": 0.25323647260665894,
|
| 104504 |
+
"learning_rate": 9.438032220690063e-07,
|
| 104505 |
+
"loss": 0.613540768623352,
|
| 104506 |
+
"step": 14928
|
| 104507 |
+
},
|
| 104508 |
+
{
|
| 104509 |
+
"epoch": 18.317791411042943,
|
| 104510 |
+
"grad_norm": 0.2778703570365906,
|
| 104511 |
+
"learning_rate": 9.424376188201328e-07,
|
| 104512 |
+
"loss": 0.7153140306472778,
|
| 104513 |
+
"step": 14929
|
| 104514 |
+
},
|
| 104515 |
+
{
|
| 104516 |
+
"epoch": 18.319018404907975,
|
| 104517 |
+
"grad_norm": 0.27868151664733887,
|
| 104518 |
+
"learning_rate": 9.410729852629863e-07,
|
| 104519 |
+
"loss": 0.5897961854934692,
|
| 104520 |
+
"step": 14930
|
| 104521 |
+
},
|
| 104522 |
+
{
|
| 104523 |
+
"epoch": 18.320245398773007,
|
| 104524 |
+
"grad_norm": 0.24093686044216156,
|
| 104525 |
+
"learning_rate": 9.397093214525698e-07,
|
| 104526 |
+
"loss": 0.3692380487918854,
|
| 104527 |
+
"step": 14931
|
| 104528 |
+
},
|
| 104529 |
+
{
|
| 104530 |
+
"epoch": 18.321472392638036,
|
| 104531 |
+
"grad_norm": 0.3463858962059021,
|
| 104532 |
+
"learning_rate": 9.383466274438452e-07,
|
| 104533 |
+
"loss": 0.8518167734146118,
|
| 104534 |
+
"step": 14932
|
| 104535 |
+
},
|
| 104536 |
+
{
|
| 104537 |
+
"epoch": 18.322699386503068,
|
| 104538 |
+
"grad_norm": 0.23572686314582825,
|
| 104539 |
+
"learning_rate": 9.369849032917488e-07,
|
| 104540 |
+
"loss": 0.49407288432121277,
|
| 104541 |
+
"step": 14933
|
| 104542 |
+
},
|
| 104543 |
+
{
|
| 104544 |
+
"epoch": 18.3239263803681,
|
| 104545 |
+
"grad_norm": 0.2811199724674225,
|
| 104546 |
+
"learning_rate": 9.356241490511564e-07,
|
| 104547 |
+
"loss": 0.6770088076591492,
|
| 104548 |
+
"step": 14934
|
| 104549 |
+
},
|
| 104550 |
+
{
|
| 104551 |
+
"epoch": 18.32515337423313,
|
| 104552 |
+
"grad_norm": 0.2873893678188324,
|
| 104553 |
+
"learning_rate": 9.342643647769183e-07,
|
| 104554 |
+
"loss": 0.6534485220909119,
|
| 104555 |
+
"step": 14935
|
| 104556 |
+
},
|
| 104557 |
+
{
|
| 104558 |
+
"epoch": 18.32638036809816,
|
| 104559 |
+
"grad_norm": 0.24797888100147247,
|
| 104560 |
+
"learning_rate": 9.329055505238466e-07,
|
| 104561 |
+
"loss": 0.37418586015701294,
|
| 104562 |
+
"step": 14936
|
| 104563 |
+
},
|
| 104564 |
+
{
|
| 104565 |
+
"epoch": 18.32760736196319,
|
| 104566 |
+
"grad_norm": 0.277547150850296,
|
| 104567 |
+
"learning_rate": 9.315477063467165e-07,
|
| 104568 |
+
"loss": 0.5396980047225952,
|
| 104569 |
+
"step": 14937
|
| 104570 |
+
},
|
| 104571 |
+
{
|
| 104572 |
+
"epoch": 18.32883435582822,
|
| 104573 |
+
"grad_norm": 0.2659480571746826,
|
| 104574 |
+
"learning_rate": 9.301908323002484e-07,
|
| 104575 |
+
"loss": 0.7431876063346863,
|
| 104576 |
+
"step": 14938
|
| 104577 |
+
},
|
| 104578 |
+
{
|
| 104579 |
+
"epoch": 18.330061349693253,
|
| 104580 |
+
"grad_norm": 0.2782931625843048,
|
| 104581 |
+
"learning_rate": 9.288349284391401e-07,
|
| 104582 |
+
"loss": 0.6341907978057861,
|
| 104583 |
+
"step": 14939
|
| 104584 |
+
},
|
| 104585 |
+
{
|
| 104586 |
+
"epoch": 18.33128834355828,
|
| 104587 |
+
"grad_norm": 0.2460147887468338,
|
| 104588 |
+
"learning_rate": 9.274799948180424e-07,
|
| 104589 |
+
"loss": 0.5243320465087891,
|
| 104590 |
+
"step": 14940
|
| 104591 |
+
},
|
| 104592 |
+
{
|
| 104593 |
+
"epoch": 18.332515337423313,
|
| 104594 |
+
"grad_norm": 0.29538068175315857,
|
| 104595 |
+
"learning_rate": 9.261260314915699e-07,
|
| 104596 |
+
"loss": 0.7207489013671875,
|
| 104597 |
+
"step": 14941
|
| 104598 |
+
},
|
| 104599 |
+
{
|
| 104600 |
+
"epoch": 18.333742331288345,
|
| 104601 |
+
"grad_norm": 0.28071385622024536,
|
| 104602 |
+
"learning_rate": 9.247730385142955e-07,
|
| 104603 |
+
"loss": 0.4540262818336487,
|
| 104604 |
+
"step": 14942
|
| 104605 |
+
},
|
| 104606 |
+
{
|
| 104607 |
+
"epoch": 18.334969325153374,
|
| 104608 |
+
"grad_norm": 0.23607057332992554,
|
| 104609 |
+
"learning_rate": 9.234210159407563e-07,
|
| 104610 |
+
"loss": 0.4125019907951355,
|
| 104611 |
+
"step": 14943
|
| 104612 |
+
},
|
| 104613 |
+
{
|
| 104614 |
+
"epoch": 18.336196319018406,
|
| 104615 |
+
"grad_norm": 0.2712760865688324,
|
| 104616 |
+
"learning_rate": 9.220699638254449e-07,
|
| 104617 |
+
"loss": 0.6386152505874634,
|
| 104618 |
+
"step": 14944
|
| 104619 |
+
},
|
| 104620 |
+
{
|
| 104621 |
+
"epoch": 18.337423312883434,
|
| 104622 |
+
"grad_norm": 0.2476544976234436,
|
| 104623 |
+
"learning_rate": 9.20719882222823e-07,
|
| 104624 |
+
"loss": 0.5701817870140076,
|
| 104625 |
+
"step": 14945
|
| 104626 |
+
},
|
| 104627 |
+
{
|
| 104628 |
+
"epoch": 18.338650306748466,
|
| 104629 |
+
"grad_norm": 0.2878611981868744,
|
| 104630 |
+
"learning_rate": 9.193707711873057e-07,
|
| 104631 |
+
"loss": 0.7396513223648071,
|
| 104632 |
+
"step": 14946
|
| 104633 |
+
},
|
| 104634 |
+
{
|
| 104635 |
+
"epoch": 18.339877300613498,
|
| 104636 |
+
"grad_norm": 0.2748095393180847,
|
| 104637 |
+
"learning_rate": 9.180226307732742e-07,
|
| 104638 |
+
"loss": 0.7712901830673218,
|
| 104639 |
+
"step": 14947
|
| 104640 |
+
},
|
| 104641 |
+
{
|
| 104642 |
+
"epoch": 18.341104294478527,
|
| 104643 |
+
"grad_norm": 0.31877246499061584,
|
| 104644 |
+
"learning_rate": 9.166754610350631e-07,
|
| 104645 |
+
"loss": 0.38149493932724,
|
| 104646 |
+
"step": 14948
|
| 104647 |
+
},
|
| 104648 |
+
{
|
| 104649 |
+
"epoch": 18.34233128834356,
|
| 104650 |
+
"grad_norm": 0.29055070877075195,
|
| 104651 |
+
"learning_rate": 9.153292620269787e-07,
|
| 104652 |
+
"loss": 0.7069321870803833,
|
| 104653 |
+
"step": 14949
|
| 104654 |
+
},
|
| 104655 |
+
{
|
| 104656 |
+
"epoch": 18.34355828220859,
|
| 104657 |
+
"grad_norm": 0.2262503206729889,
|
| 104658 |
+
"learning_rate": 9.139840338032779e-07,
|
| 104659 |
+
"loss": 0.3048746585845947,
|
| 104660 |
+
"step": 14950
|
| 104661 |
+
},
|
| 104662 |
+
{
|
| 104663 |
+
"epoch": 18.34478527607362,
|
| 104664 |
+
"grad_norm": 0.27305489778518677,
|
| 104665 |
+
"learning_rate": 9.126397764181866e-07,
|
| 104666 |
+
"loss": 0.5030574798583984,
|
| 104667 |
+
"step": 14951
|
| 104668 |
+
},
|
| 104669 |
+
{
|
| 104670 |
+
"epoch": 18.34601226993865,
|
| 104671 |
+
"grad_norm": 0.24597647786140442,
|
| 104672 |
+
"learning_rate": 9.112964899258891e-07,
|
| 104673 |
+
"loss": 0.5498008728027344,
|
| 104674 |
+
"step": 14952
|
| 104675 |
+
},
|
| 104676 |
+
{
|
| 104677 |
+
"epoch": 18.34723926380368,
|
| 104678 |
+
"grad_norm": 0.2601284086704254,
|
| 104679 |
+
"learning_rate": 9.099541743805257e-07,
|
| 104680 |
+
"loss": 0.5760515332221985,
|
| 104681 |
+
"step": 14953
|
| 104682 |
+
},
|
| 104683 |
+
{
|
| 104684 |
+
"epoch": 18.34846625766871,
|
| 104685 |
+
"grad_norm": 0.27047309279441833,
|
| 104686 |
+
"learning_rate": 9.086128298362001e-07,
|
| 104687 |
+
"loss": 0.3212139308452606,
|
| 104688 |
+
"step": 14954
|
| 104689 |
+
},
|
| 104690 |
+
{
|
| 104691 |
+
"epoch": 18.349693251533743,
|
| 104692 |
+
"grad_norm": 0.302736759185791,
|
| 104693 |
+
"learning_rate": 9.072724563469831e-07,
|
| 104694 |
+
"loss": 0.8365401029586792,
|
| 104695 |
+
"step": 14955
|
| 104696 |
+
},
|
| 104697 |
+
{
|
| 104698 |
+
"epoch": 18.350920245398772,
|
| 104699 |
+
"grad_norm": 0.28552481532096863,
|
| 104700 |
+
"learning_rate": 9.059330539668953e-07,
|
| 104701 |
+
"loss": 0.6756923794746399,
|
| 104702 |
+
"step": 14956
|
| 104703 |
+
},
|
| 104704 |
+
{
|
| 104705 |
+
"epoch": 18.352147239263804,
|
| 104706 |
+
"grad_norm": 0.25062835216522217,
|
| 104707 |
+
"learning_rate": 9.045946227499297e-07,
|
| 104708 |
+
"loss": 0.5296375751495361,
|
| 104709 |
+
"step": 14957
|
| 104710 |
+
},
|
| 104711 |
+
{
|
| 104712 |
+
"epoch": 18.353374233128836,
|
| 104713 |
+
"grad_norm": 0.30365926027297974,
|
| 104714 |
+
"learning_rate": 9.032571627500319e-07,
|
| 104715 |
+
"loss": 0.7329384088516235,
|
| 104716 |
+
"step": 14958
|
| 104717 |
+
},
|
| 104718 |
+
{
|
| 104719 |
+
"epoch": 18.354601226993864,
|
| 104720 |
+
"grad_norm": 0.23207053542137146,
|
| 104721 |
+
"learning_rate": 9.019206740211144e-07,
|
| 104722 |
+
"loss": 0.4031626880168915,
|
| 104723 |
+
"step": 14959
|
| 104724 |
+
},
|
| 104725 |
+
{
|
| 104726 |
+
"epoch": 18.355828220858896,
|
| 104727 |
+
"grad_norm": 0.28662484884262085,
|
| 104728 |
+
"learning_rate": 9.005851566170426e-07,
|
| 104729 |
+
"loss": 0.6922318339347839,
|
| 104730 |
+
"step": 14960
|
| 104731 |
+
},
|
| 104732 |
+
{
|
| 104733 |
+
"epoch": 18.357055214723925,
|
| 104734 |
+
"grad_norm": 0.24328899383544922,
|
| 104735 |
+
"learning_rate": 8.992506105916509e-07,
|
| 104736 |
+
"loss": 0.6497803926467896,
|
| 104737 |
+
"step": 14961
|
| 104738 |
+
},
|
| 104739 |
+
{
|
| 104740 |
+
"epoch": 18.358282208588957,
|
| 104741 |
+
"grad_norm": 0.2737060785293579,
|
| 104742 |
+
"learning_rate": 8.979170359987299e-07,
|
| 104743 |
+
"loss": 0.4869771897792816,
|
| 104744 |
+
"step": 14962
|
| 104745 |
+
},
|
| 104746 |
+
{
|
| 104747 |
+
"epoch": 18.35950920245399,
|
| 104748 |
+
"grad_norm": 0.22458438575267792,
|
| 104749 |
+
"learning_rate": 8.965844328920281e-07,
|
| 104750 |
+
"loss": 0.31408435106277466,
|
| 104751 |
+
"step": 14963
|
| 104752 |
+
},
|
| 104753 |
+
{
|
| 104754 |
+
"epoch": 18.360736196319017,
|
| 104755 |
+
"grad_norm": 0.30391135811805725,
|
| 104756 |
+
"learning_rate": 8.952528013252665e-07,
|
| 104757 |
+
"loss": 0.6306331753730774,
|
| 104758 |
+
"step": 14964
|
| 104759 |
+
},
|
| 104760 |
+
{
|
| 104761 |
+
"epoch": 18.36196319018405,
|
| 104762 |
+
"grad_norm": 0.28336378931999207,
|
| 104763 |
+
"learning_rate": 8.939221413521132e-07,
|
| 104764 |
+
"loss": 0.5200832486152649,
|
| 104765 |
+
"step": 14965
|
| 104766 |
+
},
|
| 104767 |
+
{
|
| 104768 |
+
"epoch": 18.36319018404908,
|
| 104769 |
+
"grad_norm": 0.26279789209365845,
|
| 104770 |
+
"learning_rate": 8.925924530262087e-07,
|
| 104771 |
+
"loss": 0.6349048614501953,
|
| 104772 |
+
"step": 14966
|
| 104773 |
+
},
|
| 104774 |
+
{
|
| 104775 |
+
"epoch": 18.36441717791411,
|
| 104776 |
+
"grad_norm": 0.25689712166786194,
|
| 104777 |
+
"learning_rate": 8.912637364011434e-07,
|
| 104778 |
+
"loss": 0.6326661705970764,
|
| 104779 |
+
"step": 14967
|
| 104780 |
+
},
|
| 104781 |
+
{
|
| 104782 |
+
"epoch": 18.36564417177914,
|
| 104783 |
+
"grad_norm": 0.3032512664794922,
|
| 104784 |
+
"learning_rate": 8.899359915304772e-07,
|
| 104785 |
+
"loss": 0.6846915483474731,
|
| 104786 |
+
"step": 14968
|
| 104787 |
+
},
|
| 104788 |
+
{
|
| 104789 |
+
"epoch": 18.36687116564417,
|
| 104790 |
+
"grad_norm": 0.2609926462173462,
|
| 104791 |
+
"learning_rate": 8.886092184677258e-07,
|
| 104792 |
+
"loss": 0.7377516031265259,
|
| 104793 |
+
"step": 14969
|
| 104794 |
+
},
|
| 104795 |
+
{
|
| 104796 |
+
"epoch": 18.368098159509202,
|
| 104797 |
+
"grad_norm": 0.2782549560070038,
|
| 104798 |
+
"learning_rate": 8.872834172663714e-07,
|
| 104799 |
+
"loss": 0.5318039655685425,
|
| 104800 |
+
"step": 14970
|
| 104801 |
+
},
|
| 104802 |
+
{
|
| 104803 |
+
"epoch": 18.369325153374234,
|
| 104804 |
+
"grad_norm": 0.2657947838306427,
|
| 104805 |
+
"learning_rate": 8.859585879798515e-07,
|
| 104806 |
+
"loss": 0.781874418258667,
|
| 104807 |
+
"step": 14971
|
| 104808 |
+
},
|
| 104809 |
+
{
|
| 104810 |
+
"epoch": 18.370552147239263,
|
| 104811 |
+
"grad_norm": 0.24260665476322174,
|
| 104812 |
+
"learning_rate": 8.846347306615626e-07,
|
| 104813 |
+
"loss": 0.6092846393585205,
|
| 104814 |
+
"step": 14972
|
| 104815 |
+
},
|
| 104816 |
+
{
|
| 104817 |
+
"epoch": 18.371779141104295,
|
| 104818 |
+
"grad_norm": 0.2657587230205536,
|
| 104819 |
+
"learning_rate": 8.833118453648703e-07,
|
| 104820 |
+
"loss": 0.7326947450637817,
|
| 104821 |
+
"step": 14973
|
| 104822 |
+
},
|
| 104823 |
+
{
|
| 104824 |
+
"epoch": 18.373006134969327,
|
| 104825 |
+
"grad_norm": 0.27869996428489685,
|
| 104826 |
+
"learning_rate": 8.81989932143093e-07,
|
| 104827 |
+
"loss": 0.5424805879592896,
|
| 104828 |
+
"step": 14974
|
| 104829 |
+
},
|
| 104830 |
+
{
|
| 104831 |
+
"epoch": 18.374233128834355,
|
| 104832 |
+
"grad_norm": 0.291361004114151,
|
| 104833 |
+
"learning_rate": 8.806689910495158e-07,
|
| 104834 |
+
"loss": 0.5088820457458496,
|
| 104835 |
+
"step": 14975
|
| 104836 |
+
},
|
| 104837 |
+
{
|
| 104838 |
+
"epoch": 18.375460122699387,
|
| 104839 |
+
"grad_norm": 0.24230031669139862,
|
| 104840 |
+
"learning_rate": 8.793490221373851e-07,
|
| 104841 |
+
"loss": 0.487094521522522,
|
| 104842 |
+
"step": 14976
|
| 104843 |
+
},
|
| 104844 |
+
{
|
| 104845 |
+
"epoch": 18.376687116564415,
|
| 104846 |
+
"grad_norm": 0.26783737540245056,
|
| 104847 |
+
"learning_rate": 8.780300254598944e-07,
|
| 104848 |
+
"loss": 0.7180638313293457,
|
| 104849 |
+
"step": 14977
|
| 104850 |
+
},
|
| 104851 |
+
{
|
| 104852 |
+
"epoch": 18.377914110429447,
|
| 104853 |
+
"grad_norm": 0.3031861186027527,
|
| 104854 |
+
"learning_rate": 8.767120010702179e-07,
|
| 104855 |
+
"loss": 0.6006426215171814,
|
| 104856 |
+
"step": 14978
|
| 104857 |
+
},
|
| 104858 |
+
{
|
| 104859 |
+
"epoch": 18.37914110429448,
|
| 104860 |
+
"grad_norm": 0.26976341009140015,
|
| 104861 |
+
"learning_rate": 8.753949490214768e-07,
|
| 104862 |
+
"loss": 0.6655715107917786,
|
| 104863 |
+
"step": 14979
|
| 104864 |
+
},
|
| 104865 |
+
{
|
| 104866 |
+
"epoch": 18.380368098159508,
|
| 104867 |
+
"grad_norm": 0.26862215995788574,
|
| 104868 |
+
"learning_rate": 8.740788693667651e-07,
|
| 104869 |
+
"loss": 0.6731605529785156,
|
| 104870 |
+
"step": 14980
|
| 104871 |
+
},
|
| 104872 |
+
{
|
| 104873 |
+
"epoch": 18.38159509202454,
|
| 104874 |
+
"grad_norm": 0.26043933629989624,
|
| 104875 |
+
"learning_rate": 8.727637621591206e-07,
|
| 104876 |
+
"loss": 0.6125415563583374,
|
| 104877 |
+
"step": 14981
|
| 104878 |
+
},
|
| 104879 |
+
{
|
| 104880 |
+
"epoch": 18.382822085889572,
|
| 104881 |
+
"grad_norm": 0.2744832932949066,
|
| 104882 |
+
"learning_rate": 8.714496274515566e-07,
|
| 104883 |
+
"loss": 0.6705185174942017,
|
| 104884 |
+
"step": 14982
|
| 104885 |
+
},
|
| 104886 |
+
{
|
| 104887 |
+
"epoch": 18.3840490797546,
|
| 104888 |
+
"grad_norm": 0.2835293412208557,
|
| 104889 |
+
"learning_rate": 8.701364652970417e-07,
|
| 104890 |
+
"loss": 0.6601405143737793,
|
| 104891 |
+
"step": 14983
|
| 104892 |
+
},
|
| 104893 |
+
{
|
| 104894 |
+
"epoch": 18.385276073619632,
|
| 104895 |
+
"grad_norm": 0.25995269417762756,
|
| 104896 |
+
"learning_rate": 8.688242757485032e-07,
|
| 104897 |
+
"loss": 0.6478754281997681,
|
| 104898 |
+
"step": 14984
|
| 104899 |
+
},
|
| 104900 |
+
{
|
| 104901 |
+
"epoch": 18.38650306748466,
|
| 104902 |
+
"grad_norm": 0.26614174246788025,
|
| 104903 |
+
"learning_rate": 8.675130588588404e-07,
|
| 104904 |
+
"loss": 0.5839325189590454,
|
| 104905 |
+
"step": 14985
|
| 104906 |
+
},
|
| 104907 |
+
{
|
| 104908 |
+
"epoch": 18.387730061349693,
|
| 104909 |
+
"grad_norm": 0.2758522927761078,
|
| 104910 |
+
"learning_rate": 8.662028146808942e-07,
|
| 104911 |
+
"loss": 0.5387250781059265,
|
| 104912 |
+
"step": 14986
|
| 104913 |
+
},
|
| 104914 |
+
{
|
| 104915 |
+
"epoch": 18.388957055214725,
|
| 104916 |
+
"grad_norm": 0.25011372566223145,
|
| 104917 |
+
"learning_rate": 8.648935432674781e-07,
|
| 104918 |
+
"loss": 0.6700847148895264,
|
| 104919 |
+
"step": 14987
|
| 104920 |
+
},
|
| 104921 |
+
{
|
| 104922 |
+
"epoch": 18.390184049079753,
|
| 104923 |
+
"grad_norm": 0.28865793347358704,
|
| 104924 |
+
"learning_rate": 8.635852446713694e-07,
|
| 104925 |
+
"loss": 0.5336388349533081,
|
| 104926 |
+
"step": 14988
|
| 104927 |
+
},
|
| 104928 |
+
{
|
| 104929 |
+
"epoch": 18.391411042944785,
|
| 104930 |
+
"grad_norm": 0.2895233631134033,
|
| 104931 |
+
"learning_rate": 8.622779189453007e-07,
|
| 104932 |
+
"loss": 0.7537575960159302,
|
| 104933 |
+
"step": 14989
|
| 104934 |
+
},
|
| 104935 |
+
{
|
| 104936 |
+
"epoch": 18.392638036809817,
|
| 104937 |
+
"grad_norm": 0.250654935836792,
|
| 104938 |
+
"learning_rate": 8.609715661419687e-07,
|
| 104939 |
+
"loss": 0.37646153569221497,
|
| 104940 |
+
"step": 14990
|
| 104941 |
+
},
|
| 104942 |
+
{
|
| 104943 |
+
"epoch": 18.393865030674846,
|
| 104944 |
+
"grad_norm": 0.2651398777961731,
|
| 104945 |
+
"learning_rate": 8.59666186314026e-07,
|
| 104946 |
+
"loss": 0.517984926700592,
|
| 104947 |
+
"step": 14991
|
| 104948 |
+
},
|
| 104949 |
+
{
|
| 104950 |
+
"epoch": 18.395092024539878,
|
| 104951 |
+
"grad_norm": 0.27186036109924316,
|
| 104952 |
+
"learning_rate": 8.583617795140859e-07,
|
| 104953 |
+
"loss": 0.5918647050857544,
|
| 104954 |
+
"step": 14992
|
| 104955 |
+
},
|
| 104956 |
+
{
|
| 104957 |
+
"epoch": 18.39631901840491,
|
| 104958 |
+
"grad_norm": 0.27151286602020264,
|
| 104959 |
+
"learning_rate": 8.570583457947285e-07,
|
| 104960 |
+
"loss": 0.6331974267959595,
|
| 104961 |
+
"step": 14993
|
| 104962 |
+
},
|
| 104963 |
+
{
|
| 104964 |
+
"epoch": 18.397546012269938,
|
| 104965 |
+
"grad_norm": 0.27655377984046936,
|
| 104966 |
+
"learning_rate": 8.557558852084924e-07,
|
| 104967 |
+
"loss": 0.6269717812538147,
|
| 104968 |
+
"step": 14994
|
| 104969 |
+
},
|
| 104970 |
+
{
|
| 104971 |
+
"epoch": 18.39877300613497,
|
| 104972 |
+
"grad_norm": 0.2710462212562561,
|
| 104973 |
+
"learning_rate": 8.544543978078773e-07,
|
| 104974 |
+
"loss": 0.6422926187515259,
|
| 104975 |
+
"step": 14995
|
| 104976 |
+
},
|
| 104977 |
+
{
|
| 104978 |
+
"epoch": 18.4,
|
| 104979 |
+
"grad_norm": 0.3061816692352295,
|
| 104980 |
+
"learning_rate": 8.531538836453357e-07,
|
| 104981 |
+
"loss": 0.7262202501296997,
|
| 104982 |
+
"step": 14996
|
| 104983 |
+
},
|
| 104984 |
+
{
|
| 104985 |
+
"epoch": 18.40122699386503,
|
| 104986 |
+
"grad_norm": 0.26564717292785645,
|
| 104987 |
+
"learning_rate": 8.51854342773295e-07,
|
| 104988 |
+
"loss": 0.800443172454834,
|
| 104989 |
+
"step": 14997
|
| 104990 |
+
},
|
| 104991 |
+
{
|
| 104992 |
+
"epoch": 18.402453987730063,
|
| 104993 |
+
"grad_norm": 0.27169930934906006,
|
| 104994 |
+
"learning_rate": 8.505557752441301e-07,
|
| 104995 |
+
"loss": 0.5864543318748474,
|
| 104996 |
+
"step": 14998
|
| 104997 |
+
},
|
| 104998 |
+
{
|
| 104999 |
+
"epoch": 18.40368098159509,
|
| 105000 |
+
"grad_norm": 0.26096153259277344,
|
| 105001 |
+
"learning_rate": 8.49258181110188e-07,
|
| 105002 |
+
"loss": 0.4188769459724426,
|
| 105003 |
+
"step": 14999
|
| 105004 |
+
},
|
| 105005 |
+
{
|
| 105006 |
+
"epoch": 18.404907975460123,
|
| 105007 |
+
"grad_norm": 0.2749757766723633,
|
| 105008 |
+
"learning_rate": 8.479615604237712e-07,
|
| 105009 |
+
"loss": 0.7002823352813721,
|
| 105010 |
+
"step": 15000
|
| 105011 |
}
|
| 105012 |
],
|
| 105013 |
"logging_steps": 1,
|
|
|
|
| 105027 |
"attributes": {}
|
| 105028 |
}
|
| 105029 |
},
|
| 105030 |
+
"total_flos": 4.1956046407351665e+19,
|
| 105031 |
"train_batch_size": 8,
|
| 105032 |
"trial_name": null,
|
| 105033 |
"trial_params": null
|