Training in progress, step 15300, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e84939c0f973250f71fa2b2e95ad7cdef26663237b810b06e092533bdee3c091
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f135735cf311deb281b30013b6d5b0cc26c7be5b6b1f2db7f99b3cdb3a7e8045
|
| 3 |
size 173247691
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f73a14a1b444873df20d6cb5a8f67c10a6877b4f265a414df76389bdb52c688b
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 18.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -105008,6 +105008,2106 @@
|
|
| 105008 |
"learning_rate": 8.479615604237712e-07,
|
| 105009 |
"loss": 0.7002823352813721,
|
| 105010 |
"step": 15000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105011 |
}
|
| 105012 |
],
|
| 105013 |
"logging_steps": 1,
|
|
@@ -105027,7 +107127,7 @@
|
|
| 105027 |
"attributes": {}
|
| 105028 |
}
|
| 105029 |
},
|
| 105030 |
-
"total_flos": 4.
|
| 105031 |
"train_batch_size": 8,
|
| 105032 |
"trial_name": null,
|
| 105033 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 18.773006134969325,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 15300,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 105008 |
"learning_rate": 8.479615604237712e-07,
|
| 105009 |
"loss": 0.7002823352813721,
|
| 105010 |
"step": 15000
|
| 105011 |
+
},
|
| 105012 |
+
{
|
| 105013 |
+
"epoch": 18.406134969325155,
|
| 105014 |
+
"grad_norm": 0.26988518238067627,
|
| 105015 |
+
"learning_rate": 8.466659132371324e-07,
|
| 105016 |
+
"loss": 0.5017666220664978,
|
| 105017 |
+
"step": 15001
|
| 105018 |
+
},
|
| 105019 |
+
{
|
| 105020 |
+
"epoch": 18.407361963190183,
|
| 105021 |
+
"grad_norm": 0.32096439599990845,
|
| 105022 |
+
"learning_rate": 8.453712396025076e-07,
|
| 105023 |
+
"loss": 0.7862387895584106,
|
| 105024 |
+
"step": 15002
|
| 105025 |
+
},
|
| 105026 |
+
{
|
| 105027 |
+
"epoch": 18.408588957055215,
|
| 105028 |
+
"grad_norm": 0.2584100067615509,
|
| 105029 |
+
"learning_rate": 8.440775395720773e-07,
|
| 105030 |
+
"loss": 0.5598587393760681,
|
| 105031 |
+
"step": 15003
|
| 105032 |
+
},
|
| 105033 |
+
{
|
| 105034 |
+
"epoch": 18.409815950920244,
|
| 105035 |
+
"grad_norm": 0.27189522981643677,
|
| 105036 |
+
"learning_rate": 8.427848131979887e-07,
|
| 105037 |
+
"loss": 0.5677369236946106,
|
| 105038 |
+
"step": 15004
|
| 105039 |
+
},
|
| 105040 |
+
{
|
| 105041 |
+
"epoch": 18.411042944785276,
|
| 105042 |
+
"grad_norm": 0.28630581498146057,
|
| 105043 |
+
"learning_rate": 8.414930605323446e-07,
|
| 105044 |
+
"loss": 0.7373979091644287,
|
| 105045 |
+
"step": 15005
|
| 105046 |
+
},
|
| 105047 |
+
{
|
| 105048 |
+
"epoch": 18.412269938650308,
|
| 105049 |
+
"grad_norm": 0.25525593757629395,
|
| 105050 |
+
"learning_rate": 8.402022816272115e-07,
|
| 105051 |
+
"loss": 0.6582209467887878,
|
| 105052 |
+
"step": 15006
|
| 105053 |
+
},
|
| 105054 |
+
{
|
| 105055 |
+
"epoch": 18.413496932515336,
|
| 105056 |
+
"grad_norm": 0.24422357976436615,
|
| 105057 |
+
"learning_rate": 8.389124765346173e-07,
|
| 105058 |
+
"loss": 0.5487877130508423,
|
| 105059 |
+
"step": 15007
|
| 105060 |
+
},
|
| 105061 |
+
{
|
| 105062 |
+
"epoch": 18.41472392638037,
|
| 105063 |
+
"grad_norm": 0.2570844888687134,
|
| 105064 |
+
"learning_rate": 8.376236453065539e-07,
|
| 105065 |
+
"loss": 0.6310504078865051,
|
| 105066 |
+
"step": 15008
|
| 105067 |
+
},
|
| 105068 |
+
{
|
| 105069 |
+
"epoch": 18.4159509202454,
|
| 105070 |
+
"grad_norm": 0.283956378698349,
|
| 105071 |
+
"learning_rate": 8.363357879949685e-07,
|
| 105072 |
+
"loss": 0.47886431217193604,
|
| 105073 |
+
"step": 15009
|
| 105074 |
+
},
|
| 105075 |
+
{
|
| 105076 |
+
"epoch": 18.41717791411043,
|
| 105077 |
+
"grad_norm": 0.2573699653148651,
|
| 105078 |
+
"learning_rate": 8.350489046517695e-07,
|
| 105079 |
+
"loss": 0.6702180504798889,
|
| 105080 |
+
"step": 15010
|
| 105081 |
+
},
|
| 105082 |
+
{
|
| 105083 |
+
"epoch": 18.41840490797546,
|
| 105084 |
+
"grad_norm": 0.2506057024002075,
|
| 105085 |
+
"learning_rate": 8.337629953288295e-07,
|
| 105086 |
+
"loss": 0.6182423830032349,
|
| 105087 |
+
"step": 15011
|
| 105088 |
+
},
|
| 105089 |
+
{
|
| 105090 |
+
"epoch": 18.41963190184049,
|
| 105091 |
+
"grad_norm": 0.2700954079627991,
|
| 105092 |
+
"learning_rate": 8.324780600779764e-07,
|
| 105093 |
+
"loss": 0.8321703672409058,
|
| 105094 |
+
"step": 15012
|
| 105095 |
+
},
|
| 105096 |
+
{
|
| 105097 |
+
"epoch": 18.42085889570552,
|
| 105098 |
+
"grad_norm": 0.2533617913722992,
|
| 105099 |
+
"learning_rate": 8.311940989510076e-07,
|
| 105100 |
+
"loss": 0.6778624057769775,
|
| 105101 |
+
"step": 15013
|
| 105102 |
+
},
|
| 105103 |
+
{
|
| 105104 |
+
"epoch": 18.422085889570553,
|
| 105105 |
+
"grad_norm": 0.23095104098320007,
|
| 105106 |
+
"learning_rate": 8.299111119996706e-07,
|
| 105107 |
+
"loss": 0.43314534425735474,
|
| 105108 |
+
"step": 15014
|
| 105109 |
+
},
|
| 105110 |
+
{
|
| 105111 |
+
"epoch": 18.42331288343558,
|
| 105112 |
+
"grad_norm": 0.2955207824707031,
|
| 105113 |
+
"learning_rate": 8.286290992756851e-07,
|
| 105114 |
+
"loss": 0.7053745985031128,
|
| 105115 |
+
"step": 15015
|
| 105116 |
+
},
|
| 105117 |
+
{
|
| 105118 |
+
"epoch": 18.424539877300614,
|
| 105119 |
+
"grad_norm": 0.30995631217956543,
|
| 105120 |
+
"learning_rate": 8.273480608307182e-07,
|
| 105121 |
+
"loss": 0.736630916595459,
|
| 105122 |
+
"step": 15016
|
| 105123 |
+
},
|
| 105124 |
+
{
|
| 105125 |
+
"epoch": 18.425766871165646,
|
| 105126 |
+
"grad_norm": 0.26751789450645447,
|
| 105127 |
+
"learning_rate": 8.260679967164092e-07,
|
| 105128 |
+
"loss": 0.6557402610778809,
|
| 105129 |
+
"step": 15017
|
| 105130 |
+
},
|
| 105131 |
+
{
|
| 105132 |
+
"epoch": 18.426993865030674,
|
| 105133 |
+
"grad_norm": 0.2378382384777069,
|
| 105134 |
+
"learning_rate": 8.247889069843529e-07,
|
| 105135 |
+
"loss": 0.5029628872871399,
|
| 105136 |
+
"step": 15018
|
| 105137 |
+
},
|
| 105138 |
+
{
|
| 105139 |
+
"epoch": 18.428220858895706,
|
| 105140 |
+
"grad_norm": 0.2665591537952423,
|
| 105141 |
+
"learning_rate": 8.235107916861052e-07,
|
| 105142 |
+
"loss": 0.5210702419281006,
|
| 105143 |
+
"step": 15019
|
| 105144 |
+
},
|
| 105145 |
+
{
|
| 105146 |
+
"epoch": 18.429447852760735,
|
| 105147 |
+
"grad_norm": 0.25683659315109253,
|
| 105148 |
+
"learning_rate": 8.222336508731887e-07,
|
| 105149 |
+
"loss": 0.6265289187431335,
|
| 105150 |
+
"step": 15020
|
| 105151 |
+
},
|
| 105152 |
+
{
|
| 105153 |
+
"epoch": 18.430674846625767,
|
| 105154 |
+
"grad_norm": 0.2376331090927124,
|
| 105155 |
+
"learning_rate": 8.209574845970735e-07,
|
| 105156 |
+
"loss": 0.4027339816093445,
|
| 105157 |
+
"step": 15021
|
| 105158 |
+
},
|
| 105159 |
+
{
|
| 105160 |
+
"epoch": 18.4319018404908,
|
| 105161 |
+
"grad_norm": 0.2511342763900757,
|
| 105162 |
+
"learning_rate": 8.19682292909199e-07,
|
| 105163 |
+
"loss": 0.5003116130828857,
|
| 105164 |
+
"step": 15022
|
| 105165 |
+
},
|
| 105166 |
+
{
|
| 105167 |
+
"epoch": 18.433128834355827,
|
| 105168 |
+
"grad_norm": 0.262071430683136,
|
| 105169 |
+
"learning_rate": 8.184080758609658e-07,
|
| 105170 |
+
"loss": 0.5126713514328003,
|
| 105171 |
+
"step": 15023
|
| 105172 |
+
},
|
| 105173 |
+
{
|
| 105174 |
+
"epoch": 18.43435582822086,
|
| 105175 |
+
"grad_norm": 0.26501163840293884,
|
| 105176 |
+
"learning_rate": 8.17134833503741e-07,
|
| 105177 |
+
"loss": 0.4675607681274414,
|
| 105178 |
+
"step": 15024
|
| 105179 |
+
},
|
| 105180 |
+
{
|
| 105181 |
+
"epoch": 18.43558282208589,
|
| 105182 |
+
"grad_norm": 0.29066699743270874,
|
| 105183 |
+
"learning_rate": 8.158625658888335e-07,
|
| 105184 |
+
"loss": 0.5905107259750366,
|
| 105185 |
+
"step": 15025
|
| 105186 |
+
},
|
| 105187 |
+
{
|
| 105188 |
+
"epoch": 18.43680981595092,
|
| 105189 |
+
"grad_norm": 0.2355525642633438,
|
| 105190 |
+
"learning_rate": 8.145912730675331e-07,
|
| 105191 |
+
"loss": 0.3430643081665039,
|
| 105192 |
+
"step": 15026
|
| 105193 |
+
},
|
| 105194 |
+
{
|
| 105195 |
+
"epoch": 18.43803680981595,
|
| 105196 |
+
"grad_norm": 0.274815171957016,
|
| 105197 |
+
"learning_rate": 8.13320955091082e-07,
|
| 105198 |
+
"loss": 0.633973240852356,
|
| 105199 |
+
"step": 15027
|
| 105200 |
+
},
|
| 105201 |
+
{
|
| 105202 |
+
"epoch": 18.43926380368098,
|
| 105203 |
+
"grad_norm": 0.3103507459163666,
|
| 105204 |
+
"learning_rate": 8.120516120106753e-07,
|
| 105205 |
+
"loss": 0.6810316443443298,
|
| 105206 |
+
"step": 15028
|
| 105207 |
+
},
|
| 105208 |
+
{
|
| 105209 |
+
"epoch": 18.440490797546012,
|
| 105210 |
+
"grad_norm": 0.25461915135383606,
|
| 105211 |
+
"learning_rate": 8.107832438774887e-07,
|
| 105212 |
+
"loss": 0.4680299460887909,
|
| 105213 |
+
"step": 15029
|
| 105214 |
+
},
|
| 105215 |
+
{
|
| 105216 |
+
"epoch": 18.441717791411044,
|
| 105217 |
+
"grad_norm": 0.296601802110672,
|
| 105218 |
+
"learning_rate": 8.09515850742637e-07,
|
| 105219 |
+
"loss": 0.5009694695472717,
|
| 105220 |
+
"step": 15030
|
| 105221 |
+
},
|
| 105222 |
+
{
|
| 105223 |
+
"epoch": 18.442944785276072,
|
| 105224 |
+
"grad_norm": 0.24857783317565918,
|
| 105225 |
+
"learning_rate": 8.082494326572043e-07,
|
| 105226 |
+
"loss": 0.4273577928543091,
|
| 105227 |
+
"step": 15031
|
| 105228 |
+
},
|
| 105229 |
+
{
|
| 105230 |
+
"epoch": 18.444171779141104,
|
| 105231 |
+
"grad_norm": 0.23995882272720337,
|
| 105232 |
+
"learning_rate": 8.069839896722442e-07,
|
| 105233 |
+
"loss": 0.3288445770740509,
|
| 105234 |
+
"step": 15032
|
| 105235 |
+
},
|
| 105236 |
+
{
|
| 105237 |
+
"epoch": 18.445398773006136,
|
| 105238 |
+
"grad_norm": 0.24957355856895447,
|
| 105239 |
+
"learning_rate": 8.057195218387547e-07,
|
| 105240 |
+
"loss": 0.5318127870559692,
|
| 105241 |
+
"step": 15033
|
| 105242 |
+
},
|
| 105243 |
+
{
|
| 105244 |
+
"epoch": 18.446625766871165,
|
| 105245 |
+
"grad_norm": 0.26561078429222107,
|
| 105246 |
+
"learning_rate": 8.044560292077146e-07,
|
| 105247 |
+
"loss": 0.6572602987289429,
|
| 105248 |
+
"step": 15034
|
| 105249 |
+
},
|
| 105250 |
+
{
|
| 105251 |
+
"epoch": 18.447852760736197,
|
| 105252 |
+
"grad_norm": 0.24103668332099915,
|
| 105253 |
+
"learning_rate": 8.031935118300388e-07,
|
| 105254 |
+
"loss": 0.5004051923751831,
|
| 105255 |
+
"step": 15035
|
| 105256 |
+
},
|
| 105257 |
+
{
|
| 105258 |
+
"epoch": 18.449079754601225,
|
| 105259 |
+
"grad_norm": 0.316873162984848,
|
| 105260 |
+
"learning_rate": 8.019319697566196e-07,
|
| 105261 |
+
"loss": 0.7156195044517517,
|
| 105262 |
+
"step": 15036
|
| 105263 |
+
},
|
| 105264 |
+
{
|
| 105265 |
+
"epoch": 18.450306748466257,
|
| 105266 |
+
"grad_norm": 0.25244203209877014,
|
| 105267 |
+
"learning_rate": 8.006714030383084e-07,
|
| 105268 |
+
"loss": 0.6055111885070801,
|
| 105269 |
+
"step": 15037
|
| 105270 |
+
},
|
| 105271 |
+
{
|
| 105272 |
+
"epoch": 18.45153374233129,
|
| 105273 |
+
"grad_norm": 0.29162099957466125,
|
| 105274 |
+
"learning_rate": 7.99411811725917e-07,
|
| 105275 |
+
"loss": 0.6415433883666992,
|
| 105276 |
+
"step": 15038
|
| 105277 |
+
},
|
| 105278 |
+
{
|
| 105279 |
+
"epoch": 18.452760736196318,
|
| 105280 |
+
"grad_norm": 0.23962543904781342,
|
| 105281 |
+
"learning_rate": 7.981531958702131e-07,
|
| 105282 |
+
"loss": 0.5539005994796753,
|
| 105283 |
+
"step": 15039
|
| 105284 |
+
},
|
| 105285 |
+
{
|
| 105286 |
+
"epoch": 18.45398773006135,
|
| 105287 |
+
"grad_norm": 0.24174335598945618,
|
| 105288 |
+
"learning_rate": 7.968955555219259e-07,
|
| 105289 |
+
"loss": 0.6746041774749756,
|
| 105290 |
+
"step": 15040
|
| 105291 |
+
},
|
| 105292 |
+
{
|
| 105293 |
+
"epoch": 18.45521472392638,
|
| 105294 |
+
"grad_norm": 0.2767097055912018,
|
| 105295 |
+
"learning_rate": 7.956388907317508e-07,
|
| 105296 |
+
"loss": 0.5615209937095642,
|
| 105297 |
+
"step": 15041
|
| 105298 |
+
},
|
| 105299 |
+
{
|
| 105300 |
+
"epoch": 18.45644171779141,
|
| 105301 |
+
"grad_norm": 0.255563884973526,
|
| 105302 |
+
"learning_rate": 7.943832015503361e-07,
|
| 105303 |
+
"loss": 0.6252062320709229,
|
| 105304 |
+
"step": 15042
|
| 105305 |
+
},
|
| 105306 |
+
{
|
| 105307 |
+
"epoch": 18.457668711656442,
|
| 105308 |
+
"grad_norm": 0.25251320004463196,
|
| 105309 |
+
"learning_rate": 7.931284880282997e-07,
|
| 105310 |
+
"loss": 0.6074473261833191,
|
| 105311 |
+
"step": 15043
|
| 105312 |
+
},
|
| 105313 |
+
{
|
| 105314 |
+
"epoch": 18.45889570552147,
|
| 105315 |
+
"grad_norm": 0.2742644250392914,
|
| 105316 |
+
"learning_rate": 7.918747502162177e-07,
|
| 105317 |
+
"loss": 0.6066598892211914,
|
| 105318 |
+
"step": 15044
|
| 105319 |
+
},
|
| 105320 |
+
{
|
| 105321 |
+
"epoch": 18.460122699386503,
|
| 105322 |
+
"grad_norm": 0.30004239082336426,
|
| 105323 |
+
"learning_rate": 7.906219881646165e-07,
|
| 105324 |
+
"loss": 0.6686370968818665,
|
| 105325 |
+
"step": 15045
|
| 105326 |
+
},
|
| 105327 |
+
{
|
| 105328 |
+
"epoch": 18.461349693251535,
|
| 105329 |
+
"grad_norm": 0.29838523268699646,
|
| 105330 |
+
"learning_rate": 7.893702019239946e-07,
|
| 105331 |
+
"loss": 0.39523789286613464,
|
| 105332 |
+
"step": 15046
|
| 105333 |
+
},
|
| 105334 |
+
{
|
| 105335 |
+
"epoch": 18.462576687116563,
|
| 105336 |
+
"grad_norm": 0.2740970253944397,
|
| 105337 |
+
"learning_rate": 7.881193915448087e-07,
|
| 105338 |
+
"loss": 0.5338302850723267,
|
| 105339 |
+
"step": 15047
|
| 105340 |
+
},
|
| 105341 |
+
{
|
| 105342 |
+
"epoch": 18.463803680981595,
|
| 105343 |
+
"grad_norm": 0.2426895797252655,
|
| 105344 |
+
"learning_rate": 7.868695570774797e-07,
|
| 105345 |
+
"loss": 0.42754417657852173,
|
| 105346 |
+
"step": 15048
|
| 105347 |
+
},
|
| 105348 |
+
{
|
| 105349 |
+
"epoch": 18.465030674846627,
|
| 105350 |
+
"grad_norm": 0.28807586431503296,
|
| 105351 |
+
"learning_rate": 7.856206985723786e-07,
|
| 105352 |
+
"loss": 0.6008538603782654,
|
| 105353 |
+
"step": 15049
|
| 105354 |
+
},
|
| 105355 |
+
{
|
| 105356 |
+
"epoch": 18.466257668711656,
|
| 105357 |
+
"grad_norm": 0.21640616655349731,
|
| 105358 |
+
"learning_rate": 7.84372816079848e-07,
|
| 105359 |
+
"loss": 0.28484609723091125,
|
| 105360 |
+
"step": 15050
|
| 105361 |
+
},
|
| 105362 |
+
{
|
| 105363 |
+
"epoch": 18.467484662576688,
|
| 105364 |
+
"grad_norm": 0.27961060404777527,
|
| 105365 |
+
"learning_rate": 7.831259096501814e-07,
|
| 105366 |
+
"loss": 0.43502914905548096,
|
| 105367 |
+
"step": 15051
|
| 105368 |
+
},
|
| 105369 |
+
{
|
| 105370 |
+
"epoch": 18.46871165644172,
|
| 105371 |
+
"grad_norm": 0.25631678104400635,
|
| 105372 |
+
"learning_rate": 7.818799793336412e-07,
|
| 105373 |
+
"loss": 0.6460347175598145,
|
| 105374 |
+
"step": 15052
|
| 105375 |
+
},
|
| 105376 |
+
{
|
| 105377 |
+
"epoch": 18.469938650306748,
|
| 105378 |
+
"grad_norm": 0.3183768689632416,
|
| 105379 |
+
"learning_rate": 7.806350251804484e-07,
|
| 105380 |
+
"loss": 0.7350654602050781,
|
| 105381 |
+
"step": 15053
|
| 105382 |
+
},
|
| 105383 |
+
{
|
| 105384 |
+
"epoch": 18.47116564417178,
|
| 105385 |
+
"grad_norm": 0.30789369344711304,
|
| 105386 |
+
"learning_rate": 7.793910472407795e-07,
|
| 105387 |
+
"loss": 0.4440527856349945,
|
| 105388 |
+
"step": 15054
|
| 105389 |
+
},
|
| 105390 |
+
{
|
| 105391 |
+
"epoch": 18.47239263803681,
|
| 105392 |
+
"grad_norm": 0.2752784490585327,
|
| 105393 |
+
"learning_rate": 7.781480455647778e-07,
|
| 105394 |
+
"loss": 0.553666353225708,
|
| 105395 |
+
"step": 15055
|
| 105396 |
+
},
|
| 105397 |
+
{
|
| 105398 |
+
"epoch": 18.47361963190184,
|
| 105399 |
+
"grad_norm": 0.2633611559867859,
|
| 105400 |
+
"learning_rate": 7.769060202025474e-07,
|
| 105401 |
+
"loss": 0.7231197357177734,
|
| 105402 |
+
"step": 15056
|
| 105403 |
+
},
|
| 105404 |
+
{
|
| 105405 |
+
"epoch": 18.474846625766872,
|
| 105406 |
+
"grad_norm": 0.2986191213130951,
|
| 105407 |
+
"learning_rate": 7.756649712041486e-07,
|
| 105408 |
+
"loss": 0.6437402963638306,
|
| 105409 |
+
"step": 15057
|
| 105410 |
+
},
|
| 105411 |
+
{
|
| 105412 |
+
"epoch": 18.4760736196319,
|
| 105413 |
+
"grad_norm": 0.2688346207141876,
|
| 105414 |
+
"learning_rate": 7.744248986196051e-07,
|
| 105415 |
+
"loss": 0.46620458364486694,
|
| 105416 |
+
"step": 15058
|
| 105417 |
+
},
|
| 105418 |
+
{
|
| 105419 |
+
"epoch": 18.477300613496933,
|
| 105420 |
+
"grad_norm": 0.25008657574653625,
|
| 105421 |
+
"learning_rate": 7.731858024989019e-07,
|
| 105422 |
+
"loss": 0.6280673742294312,
|
| 105423 |
+
"step": 15059
|
| 105424 |
+
},
|
| 105425 |
+
{
|
| 105426 |
+
"epoch": 18.478527607361965,
|
| 105427 |
+
"grad_norm": 0.24196794629096985,
|
| 105428 |
+
"learning_rate": 7.71947682891977e-07,
|
| 105429 |
+
"loss": 0.5630426406860352,
|
| 105430 |
+
"step": 15060
|
| 105431 |
+
},
|
| 105432 |
+
{
|
| 105433 |
+
"epoch": 18.479754601226993,
|
| 105434 |
+
"grad_norm": 0.36446908116340637,
|
| 105435 |
+
"learning_rate": 7.70710539848743e-07,
|
| 105436 |
+
"loss": 0.5952461957931519,
|
| 105437 |
+
"step": 15061
|
| 105438 |
+
},
|
| 105439 |
+
{
|
| 105440 |
+
"epoch": 18.480981595092025,
|
| 105441 |
+
"grad_norm": 0.267814964056015,
|
| 105442 |
+
"learning_rate": 7.694743734190657e-07,
|
| 105443 |
+
"loss": 0.4886084198951721,
|
| 105444 |
+
"step": 15062
|
| 105445 |
+
},
|
| 105446 |
+
{
|
| 105447 |
+
"epoch": 18.482208588957054,
|
| 105448 |
+
"grad_norm": 0.246405690908432,
|
| 105449 |
+
"learning_rate": 7.682391836527664e-07,
|
| 105450 |
+
"loss": 0.5635120868682861,
|
| 105451 |
+
"step": 15063
|
| 105452 |
+
},
|
| 105453 |
+
{
|
| 105454 |
+
"epoch": 18.483435582822086,
|
| 105455 |
+
"grad_norm": 0.24214372038841248,
|
| 105456 |
+
"learning_rate": 7.670049705996357e-07,
|
| 105457 |
+
"loss": 0.5162582397460938,
|
| 105458 |
+
"step": 15064
|
| 105459 |
+
},
|
| 105460 |
+
{
|
| 105461 |
+
"epoch": 18.484662576687118,
|
| 105462 |
+
"grad_norm": 0.2803967595100403,
|
| 105463 |
+
"learning_rate": 7.657717343094173e-07,
|
| 105464 |
+
"loss": 0.7461479902267456,
|
| 105465 |
+
"step": 15065
|
| 105466 |
+
},
|
| 105467 |
+
{
|
| 105468 |
+
"epoch": 18.485889570552146,
|
| 105469 |
+
"grad_norm": 0.2980840802192688,
|
| 105470 |
+
"learning_rate": 7.645394748318241e-07,
|
| 105471 |
+
"loss": 0.6661889553070068,
|
| 105472 |
+
"step": 15066
|
| 105473 |
+
},
|
| 105474 |
+
{
|
| 105475 |
+
"epoch": 18.487116564417178,
|
| 105476 |
+
"grad_norm": 0.2393178939819336,
|
| 105477 |
+
"learning_rate": 7.63308192216522e-07,
|
| 105478 |
+
"loss": 0.6595220565795898,
|
| 105479 |
+
"step": 15067
|
| 105480 |
+
},
|
| 105481 |
+
{
|
| 105482 |
+
"epoch": 18.48834355828221,
|
| 105483 |
+
"grad_norm": 0.25572678446769714,
|
| 105484 |
+
"learning_rate": 7.620778865131406e-07,
|
| 105485 |
+
"loss": 0.48996564745903015,
|
| 105486 |
+
"step": 15068
|
| 105487 |
+
},
|
| 105488 |
+
{
|
| 105489 |
+
"epoch": 18.48957055214724,
|
| 105490 |
+
"grad_norm": 0.2874976694583893,
|
| 105491 |
+
"learning_rate": 7.608485577712737e-07,
|
| 105492 |
+
"loss": 0.6075916290283203,
|
| 105493 |
+
"step": 15069
|
| 105494 |
+
},
|
| 105495 |
+
{
|
| 105496 |
+
"epoch": 18.49079754601227,
|
| 105497 |
+
"grad_norm": 0.24423320591449738,
|
| 105498 |
+
"learning_rate": 7.596202060404678e-07,
|
| 105499 |
+
"loss": 0.5831315517425537,
|
| 105500 |
+
"step": 15070
|
| 105501 |
+
},
|
| 105502 |
+
{
|
| 105503 |
+
"epoch": 18.4920245398773,
|
| 105504 |
+
"grad_norm": 0.24751967191696167,
|
| 105505 |
+
"learning_rate": 7.583928313702332e-07,
|
| 105506 |
+
"loss": 0.6452398300170898,
|
| 105507 |
+
"step": 15071
|
| 105508 |
+
},
|
| 105509 |
+
{
|
| 105510 |
+
"epoch": 18.49325153374233,
|
| 105511 |
+
"grad_norm": 0.2806456685066223,
|
| 105512 |
+
"learning_rate": 7.571664338100498e-07,
|
| 105513 |
+
"loss": 0.6140261888504028,
|
| 105514 |
+
"step": 15072
|
| 105515 |
+
},
|
| 105516 |
+
{
|
| 105517 |
+
"epoch": 18.494478527607363,
|
| 105518 |
+
"grad_norm": 0.27064502239227295,
|
| 105519 |
+
"learning_rate": 7.55941013409342e-07,
|
| 105520 |
+
"loss": 0.7779956459999084,
|
| 105521 |
+
"step": 15073
|
| 105522 |
+
},
|
| 105523 |
+
{
|
| 105524 |
+
"epoch": 18.49570552147239,
|
| 105525 |
+
"grad_norm": 0.2319720834493637,
|
| 105526 |
+
"learning_rate": 7.547165702175036e-07,
|
| 105527 |
+
"loss": 0.5071749091148376,
|
| 105528 |
+
"step": 15074
|
| 105529 |
+
},
|
| 105530 |
+
{
|
| 105531 |
+
"epoch": 18.496932515337424,
|
| 105532 |
+
"grad_norm": 0.2741059958934784,
|
| 105533 |
+
"learning_rate": 7.534931042838922e-07,
|
| 105534 |
+
"loss": 0.5579279661178589,
|
| 105535 |
+
"step": 15075
|
| 105536 |
+
},
|
| 105537 |
+
{
|
| 105538 |
+
"epoch": 18.498159509202456,
|
| 105539 |
+
"grad_norm": 0.24883468449115753,
|
| 105540 |
+
"learning_rate": 7.522706156578214e-07,
|
| 105541 |
+
"loss": 0.7345324754714966,
|
| 105542 |
+
"step": 15076
|
| 105543 |
+
},
|
| 105544 |
+
{
|
| 105545 |
+
"epoch": 18.499386503067484,
|
| 105546 |
+
"grad_norm": 0.25426188111305237,
|
| 105547 |
+
"learning_rate": 7.510491043885681e-07,
|
| 105548 |
+
"loss": 0.5537772178649902,
|
| 105549 |
+
"step": 15077
|
| 105550 |
+
},
|
| 105551 |
+
{
|
| 105552 |
+
"epoch": 18.500613496932516,
|
| 105553 |
+
"grad_norm": 0.27426281571388245,
|
| 105554 |
+
"learning_rate": 7.498285705253599e-07,
|
| 105555 |
+
"loss": 0.8123739957809448,
|
| 105556 |
+
"step": 15078
|
| 105557 |
+
},
|
| 105558 |
+
{
|
| 105559 |
+
"epoch": 18.501840490797544,
|
| 105560 |
+
"grad_norm": 0.28615546226501465,
|
| 105561 |
+
"learning_rate": 7.48609014117399e-07,
|
| 105562 |
+
"loss": 0.5964265465736389,
|
| 105563 |
+
"step": 15079
|
| 105564 |
+
},
|
| 105565 |
+
{
|
| 105566 |
+
"epoch": 18.503067484662576,
|
| 105567 |
+
"grad_norm": 0.30579593777656555,
|
| 105568 |
+
"learning_rate": 7.473904352138434e-07,
|
| 105569 |
+
"loss": 0.7176042795181274,
|
| 105570 |
+
"step": 15080
|
| 105571 |
+
},
|
| 105572 |
+
{
|
| 105573 |
+
"epoch": 18.50429447852761,
|
| 105574 |
+
"grad_norm": 0.255885511636734,
|
| 105575 |
+
"learning_rate": 7.461728338638091e-07,
|
| 105576 |
+
"loss": 0.5266407132148743,
|
| 105577 |
+
"step": 15081
|
| 105578 |
+
},
|
| 105579 |
+
{
|
| 105580 |
+
"epoch": 18.505521472392637,
|
| 105581 |
+
"grad_norm": 0.2587753236293793,
|
| 105582 |
+
"learning_rate": 7.449562101163737e-07,
|
| 105583 |
+
"loss": 0.6365657448768616,
|
| 105584 |
+
"step": 15082
|
| 105585 |
+
},
|
| 105586 |
+
{
|
| 105587 |
+
"epoch": 18.50674846625767,
|
| 105588 |
+
"grad_norm": 0.24876601994037628,
|
| 105589 |
+
"learning_rate": 7.437405640205757e-07,
|
| 105590 |
+
"loss": 0.5127677917480469,
|
| 105591 |
+
"step": 15083
|
| 105592 |
+
},
|
| 105593 |
+
{
|
| 105594 |
+
"epoch": 18.5079754601227,
|
| 105595 |
+
"grad_norm": 0.23445630073547363,
|
| 105596 |
+
"learning_rate": 7.425258956254149e-07,
|
| 105597 |
+
"loss": 0.6411794424057007,
|
| 105598 |
+
"step": 15084
|
| 105599 |
+
},
|
| 105600 |
+
{
|
| 105601 |
+
"epoch": 18.50920245398773,
|
| 105602 |
+
"grad_norm": 0.24745701253414154,
|
| 105603 |
+
"learning_rate": 7.413122049798493e-07,
|
| 105604 |
+
"loss": 0.5648179054260254,
|
| 105605 |
+
"step": 15085
|
| 105606 |
+
},
|
| 105607 |
+
{
|
| 105608 |
+
"epoch": 18.51042944785276,
|
| 105609 |
+
"grad_norm": 0.25540637969970703,
|
| 105610 |
+
"learning_rate": 7.400994921328009e-07,
|
| 105611 |
+
"loss": 0.6375343799591064,
|
| 105612 |
+
"step": 15086
|
| 105613 |
+
},
|
| 105614 |
+
{
|
| 105615 |
+
"epoch": 18.51165644171779,
|
| 105616 |
+
"grad_norm": 0.2614000737667084,
|
| 105617 |
+
"learning_rate": 7.388877571331526e-07,
|
| 105618 |
+
"loss": 0.75091552734375,
|
| 105619 |
+
"step": 15087
|
| 105620 |
+
},
|
| 105621 |
+
{
|
| 105622 |
+
"epoch": 18.512883435582822,
|
| 105623 |
+
"grad_norm": 0.2700320780277252,
|
| 105624 |
+
"learning_rate": 7.376770000297434e-07,
|
| 105625 |
+
"loss": 0.6141526699066162,
|
| 105626 |
+
"step": 15088
|
| 105627 |
+
},
|
| 105628 |
+
{
|
| 105629 |
+
"epoch": 18.514110429447854,
|
| 105630 |
+
"grad_norm": 0.2803581953048706,
|
| 105631 |
+
"learning_rate": 7.36467220871373e-07,
|
| 105632 |
+
"loss": 0.6992532014846802,
|
| 105633 |
+
"step": 15089
|
| 105634 |
+
},
|
| 105635 |
+
{
|
| 105636 |
+
"epoch": 18.515337423312882,
|
| 105637 |
+
"grad_norm": 0.4579346179962158,
|
| 105638 |
+
"learning_rate": 7.352584197068052e-07,
|
| 105639 |
+
"loss": 0.635081946849823,
|
| 105640 |
+
"step": 15090
|
| 105641 |
+
},
|
| 105642 |
+
{
|
| 105643 |
+
"epoch": 18.516564417177914,
|
| 105644 |
+
"grad_norm": 0.23495975136756897,
|
| 105645 |
+
"learning_rate": 7.340505965847733e-07,
|
| 105646 |
+
"loss": 0.5005642175674438,
|
| 105647 |
+
"step": 15091
|
| 105648 |
+
},
|
| 105649 |
+
{
|
| 105650 |
+
"epoch": 18.517791411042946,
|
| 105651 |
+
"grad_norm": 0.26014724373817444,
|
| 105652 |
+
"learning_rate": 7.328437515539494e-07,
|
| 105653 |
+
"loss": 0.4779026508331299,
|
| 105654 |
+
"step": 15092
|
| 105655 |
+
},
|
| 105656 |
+
{
|
| 105657 |
+
"epoch": 18.519018404907975,
|
| 105658 |
+
"grad_norm": 0.299617201089859,
|
| 105659 |
+
"learning_rate": 7.316378846629806e-07,
|
| 105660 |
+
"loss": 0.5042616128921509,
|
| 105661 |
+
"step": 15093
|
| 105662 |
+
},
|
| 105663 |
+
{
|
| 105664 |
+
"epoch": 18.520245398773007,
|
| 105665 |
+
"grad_norm": 0.26069051027297974,
|
| 105666 |
+
"learning_rate": 7.304329959604728e-07,
|
| 105667 |
+
"loss": 0.4813694357872009,
|
| 105668 |
+
"step": 15094
|
| 105669 |
+
},
|
| 105670 |
+
{
|
| 105671 |
+
"epoch": 18.521472392638035,
|
| 105672 |
+
"grad_norm": 0.28472867608070374,
|
| 105673 |
+
"learning_rate": 7.292290854949924e-07,
|
| 105674 |
+
"loss": 0.6566644906997681,
|
| 105675 |
+
"step": 15095
|
| 105676 |
+
},
|
| 105677 |
+
{
|
| 105678 |
+
"epoch": 18.522699386503067,
|
| 105679 |
+
"grad_norm": 0.27900993824005127,
|
| 105680 |
+
"learning_rate": 7.280261533150701e-07,
|
| 105681 |
+
"loss": 0.5997101068496704,
|
| 105682 |
+
"step": 15096
|
| 105683 |
+
},
|
| 105684 |
+
{
|
| 105685 |
+
"epoch": 18.5239263803681,
|
| 105686 |
+
"grad_norm": 0.27072933316230774,
|
| 105687 |
+
"learning_rate": 7.268241994691838e-07,
|
| 105688 |
+
"loss": 0.4074103534221649,
|
| 105689 |
+
"step": 15097
|
| 105690 |
+
},
|
| 105691 |
+
{
|
| 105692 |
+
"epoch": 18.525153374233128,
|
| 105693 |
+
"grad_norm": 0.2461530566215515,
|
| 105694 |
+
"learning_rate": 7.256232240057836e-07,
|
| 105695 |
+
"loss": 0.5919272899627686,
|
| 105696 |
+
"step": 15098
|
| 105697 |
+
},
|
| 105698 |
+
{
|
| 105699 |
+
"epoch": 18.52638036809816,
|
| 105700 |
+
"grad_norm": 0.25327226519584656,
|
| 105701 |
+
"learning_rate": 7.244232269732781e-07,
|
| 105702 |
+
"loss": 0.5056906938552856,
|
| 105703 |
+
"step": 15099
|
| 105704 |
+
},
|
| 105705 |
+
{
|
| 105706 |
+
"epoch": 18.52760736196319,
|
| 105707 |
+
"grad_norm": 0.24240347743034363,
|
| 105708 |
+
"learning_rate": 7.232242084200369e-07,
|
| 105709 |
+
"loss": 0.5309599041938782,
|
| 105710 |
+
"step": 15100
|
| 105711 |
+
},
|
| 105712 |
+
{
|
| 105713 |
+
"epoch": 18.52883435582822,
|
| 105714 |
+
"grad_norm": 0.27513226866722107,
|
| 105715 |
+
"learning_rate": 7.220261683943936e-07,
|
| 105716 |
+
"loss": 0.6863803863525391,
|
| 105717 |
+
"step": 15101
|
| 105718 |
+
},
|
| 105719 |
+
{
|
| 105720 |
+
"epoch": 18.530061349693252,
|
| 105721 |
+
"grad_norm": 0.26228705048561096,
|
| 105722 |
+
"learning_rate": 7.20829106944626e-07,
|
| 105723 |
+
"loss": 0.5250183939933777,
|
| 105724 |
+
"step": 15102
|
| 105725 |
+
},
|
| 105726 |
+
{
|
| 105727 |
+
"epoch": 18.53128834355828,
|
| 105728 |
+
"grad_norm": 0.2649267911911011,
|
| 105729 |
+
"learning_rate": 7.19633024118993e-07,
|
| 105730 |
+
"loss": 0.5145870447158813,
|
| 105731 |
+
"step": 15103
|
| 105732 |
+
},
|
| 105733 |
+
{
|
| 105734 |
+
"epoch": 18.532515337423312,
|
| 105735 |
+
"grad_norm": 0.27462324500083923,
|
| 105736 |
+
"learning_rate": 7.184379199657032e-07,
|
| 105737 |
+
"loss": 0.649298906326294,
|
| 105738 |
+
"step": 15104
|
| 105739 |
+
},
|
| 105740 |
+
{
|
| 105741 |
+
"epoch": 18.533742331288344,
|
| 105742 |
+
"grad_norm": 0.265155553817749,
|
| 105743 |
+
"learning_rate": 7.172437945329264e-07,
|
| 105744 |
+
"loss": 0.5217992067337036,
|
| 105745 |
+
"step": 15105
|
| 105746 |
+
},
|
| 105747 |
+
{
|
| 105748 |
+
"epoch": 18.534969325153373,
|
| 105749 |
+
"grad_norm": 0.2447088658809662,
|
| 105750 |
+
"learning_rate": 7.16050647868799e-07,
|
| 105751 |
+
"loss": 0.3777480721473694,
|
| 105752 |
+
"step": 15106
|
| 105753 |
+
},
|
| 105754 |
+
{
|
| 105755 |
+
"epoch": 18.536196319018405,
|
| 105756 |
+
"grad_norm": 0.2790638506412506,
|
| 105757 |
+
"learning_rate": 7.148584800214048e-07,
|
| 105758 |
+
"loss": 0.566234827041626,
|
| 105759 |
+
"step": 15107
|
| 105760 |
+
},
|
| 105761 |
+
{
|
| 105762 |
+
"epoch": 18.537423312883437,
|
| 105763 |
+
"grad_norm": 0.24771304428577423,
|
| 105764 |
+
"learning_rate": 7.136672910388054e-07,
|
| 105765 |
+
"loss": 0.5184342861175537,
|
| 105766 |
+
"step": 15108
|
| 105767 |
+
},
|
| 105768 |
+
{
|
| 105769 |
+
"epoch": 18.538650306748465,
|
| 105770 |
+
"grad_norm": 0.29097306728363037,
|
| 105771 |
+
"learning_rate": 7.124770809690095e-07,
|
| 105772 |
+
"loss": 0.44570547342300415,
|
| 105773 |
+
"step": 15109
|
| 105774 |
+
},
|
| 105775 |
+
{
|
| 105776 |
+
"epoch": 18.539877300613497,
|
| 105777 |
+
"grad_norm": 0.2520453631877899,
|
| 105778 |
+
"learning_rate": 7.112878498599929e-07,
|
| 105779 |
+
"loss": 0.5955885648727417,
|
| 105780 |
+
"step": 15110
|
| 105781 |
+
},
|
| 105782 |
+
{
|
| 105783 |
+
"epoch": 18.54110429447853,
|
| 105784 |
+
"grad_norm": 0.2866119146347046,
|
| 105785 |
+
"learning_rate": 7.100995977596919e-07,
|
| 105786 |
+
"loss": 0.6235331296920776,
|
| 105787 |
+
"step": 15111
|
| 105788 |
+
},
|
| 105789 |
+
{
|
| 105790 |
+
"epoch": 18.542331288343558,
|
| 105791 |
+
"grad_norm": 0.2389226108789444,
|
| 105792 |
+
"learning_rate": 7.089123247159935e-07,
|
| 105793 |
+
"loss": 0.22559209167957306,
|
| 105794 |
+
"step": 15112
|
| 105795 |
+
},
|
| 105796 |
+
{
|
| 105797 |
+
"epoch": 18.54355828220859,
|
| 105798 |
+
"grad_norm": 0.253071129322052,
|
| 105799 |
+
"learning_rate": 7.077260307767592e-07,
|
| 105800 |
+
"loss": 0.6456031799316406,
|
| 105801 |
+
"step": 15113
|
| 105802 |
+
},
|
| 105803 |
+
{
|
| 105804 |
+
"epoch": 18.54478527607362,
|
| 105805 |
+
"grad_norm": 0.25669771432876587,
|
| 105806 |
+
"learning_rate": 7.065407159898091e-07,
|
| 105807 |
+
"loss": 0.5606966018676758,
|
| 105808 |
+
"step": 15114
|
| 105809 |
+
},
|
| 105810 |
+
{
|
| 105811 |
+
"epoch": 18.54601226993865,
|
| 105812 |
+
"grad_norm": 0.28720763325691223,
|
| 105813 |
+
"learning_rate": 7.053563804029162e-07,
|
| 105814 |
+
"loss": 0.6883248090744019,
|
| 105815 |
+
"step": 15115
|
| 105816 |
+
},
|
| 105817 |
+
{
|
| 105818 |
+
"epoch": 18.547239263803682,
|
| 105819 |
+
"grad_norm": 0.3068024218082428,
|
| 105820 |
+
"learning_rate": 7.041730240638145e-07,
|
| 105821 |
+
"loss": 0.6839640140533447,
|
| 105822 |
+
"step": 15116
|
| 105823 |
+
},
|
| 105824 |
+
{
|
| 105825 |
+
"epoch": 18.54846625766871,
|
| 105826 |
+
"grad_norm": 0.24132217466831207,
|
| 105827 |
+
"learning_rate": 7.029906470202046e-07,
|
| 105828 |
+
"loss": 0.49997156858444214,
|
| 105829 |
+
"step": 15117
|
| 105830 |
+
},
|
| 105831 |
+
{
|
| 105832 |
+
"epoch": 18.549693251533743,
|
| 105833 |
+
"grad_norm": 0.3131883442401886,
|
| 105834 |
+
"learning_rate": 7.018092493197432e-07,
|
| 105835 |
+
"loss": 0.5522582530975342,
|
| 105836 |
+
"step": 15118
|
| 105837 |
+
},
|
| 105838 |
+
{
|
| 105839 |
+
"epoch": 18.550920245398775,
|
| 105840 |
+
"grad_norm": 0.24126362800598145,
|
| 105841 |
+
"learning_rate": 7.006288310100528e-07,
|
| 105842 |
+
"loss": 0.5521384477615356,
|
| 105843 |
+
"step": 15119
|
| 105844 |
+
},
|
| 105845 |
+
{
|
| 105846 |
+
"epoch": 18.552147239263803,
|
| 105847 |
+
"grad_norm": 0.28721854090690613,
|
| 105848 |
+
"learning_rate": 6.994493921387096e-07,
|
| 105849 |
+
"loss": 0.5958684682846069,
|
| 105850 |
+
"step": 15120
|
| 105851 |
+
},
|
| 105852 |
+
{
|
| 105853 |
+
"epoch": 18.553374233128835,
|
| 105854 |
+
"grad_norm": 0.30204153060913086,
|
| 105855 |
+
"learning_rate": 6.982709327532561e-07,
|
| 105856 |
+
"loss": 0.7713477611541748,
|
| 105857 |
+
"step": 15121
|
| 105858 |
+
},
|
| 105859 |
+
{
|
| 105860 |
+
"epoch": 18.554601226993864,
|
| 105861 |
+
"grad_norm": 0.2600513696670532,
|
| 105862 |
+
"learning_rate": 6.970934529011874e-07,
|
| 105863 |
+
"loss": 0.48867762088775635,
|
| 105864 |
+
"step": 15122
|
| 105865 |
+
},
|
| 105866 |
+
{
|
| 105867 |
+
"epoch": 18.555828220858896,
|
| 105868 |
+
"grad_norm": 0.27114149928092957,
|
| 105869 |
+
"learning_rate": 6.959169526299686e-07,
|
| 105870 |
+
"loss": 0.7132159471511841,
|
| 105871 |
+
"step": 15123
|
| 105872 |
+
},
|
| 105873 |
+
{
|
| 105874 |
+
"epoch": 18.557055214723928,
|
| 105875 |
+
"grad_norm": 0.26623812317848206,
|
| 105876 |
+
"learning_rate": 6.947414319870171e-07,
|
| 105877 |
+
"loss": 0.5764646530151367,
|
| 105878 |
+
"step": 15124
|
| 105879 |
+
},
|
| 105880 |
+
{
|
| 105881 |
+
"epoch": 18.558282208588956,
|
| 105882 |
+
"grad_norm": 0.28244468569755554,
|
| 105883 |
+
"learning_rate": 6.935668910197202e-07,
|
| 105884 |
+
"loss": 0.7551293969154358,
|
| 105885 |
+
"step": 15125
|
| 105886 |
+
},
|
| 105887 |
+
{
|
| 105888 |
+
"epoch": 18.559509202453988,
|
| 105889 |
+
"grad_norm": 0.2580442726612091,
|
| 105890 |
+
"learning_rate": 6.923933297754204e-07,
|
| 105891 |
+
"loss": 0.5424528121948242,
|
| 105892 |
+
"step": 15126
|
| 105893 |
+
},
|
| 105894 |
+
{
|
| 105895 |
+
"epoch": 18.56073619631902,
|
| 105896 |
+
"grad_norm": 0.24951207637786865,
|
| 105897 |
+
"learning_rate": 6.912207483014161e-07,
|
| 105898 |
+
"loss": 0.6466777920722961,
|
| 105899 |
+
"step": 15127
|
| 105900 |
+
},
|
| 105901 |
+
{
|
| 105902 |
+
"epoch": 18.56196319018405,
|
| 105903 |
+
"grad_norm": 0.26366811990737915,
|
| 105904 |
+
"learning_rate": 6.900491466449721e-07,
|
| 105905 |
+
"loss": 0.5556007623672485,
|
| 105906 |
+
"step": 15128
|
| 105907 |
+
},
|
| 105908 |
+
{
|
| 105909 |
+
"epoch": 18.56319018404908,
|
| 105910 |
+
"grad_norm": 0.26368480920791626,
|
| 105911 |
+
"learning_rate": 6.888785248533119e-07,
|
| 105912 |
+
"loss": 0.6620758771896362,
|
| 105913 |
+
"step": 15129
|
| 105914 |
+
},
|
| 105915 |
+
{
|
| 105916 |
+
"epoch": 18.56441717791411,
|
| 105917 |
+
"grad_norm": 0.30060070753097534,
|
| 105918 |
+
"learning_rate": 6.877088829736228e-07,
|
| 105919 |
+
"loss": 0.7801352739334106,
|
| 105920 |
+
"step": 15130
|
| 105921 |
+
},
|
| 105922 |
+
{
|
| 105923 |
+
"epoch": 18.56564417177914,
|
| 105924 |
+
"grad_norm": 0.25798070430755615,
|
| 105925 |
+
"learning_rate": 6.865402210530503e-07,
|
| 105926 |
+
"loss": 0.626514732837677,
|
| 105927 |
+
"step": 15131
|
| 105928 |
+
},
|
| 105929 |
+
{
|
| 105930 |
+
"epoch": 18.566871165644173,
|
| 105931 |
+
"grad_norm": 0.30103838443756104,
|
| 105932 |
+
"learning_rate": 6.853725391386928e-07,
|
| 105933 |
+
"loss": 0.6700391173362732,
|
| 105934 |
+
"step": 15132
|
| 105935 |
+
},
|
| 105936 |
+
{
|
| 105937 |
+
"epoch": 18.5680981595092,
|
| 105938 |
+
"grad_norm": 0.2740756571292877,
|
| 105939 |
+
"learning_rate": 6.842058372776211e-07,
|
| 105940 |
+
"loss": 0.7246178984642029,
|
| 105941 |
+
"step": 15133
|
| 105942 |
+
},
|
| 105943 |
+
{
|
| 105944 |
+
"epoch": 18.569325153374233,
|
| 105945 |
+
"grad_norm": 0.27037662267684937,
|
| 105946 |
+
"learning_rate": 6.830401155168614e-07,
|
| 105947 |
+
"loss": 0.6969963312149048,
|
| 105948 |
+
"step": 15134
|
| 105949 |
+
},
|
| 105950 |
+
{
|
| 105951 |
+
"epoch": 18.570552147239265,
|
| 105952 |
+
"grad_norm": 0.2706049978733063,
|
| 105953 |
+
"learning_rate": 6.818753739034012e-07,
|
| 105954 |
+
"loss": 0.6336203813552856,
|
| 105955 |
+
"step": 15135
|
| 105956 |
+
},
|
| 105957 |
+
{
|
| 105958 |
+
"epoch": 18.571779141104294,
|
| 105959 |
+
"grad_norm": 0.23064523935317993,
|
| 105960 |
+
"learning_rate": 6.807116124841861e-07,
|
| 105961 |
+
"loss": 0.5598507523536682,
|
| 105962 |
+
"step": 15136
|
| 105963 |
+
},
|
| 105964 |
+
{
|
| 105965 |
+
"epoch": 18.573006134969326,
|
| 105966 |
+
"grad_norm": 0.26371335983276367,
|
| 105967 |
+
"learning_rate": 6.79548831306126e-07,
|
| 105968 |
+
"loss": 0.5162768363952637,
|
| 105969 |
+
"step": 15137
|
| 105970 |
+
},
|
| 105971 |
+
{
|
| 105972 |
+
"epoch": 18.574233128834354,
|
| 105973 |
+
"grad_norm": 0.28740817308425903,
|
| 105974 |
+
"learning_rate": 6.783870304160888e-07,
|
| 105975 |
+
"loss": 0.700891375541687,
|
| 105976 |
+
"step": 15138
|
| 105977 |
+
},
|
| 105978 |
+
{
|
| 105979 |
+
"epoch": 18.575460122699386,
|
| 105980 |
+
"grad_norm": 0.25728079676628113,
|
| 105981 |
+
"learning_rate": 6.772262098609039e-07,
|
| 105982 |
+
"loss": 0.519813060760498,
|
| 105983 |
+
"step": 15139
|
| 105984 |
+
},
|
| 105985 |
+
{
|
| 105986 |
+
"epoch": 18.57668711656442,
|
| 105987 |
+
"grad_norm": 0.2500128448009491,
|
| 105988 |
+
"learning_rate": 6.760663696873587e-07,
|
| 105989 |
+
"loss": 0.7611314058303833,
|
| 105990 |
+
"step": 15140
|
| 105991 |
+
},
|
| 105992 |
+
{
|
| 105993 |
+
"epoch": 18.577914110429447,
|
| 105994 |
+
"grad_norm": 0.20538310706615448,
|
| 105995 |
+
"learning_rate": 6.749075099422047e-07,
|
| 105996 |
+
"loss": 0.3309552073478699,
|
| 105997 |
+
"step": 15141
|
| 105998 |
+
},
|
| 105999 |
+
{
|
| 106000 |
+
"epoch": 18.57914110429448,
|
| 106001 |
+
"grad_norm": 0.27152785658836365,
|
| 106002 |
+
"learning_rate": 6.737496306721519e-07,
|
| 106003 |
+
"loss": 0.6586913466453552,
|
| 106004 |
+
"step": 15142
|
| 106005 |
+
},
|
| 106006 |
+
{
|
| 106007 |
+
"epoch": 18.58036809815951,
|
| 106008 |
+
"grad_norm": 0.2581484615802765,
|
| 106009 |
+
"learning_rate": 6.725927319238684e-07,
|
| 106010 |
+
"loss": 0.6152893304824829,
|
| 106011 |
+
"step": 15143
|
| 106012 |
+
},
|
| 106013 |
+
{
|
| 106014 |
+
"epoch": 18.58159509202454,
|
| 106015 |
+
"grad_norm": 0.2583959698677063,
|
| 106016 |
+
"learning_rate": 6.714368137439891e-07,
|
| 106017 |
+
"loss": 0.5489472150802612,
|
| 106018 |
+
"step": 15144
|
| 106019 |
+
},
|
| 106020 |
+
{
|
| 106021 |
+
"epoch": 18.58282208588957,
|
| 106022 |
+
"grad_norm": 0.2896976172924042,
|
| 106023 |
+
"learning_rate": 6.702818761791074e-07,
|
| 106024 |
+
"loss": 0.5843064188957214,
|
| 106025 |
+
"step": 15145
|
| 106026 |
+
},
|
| 106027 |
+
{
|
| 106028 |
+
"epoch": 18.5840490797546,
|
| 106029 |
+
"grad_norm": 0.32915428280830383,
|
| 106030 |
+
"learning_rate": 6.691279192757694e-07,
|
| 106031 |
+
"loss": 0.6426497101783752,
|
| 106032 |
+
"step": 15146
|
| 106033 |
+
},
|
| 106034 |
+
{
|
| 106035 |
+
"epoch": 18.58527607361963,
|
| 106036 |
+
"grad_norm": 0.27131614089012146,
|
| 106037 |
+
"learning_rate": 6.679749430804933e-07,
|
| 106038 |
+
"loss": 0.6845475435256958,
|
| 106039 |
+
"step": 15147
|
| 106040 |
+
},
|
| 106041 |
+
{
|
| 106042 |
+
"epoch": 18.586503067484664,
|
| 106043 |
+
"grad_norm": 0.27222830057144165,
|
| 106044 |
+
"learning_rate": 6.668229476397475e-07,
|
| 106045 |
+
"loss": 0.8038276433944702,
|
| 106046 |
+
"step": 15148
|
| 106047 |
+
},
|
| 106048 |
+
{
|
| 106049 |
+
"epoch": 18.587730061349692,
|
| 106050 |
+
"grad_norm": 0.2544812262058258,
|
| 106051 |
+
"learning_rate": 6.6567193299997e-07,
|
| 106052 |
+
"loss": 0.6797153353691101,
|
| 106053 |
+
"step": 15149
|
| 106054 |
+
},
|
| 106055 |
+
{
|
| 106056 |
+
"epoch": 18.588957055214724,
|
| 106057 |
+
"grad_norm": 0.2733532786369324,
|
| 106058 |
+
"learning_rate": 6.645218992075569e-07,
|
| 106059 |
+
"loss": 0.6351924538612366,
|
| 106060 |
+
"step": 15150
|
| 106061 |
+
},
|
| 106062 |
+
{
|
| 106063 |
+
"epoch": 18.590184049079756,
|
| 106064 |
+
"grad_norm": 0.27875566482543945,
|
| 106065 |
+
"learning_rate": 6.633728463088545e-07,
|
| 106066 |
+
"loss": 0.6879254579544067,
|
| 106067 |
+
"step": 15151
|
| 106068 |
+
},
|
| 106069 |
+
{
|
| 106070 |
+
"epoch": 18.591411042944785,
|
| 106071 |
+
"grad_norm": 0.25787198543548584,
|
| 106072 |
+
"learning_rate": 6.622247743501814e-07,
|
| 106073 |
+
"loss": 0.5680431723594666,
|
| 106074 |
+
"step": 15152
|
| 106075 |
+
},
|
| 106076 |
+
{
|
| 106077 |
+
"epoch": 18.592638036809817,
|
| 106078 |
+
"grad_norm": 0.2976534962654114,
|
| 106079 |
+
"learning_rate": 6.610776833778171e-07,
|
| 106080 |
+
"loss": 0.6325063705444336,
|
| 106081 |
+
"step": 15153
|
| 106082 |
+
},
|
| 106083 |
+
{
|
| 106084 |
+
"epoch": 18.593865030674845,
|
| 106085 |
+
"grad_norm": 0.30335891246795654,
|
| 106086 |
+
"learning_rate": 6.599315734379913e-07,
|
| 106087 |
+
"loss": 0.6572613716125488,
|
| 106088 |
+
"step": 15154
|
| 106089 |
+
},
|
| 106090 |
+
{
|
| 106091 |
+
"epoch": 18.595092024539877,
|
| 106092 |
+
"grad_norm": 0.260000616312027,
|
| 106093 |
+
"learning_rate": 6.587864445769087e-07,
|
| 106094 |
+
"loss": 0.4684571921825409,
|
| 106095 |
+
"step": 15155
|
| 106096 |
+
},
|
| 106097 |
+
{
|
| 106098 |
+
"epoch": 18.59631901840491,
|
| 106099 |
+
"grad_norm": 0.2805623710155487,
|
| 106100 |
+
"learning_rate": 6.576422968407186e-07,
|
| 106101 |
+
"loss": 0.6678920984268188,
|
| 106102 |
+
"step": 15156
|
| 106103 |
+
},
|
| 106104 |
+
{
|
| 106105 |
+
"epoch": 18.597546012269937,
|
| 106106 |
+
"grad_norm": 0.3032101094722748,
|
| 106107 |
+
"learning_rate": 6.564991302755369e-07,
|
| 106108 |
+
"loss": 0.7595306634902954,
|
| 106109 |
+
"step": 15157
|
| 106110 |
+
},
|
| 106111 |
+
{
|
| 106112 |
+
"epoch": 18.59877300613497,
|
| 106113 |
+
"grad_norm": 0.2664569914340973,
|
| 106114 |
+
"learning_rate": 6.553569449274488e-07,
|
| 106115 |
+
"loss": 0.5015713572502136,
|
| 106116 |
+
"step": 15158
|
| 106117 |
+
},
|
| 106118 |
+
{
|
| 106119 |
+
"epoch": 18.6,
|
| 106120 |
+
"grad_norm": 0.2755366265773773,
|
| 106121 |
+
"learning_rate": 6.542157408424926e-07,
|
| 106122 |
+
"loss": 0.6590249538421631,
|
| 106123 |
+
"step": 15159
|
| 106124 |
+
},
|
| 106125 |
+
{
|
| 106126 |
+
"epoch": 18.60122699386503,
|
| 106127 |
+
"grad_norm": 0.265776127576828,
|
| 106128 |
+
"learning_rate": 6.530755180666592e-07,
|
| 106129 |
+
"loss": 0.7124239206314087,
|
| 106130 |
+
"step": 15160
|
| 106131 |
+
},
|
| 106132 |
+
{
|
| 106133 |
+
"epoch": 18.602453987730062,
|
| 106134 |
+
"grad_norm": 0.24378176033496857,
|
| 106135 |
+
"learning_rate": 6.519362766459119e-07,
|
| 106136 |
+
"loss": 0.5130454897880554,
|
| 106137 |
+
"step": 15161
|
| 106138 |
+
},
|
| 106139 |
+
{
|
| 106140 |
+
"epoch": 18.60368098159509,
|
| 106141 |
+
"grad_norm": 0.25190094113349915,
|
| 106142 |
+
"learning_rate": 6.507980166261724e-07,
|
| 106143 |
+
"loss": 0.6136342287063599,
|
| 106144 |
+
"step": 15162
|
| 106145 |
+
},
|
| 106146 |
+
{
|
| 106147 |
+
"epoch": 18.604907975460122,
|
| 106148 |
+
"grad_norm": 0.2689828872680664,
|
| 106149 |
+
"learning_rate": 6.496607380533176e-07,
|
| 106150 |
+
"loss": 0.5782923698425293,
|
| 106151 |
+
"step": 15163
|
| 106152 |
+
},
|
| 106153 |
+
{
|
| 106154 |
+
"epoch": 18.606134969325154,
|
| 106155 |
+
"grad_norm": 0.29148581624031067,
|
| 106156 |
+
"learning_rate": 6.485244409731917e-07,
|
| 106157 |
+
"loss": 0.8735064268112183,
|
| 106158 |
+
"step": 15164
|
| 106159 |
+
},
|
| 106160 |
+
{
|
| 106161 |
+
"epoch": 18.607361963190183,
|
| 106162 |
+
"grad_norm": 0.2673431932926178,
|
| 106163 |
+
"learning_rate": 6.473891254315911e-07,
|
| 106164 |
+
"loss": 0.6103352308273315,
|
| 106165 |
+
"step": 15165
|
| 106166 |
+
},
|
| 106167 |
+
{
|
| 106168 |
+
"epoch": 18.608588957055215,
|
| 106169 |
+
"grad_norm": 0.24434660375118256,
|
| 106170 |
+
"learning_rate": 6.462547914742794e-07,
|
| 106171 |
+
"loss": 0.5326496362686157,
|
| 106172 |
+
"step": 15166
|
| 106173 |
+
},
|
| 106174 |
+
{
|
| 106175 |
+
"epoch": 18.609815950920247,
|
| 106176 |
+
"grad_norm": 0.26905757188796997,
|
| 106177 |
+
"learning_rate": 6.451214391469756e-07,
|
| 106178 |
+
"loss": 0.6075183153152466,
|
| 106179 |
+
"step": 15167
|
| 106180 |
+
},
|
| 106181 |
+
{
|
| 106182 |
+
"epoch": 18.611042944785275,
|
| 106183 |
+
"grad_norm": 0.27528470754623413,
|
| 106184 |
+
"learning_rate": 6.439890684953681e-07,
|
| 106185 |
+
"loss": 0.6315281987190247,
|
| 106186 |
+
"step": 15168
|
| 106187 |
+
},
|
| 106188 |
+
{
|
| 106189 |
+
"epoch": 18.612269938650307,
|
| 106190 |
+
"grad_norm": 0.26316067576408386,
|
| 106191 |
+
"learning_rate": 6.428576795650953e-07,
|
| 106192 |
+
"loss": 0.5694060921669006,
|
| 106193 |
+
"step": 15169
|
| 106194 |
+
},
|
| 106195 |
+
{
|
| 106196 |
+
"epoch": 18.61349693251534,
|
| 106197 |
+
"grad_norm": 0.27509331703186035,
|
| 106198 |
+
"learning_rate": 6.417272724017598e-07,
|
| 106199 |
+
"loss": 0.5225584506988525,
|
| 106200 |
+
"step": 15170
|
| 106201 |
+
},
|
| 106202 |
+
{
|
| 106203 |
+
"epoch": 18.614723926380368,
|
| 106204 |
+
"grad_norm": 0.2137763351202011,
|
| 106205 |
+
"learning_rate": 6.40597847050925e-07,
|
| 106206 |
+
"loss": 0.5263010859489441,
|
| 106207 |
+
"step": 15171
|
| 106208 |
+
},
|
| 106209 |
+
{
|
| 106210 |
+
"epoch": 18.6159509202454,
|
| 106211 |
+
"grad_norm": 0.28555798530578613,
|
| 106212 |
+
"learning_rate": 6.394694035581156e-07,
|
| 106213 |
+
"loss": 0.5906305909156799,
|
| 106214 |
+
"step": 15172
|
| 106215 |
+
},
|
| 106216 |
+
{
|
| 106217 |
+
"epoch": 18.617177914110428,
|
| 106218 |
+
"grad_norm": 0.27267321944236755,
|
| 106219 |
+
"learning_rate": 6.383419419688147e-07,
|
| 106220 |
+
"loss": 0.7411438226699829,
|
| 106221 |
+
"step": 15173
|
| 106222 |
+
},
|
| 106223 |
+
{
|
| 106224 |
+
"epoch": 18.61840490797546,
|
| 106225 |
+
"grad_norm": 0.28868696093559265,
|
| 106226 |
+
"learning_rate": 6.372154623284748e-07,
|
| 106227 |
+
"loss": 0.4843969941139221,
|
| 106228 |
+
"step": 15174
|
| 106229 |
+
},
|
| 106230 |
+
{
|
| 106231 |
+
"epoch": 18.619631901840492,
|
| 106232 |
+
"grad_norm": 0.25439172983169556,
|
| 106233 |
+
"learning_rate": 6.360899646824903e-07,
|
| 106234 |
+
"loss": 0.5076420307159424,
|
| 106235 |
+
"step": 15175
|
| 106236 |
+
},
|
| 106237 |
+
{
|
| 106238 |
+
"epoch": 18.62085889570552,
|
| 106239 |
+
"grad_norm": 0.25827136635780334,
|
| 106240 |
+
"learning_rate": 6.349654490762302e-07,
|
| 106241 |
+
"loss": 0.5669487118721008,
|
| 106242 |
+
"step": 15176
|
| 106243 |
+
},
|
| 106244 |
+
{
|
| 106245 |
+
"epoch": 18.622085889570553,
|
| 106246 |
+
"grad_norm": 0.2632838785648346,
|
| 106247 |
+
"learning_rate": 6.338419155550223e-07,
|
| 106248 |
+
"loss": 0.5522025227546692,
|
| 106249 |
+
"step": 15177
|
| 106250 |
+
},
|
| 106251 |
+
{
|
| 106252 |
+
"epoch": 18.62331288343558,
|
| 106253 |
+
"grad_norm": 0.33273184299468994,
|
| 106254 |
+
"learning_rate": 6.327193641641527e-07,
|
| 106255 |
+
"loss": 0.8938724994659424,
|
| 106256 |
+
"step": 15178
|
| 106257 |
+
},
|
| 106258 |
+
{
|
| 106259 |
+
"epoch": 18.624539877300613,
|
| 106260 |
+
"grad_norm": 0.2522442042827606,
|
| 106261 |
+
"learning_rate": 6.315977949488683e-07,
|
| 106262 |
+
"loss": 0.5643070936203003,
|
| 106263 |
+
"step": 15179
|
| 106264 |
+
},
|
| 106265 |
+
{
|
| 106266 |
+
"epoch": 18.625766871165645,
|
| 106267 |
+
"grad_norm": 0.296111136674881,
|
| 106268 |
+
"learning_rate": 6.304772079543747e-07,
|
| 106269 |
+
"loss": 0.6548358201980591,
|
| 106270 |
+
"step": 15180
|
| 106271 |
+
},
|
| 106272 |
+
{
|
| 106273 |
+
"epoch": 18.626993865030673,
|
| 106274 |
+
"grad_norm": 0.2684704065322876,
|
| 106275 |
+
"learning_rate": 6.293576032258413e-07,
|
| 106276 |
+
"loss": 0.592420756816864,
|
| 106277 |
+
"step": 15181
|
| 106278 |
+
},
|
| 106279 |
+
{
|
| 106280 |
+
"epoch": 18.628220858895705,
|
| 106281 |
+
"grad_norm": 0.3072897493839264,
|
| 106282 |
+
"learning_rate": 6.282389808083961e-07,
|
| 106283 |
+
"loss": 0.743360161781311,
|
| 106284 |
+
"step": 15182
|
| 106285 |
+
},
|
| 106286 |
+
{
|
| 106287 |
+
"epoch": 18.629447852760737,
|
| 106288 |
+
"grad_norm": 0.26359885931015015,
|
| 106289 |
+
"learning_rate": 6.271213407471305e-07,
|
| 106290 |
+
"loss": 0.7179452180862427,
|
| 106291 |
+
"step": 15183
|
| 106292 |
+
},
|
| 106293 |
+
{
|
| 106294 |
+
"epoch": 18.630674846625766,
|
| 106295 |
+
"grad_norm": 0.25691908597946167,
|
| 106296 |
+
"learning_rate": 6.260046830870864e-07,
|
| 106297 |
+
"loss": 0.6879527568817139,
|
| 106298 |
+
"step": 15184
|
| 106299 |
+
},
|
| 106300 |
+
{
|
| 106301 |
+
"epoch": 18.631901840490798,
|
| 106302 |
+
"grad_norm": 0.2890221178531647,
|
| 106303 |
+
"learning_rate": 6.24889007873275e-07,
|
| 106304 |
+
"loss": 0.6400716304779053,
|
| 106305 |
+
"step": 15185
|
| 106306 |
+
},
|
| 106307 |
+
{
|
| 106308 |
+
"epoch": 18.63312883435583,
|
| 106309 |
+
"grad_norm": 0.2790442109107971,
|
| 106310 |
+
"learning_rate": 6.237743151506715e-07,
|
| 106311 |
+
"loss": 0.4744044542312622,
|
| 106312 |
+
"step": 15186
|
| 106313 |
+
},
|
| 106314 |
+
{
|
| 106315 |
+
"epoch": 18.63435582822086,
|
| 106316 |
+
"grad_norm": 0.27908948063850403,
|
| 106317 |
+
"learning_rate": 6.22660604964198e-07,
|
| 106318 |
+
"loss": 0.6723270416259766,
|
| 106319 |
+
"step": 15187
|
| 106320 |
+
},
|
| 106321 |
+
{
|
| 106322 |
+
"epoch": 18.63558282208589,
|
| 106323 |
+
"grad_norm": 0.2986932396888733,
|
| 106324 |
+
"learning_rate": 6.215478773587552e-07,
|
| 106325 |
+
"loss": 0.771935224533081,
|
| 106326 |
+
"step": 15188
|
| 106327 |
+
},
|
| 106328 |
+
{
|
| 106329 |
+
"epoch": 18.63680981595092,
|
| 106330 |
+
"grad_norm": 0.27121642231941223,
|
| 106331 |
+
"learning_rate": 6.204361323791846e-07,
|
| 106332 |
+
"loss": 0.528566837310791,
|
| 106333 |
+
"step": 15189
|
| 106334 |
+
},
|
| 106335 |
+
{
|
| 106336 |
+
"epoch": 18.63803680981595,
|
| 106337 |
+
"grad_norm": 0.25893843173980713,
|
| 106338 |
+
"learning_rate": 6.193253700703005e-07,
|
| 106339 |
+
"loss": 0.6105513572692871,
|
| 106340 |
+
"step": 15190
|
| 106341 |
+
},
|
| 106342 |
+
{
|
| 106343 |
+
"epoch": 18.639263803680983,
|
| 106344 |
+
"grad_norm": 0.23307769000530243,
|
| 106345 |
+
"learning_rate": 6.182155904768727e-07,
|
| 106346 |
+
"loss": 0.47514796257019043,
|
| 106347 |
+
"step": 15191
|
| 106348 |
+
},
|
| 106349 |
+
{
|
| 106350 |
+
"epoch": 18.64049079754601,
|
| 106351 |
+
"grad_norm": 0.2811766564846039,
|
| 106352 |
+
"learning_rate": 6.171067936436375e-07,
|
| 106353 |
+
"loss": 0.48511427640914917,
|
| 106354 |
+
"step": 15192
|
| 106355 |
+
},
|
| 106356 |
+
{
|
| 106357 |
+
"epoch": 18.641717791411043,
|
| 106358 |
+
"grad_norm": 0.22637492418289185,
|
| 106359 |
+
"learning_rate": 6.159989796152898e-07,
|
| 106360 |
+
"loss": 0.43498456478118896,
|
| 106361 |
+
"step": 15193
|
| 106362 |
+
},
|
| 106363 |
+
{
|
| 106364 |
+
"epoch": 18.642944785276075,
|
| 106365 |
+
"grad_norm": 0.2596653401851654,
|
| 106366 |
+
"learning_rate": 6.148921484364717e-07,
|
| 106367 |
+
"loss": 0.5635366439819336,
|
| 106368 |
+
"step": 15194
|
| 106369 |
+
},
|
| 106370 |
+
{
|
| 106371 |
+
"epoch": 18.644171779141104,
|
| 106372 |
+
"grad_norm": 0.30054521560668945,
|
| 106373 |
+
"learning_rate": 6.137863001518057e-07,
|
| 106374 |
+
"loss": 0.5813276767730713,
|
| 106375 |
+
"step": 15195
|
| 106376 |
+
},
|
| 106377 |
+
{
|
| 106378 |
+
"epoch": 18.645398773006136,
|
| 106379 |
+
"grad_norm": 0.2696506083011627,
|
| 106380 |
+
"learning_rate": 6.126814348058591e-07,
|
| 106381 |
+
"loss": 0.6740450859069824,
|
| 106382 |
+
"step": 15196
|
| 106383 |
+
},
|
| 106384 |
+
{
|
| 106385 |
+
"epoch": 18.646625766871164,
|
| 106386 |
+
"grad_norm": 0.24237844347953796,
|
| 106387 |
+
"learning_rate": 6.11577552443171e-07,
|
| 106388 |
+
"loss": 0.4849277138710022,
|
| 106389 |
+
"step": 15197
|
| 106390 |
+
},
|
| 106391 |
+
{
|
| 106392 |
+
"epoch": 18.647852760736196,
|
| 106393 |
+
"grad_norm": 0.27191784977912903,
|
| 106394 |
+
"learning_rate": 6.104746531082367e-07,
|
| 106395 |
+
"loss": 0.6088966131210327,
|
| 106396 |
+
"step": 15198
|
| 106397 |
+
},
|
| 106398 |
+
{
|
| 106399 |
+
"epoch": 18.649079754601228,
|
| 106400 |
+
"grad_norm": 0.24983380734920502,
|
| 106401 |
+
"learning_rate": 6.093727368455038e-07,
|
| 106402 |
+
"loss": 0.46161937713623047,
|
| 106403 |
+
"step": 15199
|
| 106404 |
+
},
|
| 106405 |
+
{
|
| 106406 |
+
"epoch": 18.650306748466257,
|
| 106407 |
+
"grad_norm": 0.28195759654045105,
|
| 106408 |
+
"learning_rate": 6.082718036993923e-07,
|
| 106409 |
+
"loss": 0.6653953790664673,
|
| 106410 |
+
"step": 15200
|
| 106411 |
+
},
|
| 106412 |
+
{
|
| 106413 |
+
"epoch": 18.65153374233129,
|
| 106414 |
+
"grad_norm": 0.2479625642299652,
|
| 106415 |
+
"learning_rate": 6.071718537142751e-07,
|
| 106416 |
+
"loss": 0.5292989611625671,
|
| 106417 |
+
"step": 15201
|
| 106418 |
+
},
|
| 106419 |
+
{
|
| 106420 |
+
"epoch": 18.65276073619632,
|
| 106421 |
+
"grad_norm": 0.24449020624160767,
|
| 106422 |
+
"learning_rate": 6.060728869344945e-07,
|
| 106423 |
+
"loss": 0.41676780581474304,
|
| 106424 |
+
"step": 15202
|
| 106425 |
+
},
|
| 106426 |
+
{
|
| 106427 |
+
"epoch": 18.65398773006135,
|
| 106428 |
+
"grad_norm": 0.2653891444206238,
|
| 106429 |
+
"learning_rate": 6.0497490340434e-07,
|
| 106430 |
+
"loss": 0.5717869997024536,
|
| 106431 |
+
"step": 15203
|
| 106432 |
+
},
|
| 106433 |
+
{
|
| 106434 |
+
"epoch": 18.65521472392638,
|
| 106435 |
+
"grad_norm": 0.2833523452281952,
|
| 106436 |
+
"learning_rate": 6.038779031680708e-07,
|
| 106437 |
+
"loss": 0.6582491397857666,
|
| 106438 |
+
"step": 15204
|
| 106439 |
+
},
|
| 106440 |
+
{
|
| 106441 |
+
"epoch": 18.65644171779141,
|
| 106442 |
+
"grad_norm": 0.24876902997493744,
|
| 106443 |
+
"learning_rate": 6.027818862699013e-07,
|
| 106444 |
+
"loss": 0.5271468162536621,
|
| 106445 |
+
"step": 15205
|
| 106446 |
+
},
|
| 106447 |
+
{
|
| 106448 |
+
"epoch": 18.65766871165644,
|
| 106449 |
+
"grad_norm": 0.2767656147480011,
|
| 106450 |
+
"learning_rate": 6.016868527540131e-07,
|
| 106451 |
+
"loss": 0.6003050804138184,
|
| 106452 |
+
"step": 15206
|
| 106453 |
+
},
|
| 106454 |
+
{
|
| 106455 |
+
"epoch": 18.658895705521473,
|
| 106456 |
+
"grad_norm": 0.29805460572242737,
|
| 106457 |
+
"learning_rate": 6.005928026645429e-07,
|
| 106458 |
+
"loss": 0.7094470262527466,
|
| 106459 |
+
"step": 15207
|
| 106460 |
+
},
|
| 106461 |
+
{
|
| 106462 |
+
"epoch": 18.660122699386502,
|
| 106463 |
+
"grad_norm": 0.3125782310962677,
|
| 106464 |
+
"learning_rate": 5.994997360455862e-07,
|
| 106465 |
+
"loss": 0.6571345329284668,
|
| 106466 |
+
"step": 15208
|
| 106467 |
+
},
|
| 106468 |
+
{
|
| 106469 |
+
"epoch": 18.661349693251534,
|
| 106470 |
+
"grad_norm": 0.26030030846595764,
|
| 106471 |
+
"learning_rate": 5.984076529412019e-07,
|
| 106472 |
+
"loss": 0.6451325416564941,
|
| 106473 |
+
"step": 15209
|
| 106474 |
+
},
|
| 106475 |
+
{
|
| 106476 |
+
"epoch": 18.662576687116566,
|
| 106477 |
+
"grad_norm": 0.27238890528678894,
|
| 106478 |
+
"learning_rate": 5.973165533954106e-07,
|
| 106479 |
+
"loss": 0.4976768493652344,
|
| 106480 |
+
"step": 15210
|
| 106481 |
+
},
|
| 106482 |
+
{
|
| 106483 |
+
"epoch": 18.663803680981594,
|
| 106484 |
+
"grad_norm": 0.25750961899757385,
|
| 106485 |
+
"learning_rate": 5.962264374521908e-07,
|
| 106486 |
+
"loss": 0.5418727397918701,
|
| 106487 |
+
"step": 15211
|
| 106488 |
+
},
|
| 106489 |
+
{
|
| 106490 |
+
"epoch": 18.665030674846626,
|
| 106491 |
+
"grad_norm": 0.24363575875759125,
|
| 106492 |
+
"learning_rate": 5.951373051554826e-07,
|
| 106493 |
+
"loss": 0.4258368909358978,
|
| 106494 |
+
"step": 15212
|
| 106495 |
+
},
|
| 106496 |
+
{
|
| 106497 |
+
"epoch": 18.666257668711655,
|
| 106498 |
+
"grad_norm": 0.25335267186164856,
|
| 106499 |
+
"learning_rate": 5.940491565491813e-07,
|
| 106500 |
+
"loss": 0.6272305846214294,
|
| 106501 |
+
"step": 15213
|
| 106502 |
+
},
|
| 106503 |
+
{
|
| 106504 |
+
"epoch": 18.667484662576687,
|
| 106505 |
+
"grad_norm": 0.2740723192691803,
|
| 106506 |
+
"learning_rate": 5.929619916771518e-07,
|
| 106507 |
+
"loss": 0.5700002908706665,
|
| 106508 |
+
"step": 15214
|
| 106509 |
+
},
|
| 106510 |
+
{
|
| 106511 |
+
"epoch": 18.66871165644172,
|
| 106512 |
+
"grad_norm": 0.27243390679359436,
|
| 106513 |
+
"learning_rate": 5.918758105832145e-07,
|
| 106514 |
+
"loss": 0.853178083896637,
|
| 106515 |
+
"step": 15215
|
| 106516 |
+
},
|
| 106517 |
+
{
|
| 106518 |
+
"epoch": 18.669938650306747,
|
| 106519 |
+
"grad_norm": 0.28763407468795776,
|
| 106520 |
+
"learning_rate": 5.907906133111485e-07,
|
| 106521 |
+
"loss": 0.6041160225868225,
|
| 106522 |
+
"step": 15216
|
| 106523 |
+
},
|
| 106524 |
+
{
|
| 106525 |
+
"epoch": 18.67116564417178,
|
| 106526 |
+
"grad_norm": 0.2628782093524933,
|
| 106527 |
+
"learning_rate": 5.897063999046965e-07,
|
| 106528 |
+
"loss": 0.5168743133544922,
|
| 106529 |
+
"step": 15217
|
| 106530 |
+
},
|
| 106531 |
+
{
|
| 106532 |
+
"epoch": 18.67239263803681,
|
| 106533 |
+
"grad_norm": 0.3157549202442169,
|
| 106534 |
+
"learning_rate": 5.886231704075596e-07,
|
| 106535 |
+
"loss": 0.7035725116729736,
|
| 106536 |
+
"step": 15218
|
| 106537 |
+
},
|
| 106538 |
+
{
|
| 106539 |
+
"epoch": 18.67361963190184,
|
| 106540 |
+
"grad_norm": 0.3202553689479828,
|
| 106541 |
+
"learning_rate": 5.875409248633973e-07,
|
| 106542 |
+
"loss": 0.536139965057373,
|
| 106543 |
+
"step": 15219
|
| 106544 |
+
},
|
| 106545 |
+
{
|
| 106546 |
+
"epoch": 18.67484662576687,
|
| 106547 |
+
"grad_norm": 0.24115949869155884,
|
| 106548 |
+
"learning_rate": 5.864596633158331e-07,
|
| 106549 |
+
"loss": 0.4880269765853882,
|
| 106550 |
+
"step": 15220
|
| 106551 |
+
},
|
| 106552 |
+
{
|
| 106553 |
+
"epoch": 18.6760736196319,
|
| 106554 |
+
"grad_norm": 0.26897159218788147,
|
| 106555 |
+
"learning_rate": 5.853793858084517e-07,
|
| 106556 |
+
"loss": 0.7130246162414551,
|
| 106557 |
+
"step": 15221
|
| 106558 |
+
},
|
| 106559 |
+
{
|
| 106560 |
+
"epoch": 18.677300613496932,
|
| 106561 |
+
"grad_norm": 0.26036468148231506,
|
| 106562 |
+
"learning_rate": 5.84300092384793e-07,
|
| 106563 |
+
"loss": 0.5350053906440735,
|
| 106564 |
+
"step": 15222
|
| 106565 |
+
},
|
| 106566 |
+
{
|
| 106567 |
+
"epoch": 18.678527607361964,
|
| 106568 |
+
"grad_norm": 0.26878878474235535,
|
| 106569 |
+
"learning_rate": 5.832217830883641e-07,
|
| 106570 |
+
"loss": 0.6075853705406189,
|
| 106571 |
+
"step": 15223
|
| 106572 |
+
},
|
| 106573 |
+
{
|
| 106574 |
+
"epoch": 18.679754601226993,
|
| 106575 |
+
"grad_norm": 0.2722039222717285,
|
| 106576 |
+
"learning_rate": 5.821444579626245e-07,
|
| 106577 |
+
"loss": 0.6574867963790894,
|
| 106578 |
+
"step": 15224
|
| 106579 |
+
},
|
| 106580 |
+
{
|
| 106581 |
+
"epoch": 18.680981595092025,
|
| 106582 |
+
"grad_norm": 0.2535555362701416,
|
| 106583 |
+
"learning_rate": 5.810681170510007e-07,
|
| 106584 |
+
"loss": 0.5557583570480347,
|
| 106585 |
+
"step": 15225
|
| 106586 |
+
},
|
| 106587 |
+
{
|
| 106588 |
+
"epoch": 18.682208588957057,
|
| 106589 |
+
"grad_norm": 0.26748672127723694,
|
| 106590 |
+
"learning_rate": 5.799927603968747e-07,
|
| 106591 |
+
"loss": 0.6356940269470215,
|
| 106592 |
+
"step": 15226
|
| 106593 |
+
},
|
| 106594 |
+
{
|
| 106595 |
+
"epoch": 18.683435582822085,
|
| 106596 |
+
"grad_norm": 0.26495257019996643,
|
| 106597 |
+
"learning_rate": 5.789183880435978e-07,
|
| 106598 |
+
"loss": 0.525108814239502,
|
| 106599 |
+
"step": 15227
|
| 106600 |
+
},
|
| 106601 |
+
{
|
| 106602 |
+
"epoch": 18.684662576687117,
|
| 106603 |
+
"grad_norm": 0.272734671831131,
|
| 106604 |
+
"learning_rate": 5.77845000034466e-07,
|
| 106605 |
+
"loss": 0.6729491949081421,
|
| 106606 |
+
"step": 15228
|
| 106607 |
+
},
|
| 106608 |
+
{
|
| 106609 |
+
"epoch": 18.68588957055215,
|
| 106610 |
+
"grad_norm": 0.25642308592796326,
|
| 106611 |
+
"learning_rate": 5.767725964127473e-07,
|
| 106612 |
+
"loss": 0.565299391746521,
|
| 106613 |
+
"step": 15229
|
| 106614 |
+
},
|
| 106615 |
+
{
|
| 106616 |
+
"epoch": 18.687116564417177,
|
| 106617 |
+
"grad_norm": 0.25593680143356323,
|
| 106618 |
+
"learning_rate": 5.757011772216686e-07,
|
| 106619 |
+
"loss": 0.48387008905410767,
|
| 106620 |
+
"step": 15230
|
| 106621 |
+
},
|
| 106622 |
+
{
|
| 106623 |
+
"epoch": 18.68834355828221,
|
| 106624 |
+
"grad_norm": 0.28749197721481323,
|
| 106625 |
+
"learning_rate": 5.746307425044145e-07,
|
| 106626 |
+
"loss": 0.6839739680290222,
|
| 106627 |
+
"step": 15231
|
| 106628 |
+
},
|
| 106629 |
+
{
|
| 106630 |
+
"epoch": 18.689570552147238,
|
| 106631 |
+
"grad_norm": 0.2638063132762909,
|
| 106632 |
+
"learning_rate": 5.735612923041339e-07,
|
| 106633 |
+
"loss": 0.4446793496608734,
|
| 106634 |
+
"step": 15232
|
| 106635 |
+
},
|
| 106636 |
+
{
|
| 106637 |
+
"epoch": 18.69079754601227,
|
| 106638 |
+
"grad_norm": 0.2121974229812622,
|
| 106639 |
+
"learning_rate": 5.724928266639313e-07,
|
| 106640 |
+
"loss": 0.4256136417388916,
|
| 106641 |
+
"step": 15233
|
| 106642 |
+
},
|
| 106643 |
+
{
|
| 106644 |
+
"epoch": 18.692024539877302,
|
| 106645 |
+
"grad_norm": 0.28113579750061035,
|
| 106646 |
+
"learning_rate": 5.714253456268693e-07,
|
| 106647 |
+
"loss": 0.5871365070343018,
|
| 106648 |
+
"step": 15234
|
| 106649 |
+
},
|
| 106650 |
+
{
|
| 106651 |
+
"epoch": 18.69325153374233,
|
| 106652 |
+
"grad_norm": 0.27249473333358765,
|
| 106653 |
+
"learning_rate": 5.703588492359829e-07,
|
| 106654 |
+
"loss": 0.47084981203079224,
|
| 106655 |
+
"step": 15235
|
| 106656 |
+
},
|
| 106657 |
+
{
|
| 106658 |
+
"epoch": 18.694478527607362,
|
| 106659 |
+
"grad_norm": 0.2878004312515259,
|
| 106660 |
+
"learning_rate": 5.692933375342547e-07,
|
| 106661 |
+
"loss": 0.9171600937843323,
|
| 106662 |
+
"step": 15236
|
| 106663 |
+
},
|
| 106664 |
+
{
|
| 106665 |
+
"epoch": 18.69570552147239,
|
| 106666 |
+
"grad_norm": 0.2571480870246887,
|
| 106667 |
+
"learning_rate": 5.682288105646361e-07,
|
| 106668 |
+
"loss": 0.6159586906433105,
|
| 106669 |
+
"step": 15237
|
| 106670 |
+
},
|
| 106671 |
+
{
|
| 106672 |
+
"epoch": 18.696932515337423,
|
| 106673 |
+
"grad_norm": 0.26886430382728577,
|
| 106674 |
+
"learning_rate": 5.67165268370029e-07,
|
| 106675 |
+
"loss": 0.6129826903343201,
|
| 106676 |
+
"step": 15238
|
| 106677 |
+
},
|
| 106678 |
+
{
|
| 106679 |
+
"epoch": 18.698159509202455,
|
| 106680 |
+
"grad_norm": 0.2444818615913391,
|
| 106681 |
+
"learning_rate": 5.661027109933048e-07,
|
| 106682 |
+
"loss": 0.6010912656784058,
|
| 106683 |
+
"step": 15239
|
| 106684 |
+
},
|
| 106685 |
+
{
|
| 106686 |
+
"epoch": 18.699386503067483,
|
| 106687 |
+
"grad_norm": 0.27998071908950806,
|
| 106688 |
+
"learning_rate": 5.650411384772958e-07,
|
| 106689 |
+
"loss": 0.5719574689865112,
|
| 106690 |
+
"step": 15240
|
| 106691 |
+
},
|
| 106692 |
+
{
|
| 106693 |
+
"epoch": 18.700613496932515,
|
| 106694 |
+
"grad_norm": 0.27225813269615173,
|
| 106695 |
+
"learning_rate": 5.639805508647844e-07,
|
| 106696 |
+
"loss": 0.7159888744354248,
|
| 106697 |
+
"step": 15241
|
| 106698 |
+
},
|
| 106699 |
+
{
|
| 106700 |
+
"epoch": 18.701840490797547,
|
| 106701 |
+
"grad_norm": 0.24633541703224182,
|
| 106702 |
+
"learning_rate": 5.629209481985281e-07,
|
| 106703 |
+
"loss": 0.6096060872077942,
|
| 106704 |
+
"step": 15242
|
| 106705 |
+
},
|
| 106706 |
+
{
|
| 106707 |
+
"epoch": 18.703067484662576,
|
| 106708 |
+
"grad_norm": 0.26989373564720154,
|
| 106709 |
+
"learning_rate": 5.618623305212289e-07,
|
| 106710 |
+
"loss": 0.6154076457023621,
|
| 106711 |
+
"step": 15243
|
| 106712 |
+
},
|
| 106713 |
+
{
|
| 106714 |
+
"epoch": 18.704294478527608,
|
| 106715 |
+
"grad_norm": 0.25045183300971985,
|
| 106716 |
+
"learning_rate": 5.608046978755582e-07,
|
| 106717 |
+
"loss": 0.549468457698822,
|
| 106718 |
+
"step": 15244
|
| 106719 |
+
},
|
| 106720 |
+
{
|
| 106721 |
+
"epoch": 18.70552147239264,
|
| 106722 |
+
"grad_norm": 0.22654187679290771,
|
| 106723 |
+
"learning_rate": 5.597480503041486e-07,
|
| 106724 |
+
"loss": 0.37039828300476074,
|
| 106725 |
+
"step": 15245
|
| 106726 |
+
},
|
| 106727 |
+
{
|
| 106728 |
+
"epoch": 18.706748466257668,
|
| 106729 |
+
"grad_norm": 0.2751024067401886,
|
| 106730 |
+
"learning_rate": 5.586923878495881e-07,
|
| 106731 |
+
"loss": 0.38881999254226685,
|
| 106732 |
+
"step": 15246
|
| 106733 |
+
},
|
| 106734 |
+
{
|
| 106735 |
+
"epoch": 18.7079754601227,
|
| 106736 |
+
"grad_norm": 0.26486557722091675,
|
| 106737 |
+
"learning_rate": 5.57637710554429e-07,
|
| 106738 |
+
"loss": 0.5700777769088745,
|
| 106739 |
+
"step": 15247
|
| 106740 |
+
},
|
| 106741 |
+
{
|
| 106742 |
+
"epoch": 18.70920245398773,
|
| 106743 |
+
"grad_norm": 0.2814318537712097,
|
| 106744 |
+
"learning_rate": 5.565840184611814e-07,
|
| 106745 |
+
"loss": 0.5660899877548218,
|
| 106746 |
+
"step": 15248
|
| 106747 |
+
},
|
| 106748 |
+
{
|
| 106749 |
+
"epoch": 18.71042944785276,
|
| 106750 |
+
"grad_norm": 0.22533221542835236,
|
| 106751 |
+
"learning_rate": 5.555313116123174e-07,
|
| 106752 |
+
"loss": 0.4923405945301056,
|
| 106753 |
+
"step": 15249
|
| 106754 |
+
},
|
| 106755 |
+
{
|
| 106756 |
+
"epoch": 18.711656441717793,
|
| 106757 |
+
"grad_norm": 0.27861565351486206,
|
| 106758 |
+
"learning_rate": 5.544795900502692e-07,
|
| 106759 |
+
"loss": 0.5831438302993774,
|
| 106760 |
+
"step": 15250
|
| 106761 |
+
},
|
| 106762 |
+
{
|
| 106763 |
+
"epoch": 18.71288343558282,
|
| 106764 |
+
"grad_norm": 0.27669093012809753,
|
| 106765 |
+
"learning_rate": 5.53428853817431e-07,
|
| 106766 |
+
"loss": 0.6797917485237122,
|
| 106767 |
+
"step": 15251
|
| 106768 |
+
},
|
| 106769 |
+
{
|
| 106770 |
+
"epoch": 18.714110429447853,
|
| 106771 |
+
"grad_norm": 0.27215513586997986,
|
| 106772 |
+
"learning_rate": 5.523791029561492e-07,
|
| 106773 |
+
"loss": 0.6180120706558228,
|
| 106774 |
+
"step": 15252
|
| 106775 |
+
},
|
| 106776 |
+
{
|
| 106777 |
+
"epoch": 18.715337423312885,
|
| 106778 |
+
"grad_norm": 0.25468236207962036,
|
| 106779 |
+
"learning_rate": 5.513303375087376e-07,
|
| 106780 |
+
"loss": 0.5339978933334351,
|
| 106781 |
+
"step": 15253
|
| 106782 |
+
},
|
| 106783 |
+
{
|
| 106784 |
+
"epoch": 18.716564417177914,
|
| 106785 |
+
"grad_norm": 0.2686900198459625,
|
| 106786 |
+
"learning_rate": 5.502825575174703e-07,
|
| 106787 |
+
"loss": 0.5468940138816833,
|
| 106788 |
+
"step": 15254
|
| 106789 |
+
},
|
| 106790 |
+
{
|
| 106791 |
+
"epoch": 18.717791411042946,
|
| 106792 |
+
"grad_norm": 0.2745606005191803,
|
| 106793 |
+
"learning_rate": 5.492357630245831e-07,
|
| 106794 |
+
"loss": 0.46569889783859253,
|
| 106795 |
+
"step": 15255
|
| 106796 |
+
},
|
| 106797 |
+
{
|
| 106798 |
+
"epoch": 18.719018404907974,
|
| 106799 |
+
"grad_norm": 0.27219411730766296,
|
| 106800 |
+
"learning_rate": 5.481899540722673e-07,
|
| 106801 |
+
"loss": 0.537952184677124,
|
| 106802 |
+
"step": 15256
|
| 106803 |
+
},
|
| 106804 |
+
{
|
| 106805 |
+
"epoch": 18.720245398773006,
|
| 106806 |
+
"grad_norm": 0.2922494113445282,
|
| 106807 |
+
"learning_rate": 5.471451307026726e-07,
|
| 106808 |
+
"loss": 0.3274156451225281,
|
| 106809 |
+
"step": 15257
|
| 106810 |
+
},
|
| 106811 |
+
{
|
| 106812 |
+
"epoch": 18.721472392638038,
|
| 106813 |
+
"grad_norm": 0.24552351236343384,
|
| 106814 |
+
"learning_rate": 5.461012929579151e-07,
|
| 106815 |
+
"loss": 0.3906986713409424,
|
| 106816 |
+
"step": 15258
|
| 106817 |
+
},
|
| 106818 |
+
{
|
| 106819 |
+
"epoch": 18.722699386503066,
|
| 106820 |
+
"grad_norm": 0.2826331555843353,
|
| 106821 |
+
"learning_rate": 5.450584408800724e-07,
|
| 106822 |
+
"loss": 0.6662216186523438,
|
| 106823 |
+
"step": 15259
|
| 106824 |
+
},
|
| 106825 |
+
{
|
| 106826 |
+
"epoch": 18.7239263803681,
|
| 106827 |
+
"grad_norm": 0.2794772982597351,
|
| 106828 |
+
"learning_rate": 5.440165745111747e-07,
|
| 106829 |
+
"loss": 0.37987232208251953,
|
| 106830 |
+
"step": 15260
|
| 106831 |
+
},
|
| 106832 |
+
{
|
| 106833 |
+
"epoch": 18.72515337423313,
|
| 106834 |
+
"grad_norm": 0.2881767153739929,
|
| 106835 |
+
"learning_rate": 5.42975693893219e-07,
|
| 106836 |
+
"loss": 0.7102439403533936,
|
| 106837 |
+
"step": 15261
|
| 106838 |
+
},
|
| 106839 |
+
{
|
| 106840 |
+
"epoch": 18.72638036809816,
|
| 106841 |
+
"grad_norm": 0.2919233739376068,
|
| 106842 |
+
"learning_rate": 5.419357990681606e-07,
|
| 106843 |
+
"loss": 0.6430931687355042,
|
| 106844 |
+
"step": 15262
|
| 106845 |
+
},
|
| 106846 |
+
{
|
| 106847 |
+
"epoch": 18.72760736196319,
|
| 106848 |
+
"grad_norm": 0.30307117104530334,
|
| 106849 |
+
"learning_rate": 5.408968900779104e-07,
|
| 106850 |
+
"loss": 0.6392380595207214,
|
| 106851 |
+
"step": 15263
|
| 106852 |
+
},
|
| 106853 |
+
{
|
| 106854 |
+
"epoch": 18.72883435582822,
|
| 106855 |
+
"grad_norm": 0.3010847866535187,
|
| 106856 |
+
"learning_rate": 5.398589669643489e-07,
|
| 106857 |
+
"loss": 0.5133817791938782,
|
| 106858 |
+
"step": 15264
|
| 106859 |
+
},
|
| 106860 |
+
{
|
| 106861 |
+
"epoch": 18.73006134969325,
|
| 106862 |
+
"grad_norm": 0.25219497084617615,
|
| 106863 |
+
"learning_rate": 5.388220297693092e-07,
|
| 106864 |
+
"loss": 0.6730844378471375,
|
| 106865 |
+
"step": 15265
|
| 106866 |
+
},
|
| 106867 |
+
{
|
| 106868 |
+
"epoch": 18.731288343558283,
|
| 106869 |
+
"grad_norm": 0.48402801156044006,
|
| 106870 |
+
"learning_rate": 5.377860785345911e-07,
|
| 106871 |
+
"loss": 0.6036498546600342,
|
| 106872 |
+
"step": 15266
|
| 106873 |
+
},
|
| 106874 |
+
{
|
| 106875 |
+
"epoch": 18.73251533742331,
|
| 106876 |
+
"grad_norm": 0.30074167251586914,
|
| 106877 |
+
"learning_rate": 5.367511133019448e-07,
|
| 106878 |
+
"loss": 0.7011460065841675,
|
| 106879 |
+
"step": 15267
|
| 106880 |
+
},
|
| 106881 |
+
{
|
| 106882 |
+
"epoch": 18.733742331288344,
|
| 106883 |
+
"grad_norm": 0.2780938446521759,
|
| 106884 |
+
"learning_rate": 5.357171341130895e-07,
|
| 106885 |
+
"loss": 0.645973801612854,
|
| 106886 |
+
"step": 15268
|
| 106887 |
+
},
|
| 106888 |
+
{
|
| 106889 |
+
"epoch": 18.734969325153376,
|
| 106890 |
+
"grad_norm": 0.2802067697048187,
|
| 106891 |
+
"learning_rate": 5.346841410097031e-07,
|
| 106892 |
+
"loss": 0.6336469650268555,
|
| 106893 |
+
"step": 15269
|
| 106894 |
+
},
|
| 106895 |
+
{
|
| 106896 |
+
"epoch": 18.736196319018404,
|
| 106897 |
+
"grad_norm": 0.26530593633651733,
|
| 106898 |
+
"learning_rate": 5.336521340334217e-07,
|
| 106899 |
+
"loss": 0.5966158509254456,
|
| 106900 |
+
"step": 15270
|
| 106901 |
+
},
|
| 106902 |
+
{
|
| 106903 |
+
"epoch": 18.737423312883436,
|
| 106904 |
+
"grad_norm": 0.2976471483707428,
|
| 106905 |
+
"learning_rate": 5.326211132258424e-07,
|
| 106906 |
+
"loss": 0.5017695426940918,
|
| 106907 |
+
"step": 15271
|
| 106908 |
+
},
|
| 106909 |
+
{
|
| 106910 |
+
"epoch": 18.738650306748465,
|
| 106911 |
+
"grad_norm": 0.25110357999801636,
|
| 106912 |
+
"learning_rate": 5.315910786285239e-07,
|
| 106913 |
+
"loss": 0.4725927710533142,
|
| 106914 |
+
"step": 15272
|
| 106915 |
+
},
|
| 106916 |
+
{
|
| 106917 |
+
"epoch": 18.739877300613497,
|
| 106918 |
+
"grad_norm": 0.28125351667404175,
|
| 106919 |
+
"learning_rate": 5.3056203028298e-07,
|
| 106920 |
+
"loss": 0.6430981159210205,
|
| 106921 |
+
"step": 15273
|
| 106922 |
+
},
|
| 106923 |
+
{
|
| 106924 |
+
"epoch": 18.74110429447853,
|
| 106925 |
+
"grad_norm": 0.2614331841468811,
|
| 106926 |
+
"learning_rate": 5.295339682306943e-07,
|
| 106927 |
+
"loss": 0.4597034454345703,
|
| 106928 |
+
"step": 15274
|
| 106929 |
+
},
|
| 106930 |
+
{
|
| 106931 |
+
"epoch": 18.742331288343557,
|
| 106932 |
+
"grad_norm": 0.3079015016555786,
|
| 106933 |
+
"learning_rate": 5.285068925131031e-07,
|
| 106934 |
+
"loss": 0.6254681348800659,
|
| 106935 |
+
"step": 15275
|
| 106936 |
+
},
|
| 106937 |
+
{
|
| 106938 |
+
"epoch": 18.74355828220859,
|
| 106939 |
+
"grad_norm": 0.2647608518600464,
|
| 106940 |
+
"learning_rate": 5.274808031716039e-07,
|
| 106941 |
+
"loss": 0.5448773503303528,
|
| 106942 |
+
"step": 15276
|
| 106943 |
+
},
|
| 106944 |
+
{
|
| 106945 |
+
"epoch": 18.74478527607362,
|
| 106946 |
+
"grad_norm": 0.24666734039783478,
|
| 106947 |
+
"learning_rate": 5.264557002475523e-07,
|
| 106948 |
+
"loss": 0.5429290533065796,
|
| 106949 |
+
"step": 15277
|
| 106950 |
+
},
|
| 106951 |
+
{
|
| 106952 |
+
"epoch": 18.74601226993865,
|
| 106953 |
+
"grad_norm": 0.2802443206310272,
|
| 106954 |
+
"learning_rate": 5.254315837822738e-07,
|
| 106955 |
+
"loss": 0.5163336992263794,
|
| 106956 |
+
"step": 15278
|
| 106957 |
+
},
|
| 106958 |
+
{
|
| 106959 |
+
"epoch": 18.74723926380368,
|
| 106960 |
+
"grad_norm": 0.29850152134895325,
|
| 106961 |
+
"learning_rate": 5.244084538170435e-07,
|
| 106962 |
+
"loss": 0.5023835897445679,
|
| 106963 |
+
"step": 15279
|
| 106964 |
+
},
|
| 106965 |
+
{
|
| 106966 |
+
"epoch": 18.74846625766871,
|
| 106967 |
+
"grad_norm": 0.3194597363471985,
|
| 106968 |
+
"learning_rate": 5.233863103931036e-07,
|
| 106969 |
+
"loss": 0.5030274391174316,
|
| 106970 |
+
"step": 15280
|
| 106971 |
+
},
|
| 106972 |
+
{
|
| 106973 |
+
"epoch": 18.749693251533742,
|
| 106974 |
+
"grad_norm": 0.310451865196228,
|
| 106975 |
+
"learning_rate": 5.223651535516488e-07,
|
| 106976 |
+
"loss": 0.6851617097854614,
|
| 106977 |
+
"step": 15281
|
| 106978 |
+
},
|
| 106979 |
+
{
|
| 106980 |
+
"epoch": 18.750920245398774,
|
| 106981 |
+
"grad_norm": 0.28196021914482117,
|
| 106982 |
+
"learning_rate": 5.213449833338463e-07,
|
| 106983 |
+
"loss": 0.37932470440864563,
|
| 106984 |
+
"step": 15282
|
| 106985 |
+
},
|
| 106986 |
+
{
|
| 106987 |
+
"epoch": 18.752147239263802,
|
| 106988 |
+
"grad_norm": 0.26480937004089355,
|
| 106989 |
+
"learning_rate": 5.203257997808076e-07,
|
| 106990 |
+
"loss": 0.514933168888092,
|
| 106991 |
+
"step": 15283
|
| 106992 |
+
},
|
| 106993 |
+
{
|
| 106994 |
+
"epoch": 18.753374233128834,
|
| 106995 |
+
"grad_norm": 0.25607478618621826,
|
| 106996 |
+
"learning_rate": 5.193076029336191e-07,
|
| 106997 |
+
"loss": 0.49566972255706787,
|
| 106998 |
+
"step": 15284
|
| 106999 |
+
},
|
| 107000 |
+
{
|
| 107001 |
+
"epoch": 18.754601226993866,
|
| 107002 |
+
"grad_norm": 0.23202572762966156,
|
| 107003 |
+
"learning_rate": 5.182903928333233e-07,
|
| 107004 |
+
"loss": 0.43272995948791504,
|
| 107005 |
+
"step": 15285
|
| 107006 |
+
},
|
| 107007 |
+
{
|
| 107008 |
+
"epoch": 18.755828220858895,
|
| 107009 |
+
"grad_norm": 0.3067319691181183,
|
| 107010 |
+
"learning_rate": 5.172741695209149e-07,
|
| 107011 |
+
"loss": 0.6113895773887634,
|
| 107012 |
+
"step": 15286
|
| 107013 |
+
},
|
| 107014 |
+
{
|
| 107015 |
+
"epoch": 18.757055214723927,
|
| 107016 |
+
"grad_norm": 0.27538979053497314,
|
| 107017 |
+
"learning_rate": 5.162589330373585e-07,
|
| 107018 |
+
"loss": 0.46863114833831787,
|
| 107019 |
+
"step": 15287
|
| 107020 |
+
},
|
| 107021 |
+
{
|
| 107022 |
+
"epoch": 18.758282208588955,
|
| 107023 |
+
"grad_norm": 0.28605467081069946,
|
| 107024 |
+
"learning_rate": 5.152446834235741e-07,
|
| 107025 |
+
"loss": 0.4998927712440491,
|
| 107026 |
+
"step": 15288
|
| 107027 |
+
},
|
| 107028 |
+
{
|
| 107029 |
+
"epoch": 18.759509202453987,
|
| 107030 |
+
"grad_norm": 0.2718774974346161,
|
| 107031 |
+
"learning_rate": 5.14231420720443e-07,
|
| 107032 |
+
"loss": 0.6954993605613708,
|
| 107033 |
+
"step": 15289
|
| 107034 |
+
},
|
| 107035 |
+
{
|
| 107036 |
+
"epoch": 18.76073619631902,
|
| 107037 |
+
"grad_norm": 0.2970033884048462,
|
| 107038 |
+
"learning_rate": 5.1321914496881e-07,
|
| 107039 |
+
"loss": 0.5219913125038147,
|
| 107040 |
+
"step": 15290
|
| 107041 |
+
},
|
| 107042 |
+
{
|
| 107043 |
+
"epoch": 18.761963190184048,
|
| 107044 |
+
"grad_norm": 0.24057511985301971,
|
| 107045 |
+
"learning_rate": 5.122078562094734e-07,
|
| 107046 |
+
"loss": 0.5302784442901611,
|
| 107047 |
+
"step": 15291
|
| 107048 |
+
},
|
| 107049 |
+
{
|
| 107050 |
+
"epoch": 18.76319018404908,
|
| 107051 |
+
"grad_norm": 0.23735012114048004,
|
| 107052 |
+
"learning_rate": 5.111975544831948e-07,
|
| 107053 |
+
"loss": 0.5459224581718445,
|
| 107054 |
+
"step": 15292
|
| 107055 |
+
},
|
| 107056 |
+
{
|
| 107057 |
+
"epoch": 18.764417177914112,
|
| 107058 |
+
"grad_norm": 0.23699013888835907,
|
| 107059 |
+
"learning_rate": 5.101882398307029e-07,
|
| 107060 |
+
"loss": 0.41781097650527954,
|
| 107061 |
+
"step": 15293
|
| 107062 |
+
},
|
| 107063 |
+
{
|
| 107064 |
+
"epoch": 18.76564417177914,
|
| 107065 |
+
"grad_norm": 0.2731378674507141,
|
| 107066 |
+
"learning_rate": 5.091799122926733e-07,
|
| 107067 |
+
"loss": 0.47894155979156494,
|
| 107068 |
+
"step": 15294
|
| 107069 |
+
},
|
| 107070 |
+
{
|
| 107071 |
+
"epoch": 18.766871165644172,
|
| 107072 |
+
"grad_norm": 0.2660670578479767,
|
| 107073 |
+
"learning_rate": 5.081725719097541e-07,
|
| 107074 |
+
"loss": 0.5509322881698608,
|
| 107075 |
+
"step": 15295
|
| 107076 |
+
},
|
| 107077 |
+
{
|
| 107078 |
+
"epoch": 18.7680981595092,
|
| 107079 |
+
"grad_norm": 0.26366573572158813,
|
| 107080 |
+
"learning_rate": 5.071662187225407e-07,
|
| 107081 |
+
"loss": 0.49648618698120117,
|
| 107082 |
+
"step": 15296
|
| 107083 |
+
},
|
| 107084 |
+
{
|
| 107085 |
+
"epoch": 18.769325153374233,
|
| 107086 |
+
"grad_norm": 0.2419794499874115,
|
| 107087 |
+
"learning_rate": 5.06160852771606e-07,
|
| 107088 |
+
"loss": 0.4754983186721802,
|
| 107089 |
+
"step": 15297
|
| 107090 |
+
},
|
| 107091 |
+
{
|
| 107092 |
+
"epoch": 18.770552147239265,
|
| 107093 |
+
"grad_norm": 0.2884845435619354,
|
| 107094 |
+
"learning_rate": 5.051564740974651e-07,
|
| 107095 |
+
"loss": 0.5286359786987305,
|
| 107096 |
+
"step": 15298
|
| 107097 |
+
},
|
| 107098 |
+
{
|
| 107099 |
+
"epoch": 18.771779141104293,
|
| 107100 |
+
"grad_norm": 0.26142531633377075,
|
| 107101 |
+
"learning_rate": 5.041530827406076e-07,
|
| 107102 |
+
"loss": 0.6019649505615234,
|
| 107103 |
+
"step": 15299
|
| 107104 |
+
},
|
| 107105 |
+
{
|
| 107106 |
+
"epoch": 18.773006134969325,
|
| 107107 |
+
"grad_norm": 0.3117089867591858,
|
| 107108 |
+
"learning_rate": 5.031506787414764e-07,
|
| 107109 |
+
"loss": 0.7385158538818359,
|
| 107110 |
+
"step": 15300
|
| 107111 |
}
|
| 107112 |
],
|
| 107113 |
"logging_steps": 1,
|
|
|
|
| 107127 |
"attributes": {}
|
| 107128 |
}
|
| 107129 |
},
|
| 107130 |
+
"total_flos": 4.279541914732069e+19,
|
| 107131 |
"train_batch_size": 8,
|
| 107132 |
"trial_name": null,
|
| 107133 |
"trial_params": null
|