Training in progress, step 16200, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:45a38127fa618d9dbb3b12e56d6ffbba68de594ccd245ae44046d38d58b3a94d
 size 340808816
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3640eb8491ee228573f99814b1c7b5b1de68302c65ff68da7c7194e858409903
 size 173247691
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e5c21b7d6144f31d3664bbbc4876186b9acc335e13ac02994e0af3215160ff77
 size 1465
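The three binaries above are tracked with Git LFS, so the commit only rewrites their pointer files: a version line, the sha256 of the blob, and its size in bytes. Below is a minimal sketch (not part of this repository) of checking a locally downloaded file against such a pointer; the path and pointer text are copied from the adapter diff above, and the parsing is a plain split of the three-line pointer format, not an official LFS API.

```python
import hashlib
import os

def read_pointer(pointer_text: str) -> dict:
    """Parse a Git LFS pointer of the form shown above
    (version <url> / oid sha256:<hex> / size <bytes>)."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify(path: str, pointer: dict) -> bool:
    """Check size and sha256 of a local file against its LFS pointer."""
    expected_oid = pointer["oid"].removeprefix("sha256:")
    expected_size = int(pointer["size"])
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so large checkpoints don't load into RAM.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Example with the adapter weights from this commit:
pointer = read_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:45a38127fa618d9dbb3b12e56d6ffbba68de594ccd245ae44046d38d58b3a94d\n"
    "size 340808816"
)
print(verify("last-checkpoint/adapter_model.safetensors", pointer))
```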
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 18.
+  "epoch": 18.45128205128205,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 16200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -111308,6 +111308,2106 @@
       "learning_rate": 1.1882769062005888e-06,
       "loss": 0.7433,
       "step": 15900
-    }
+    },
+    {
+      "epoch": 18.11054131054131,
+      "grad_norm": 0.1794893443584442,
+      "learning_rate": 1.1868579665239716e-06,
+      "loss": 0.8003,
+      "step": 15901
+    },
+    {
+      "epoch": 18.11168091168091,
+      "grad_norm": 0.17528975009918213,
+      "learning_rate": 1.1854398539432626e-06,
+      "loss": 0.556,
+      "step": 15902
+    },
+    {
+      "epoch": 18.112820512820512,
+      "grad_norm": 0.20127664506435394,
+      "learning_rate": 1.1840225685077227e-06,
+      "loss": 0.5355,
+      "step": 15903
+    },
+    {
+      "epoch": 18.113960113960115,
+      "grad_norm": 0.1681235134601593,
+      "learning_rate": 1.1826061102665708e-06,
+      "loss": 0.7987,
+      "step": 15904
+    },
+    {
+      "epoch": 18.115099715099714,
+      "grad_norm": 0.17833998799324036,
+      "learning_rate": 1.1811904792690065e-06,
+      "loss": 0.7327,
+      "step": 15905
+    },
+    {
+      "epoch": 18.116239316239316,
+      "grad_norm": 0.20956158638000488,
+      "learning_rate": 1.1797756755641986e-06,
+      "loss": 0.4477,
+      "step": 15906
+    },
+    {
+      "epoch": 18.117378917378918,
+      "grad_norm": 0.17220225930213928,
+      "learning_rate": 1.1783616992012913e-06,
+      "loss": 0.947,
+      "step": 15907
+    },
+    {
+      "epoch": 18.118518518518517,
+      "grad_norm": 0.2909105122089386,
+      "learning_rate": 1.176948550229387e-06,
+      "loss": 0.5673,
+      "step": 15908
+    },
+    {
+      "epoch": 18.11965811965812,
+      "grad_norm": 0.16907162964344025,
+      "learning_rate": 1.1755362286975741e-06,
+      "loss": 0.7082,
+      "step": 15909
+    },
+    {
+      "epoch": 18.12079772079772,
+      "grad_norm": 0.221530020236969,
+      "learning_rate": 1.174124734654905e-06,
+      "loss": 0.6505,
+      "step": 15910
+    },
+    {
+      "epoch": 18.12193732193732,
+      "grad_norm": 0.15925301611423492,
+      "learning_rate": 1.1727140681504045e-06,
+      "loss": 0.5782,
+      "step": 15911
+    },
+    {
+      "epoch": 18.123076923076923,
+      "grad_norm": 0.22340020537376404,
+      "learning_rate": 1.1713042292330722e-06,
+      "loss": 0.4607,
+      "step": 15912
+    },
+    {
+      "epoch": 18.124216524216525,
+      "grad_norm": 0.19746434688568115,
+      "learning_rate": 1.1698952179518718e-06,
+      "loss": 0.4612,
+      "step": 15913
+    },
+    {
+      "epoch": 18.125356125356124,
+      "grad_norm": 0.169794499874115,
+      "learning_rate": 1.1684870343557446e-06,
+      "loss": 0.9059,
+      "step": 15914
+    },
+    {
+      "epoch": 18.126495726495726,
+      "grad_norm": 0.19517719745635986,
+      "learning_rate": 1.167079678493599e-06,
+      "loss": 0.4896,
+      "step": 15915
+    },
+    {
+      "epoch": 18.12763532763533,
+      "grad_norm": 0.192497119307518,
+      "learning_rate": 1.1656731504143176e-06,
+      "loss": 0.6869,
+      "step": 15916
+    },
+    {
+      "epoch": 18.128774928774927,
+      "grad_norm": 0.18236230313777924,
+      "learning_rate": 1.1642674501667506e-06,
+      "loss": 0.6197,
+      "step": 15917
+    },
+    {
+      "epoch": 18.12991452991453,
+      "grad_norm": 0.23407305777072906,
+      "learning_rate": 1.1628625777997283e-06,
+      "loss": 0.6292,
+      "step": 15918
+    },
+    {
+      "epoch": 18.13105413105413,
+      "grad_norm": 0.18812473118305206,
+      "learning_rate": 1.1614585333620365e-06,
+      "loss": 0.8725,
+      "step": 15919
+    },
+    {
+      "epoch": 18.13219373219373,
+      "grad_norm": 0.2166595607995987,
+      "learning_rate": 1.1600553169024448e-06,
+      "loss": 0.472,
+      "step": 15920
+    },
+    {
+      "epoch": 18.133333333333333,
+      "grad_norm": 0.20252515375614166,
+      "learning_rate": 1.1586529284696918e-06,
+      "loss": 0.4766,
+      "step": 15921
+    },
+    {
+      "epoch": 18.134472934472935,
+      "grad_norm": 0.1772758662700653,
+      "learning_rate": 1.1572513681124914e-06,
+      "loss": 0.5942,
+      "step": 15922
+    },
+    {
+      "epoch": 18.135612535612534,
+      "grad_norm": 0.19498567283153534,
+      "learning_rate": 1.1558506358795156e-06,
+      "loss": 0.4519,
+      "step": 15923
+    },
+    {
+      "epoch": 18.136752136752136,
+      "grad_norm": 0.19647033512592316,
+      "learning_rate": 1.1544507318194203e-06,
+      "loss": 0.5228,
+      "step": 15924
+    },
+    {
+      "epoch": 18.13789173789174,
+      "grad_norm": 0.19874857366085052,
+      "learning_rate": 1.1530516559808246e-06,
+      "loss": 0.5935,
+      "step": 15925
+    },
+    {
+      "epoch": 18.139031339031337,
+      "grad_norm": 0.22173920273780823,
+      "learning_rate": 1.1516534084123288e-06,
+      "loss": 0.56,
+      "step": 15926
+    },
+    {
+      "epoch": 18.14017094017094,
+      "grad_norm": 0.21998946368694305,
+      "learning_rate": 1.1502559891624882e-06,
+      "loss": 0.7126,
+      "step": 15927
+    },
+    {
+      "epoch": 18.141310541310542,
+      "grad_norm": 0.20598110556602478,
+      "learning_rate": 1.1488593982798474e-06,
+      "loss": 0.5714,
+      "step": 15928
+    },
+    {
+      "epoch": 18.14245014245014,
+      "grad_norm": 0.26100605726242065,
+      "learning_rate": 1.1474636358129066e-06,
+      "loss": 0.1406,
+      "step": 15929
+    },
+    {
+      "epoch": 18.143589743589743,
+      "grad_norm": 0.24370066821575165,
+      "learning_rate": 1.1460687018101546e-06,
+      "loss": 0.503,
+      "step": 15930
+    },
+    {
+      "epoch": 18.144729344729345,
+      "grad_norm": 0.20355720818042755,
+      "learning_rate": 1.1446745963200306e-06,
+      "loss": 0.6663,
+      "step": 15931
+    },
+    {
+      "epoch": 18.145868945868944,
+      "grad_norm": 0.1552165448665619,
+      "learning_rate": 1.1432813193909597e-06,
+      "loss": 0.3006,
+      "step": 15932
+    },
+    {
+      "epoch": 18.147008547008546,
+      "grad_norm": 0.1596253663301468,
+      "learning_rate": 1.1418888710713394e-06,
+      "loss": 0.6474,
+      "step": 15933
+    },
+    {
+      "epoch": 18.14814814814815,
+      "grad_norm": 0.19343087077140808,
+      "learning_rate": 1.1404972514095252e-06,
+      "loss": 0.5742,
+      "step": 15934
+    },
+    {
+      "epoch": 18.149287749287748,
+      "grad_norm": 0.17367888987064362,
+      "learning_rate": 1.1391064604538538e-06,
+      "loss": 0.6066,
+      "step": 15935
+    },
+    {
+      "epoch": 18.15042735042735,
+      "grad_norm": 0.23630227148532867,
+      "learning_rate": 1.1377164982526333e-06,
+      "loss": 0.7701,
+      "step": 15936
+    },
+    {
+      "epoch": 18.151566951566952,
+      "grad_norm": 0.1966976374387741,
+      "learning_rate": 1.136327364854145e-06,
+      "loss": 0.7617,
+      "step": 15937
+    },
+    {
+      "epoch": 18.15270655270655,
+      "grad_norm": 0.15290910005569458,
+      "learning_rate": 1.1349390603066307e-06,
+      "loss": 0.7526,
+      "step": 15938
+    },
+    {
+      "epoch": 18.153846153846153,
+      "grad_norm": 0.22119775414466858,
+      "learning_rate": 1.13355158465831e-06,
+      "loss": 0.5943,
+      "step": 15939
+    },
+    {
+      "epoch": 18.154985754985756,
+      "grad_norm": 0.21148540079593658,
+      "learning_rate": 1.1321649379573752e-06,
+      "loss": 0.4899,
+      "step": 15940
+    },
+    {
+      "epoch": 18.156125356125354,
+      "grad_norm": 0.21941092610359192,
+      "learning_rate": 1.130779120251993e-06,
+      "loss": 0.558,
+      "step": 15941
+    },
+    {
+      "epoch": 18.157264957264957,
+      "grad_norm": 0.19267788529396057,
+      "learning_rate": 1.129394131590289e-06,
+      "loss": 0.4554,
+      "step": 15942
+    },
+    {
+      "epoch": 18.15840455840456,
+      "grad_norm": 0.18841686844825745,
+      "learning_rate": 1.128009972020369e-06,
+      "loss": 0.7152,
+      "step": 15943
+    },
+    {
+      "epoch": 18.159544159544158,
+      "grad_norm": 0.19724957644939423,
+      "learning_rate": 1.126626641590317e-06,
+      "loss": 0.7831,
+      "step": 15944
+    },
+    {
+      "epoch": 18.16068376068376,
+      "grad_norm": 0.20876359939575195,
+      "learning_rate": 1.1252441403481696e-06,
+      "loss": 0.5887,
+      "step": 15945
+    },
+    {
+      "epoch": 18.161823361823362,
+      "grad_norm": 0.18697749078273773,
+      "learning_rate": 1.123862468341949e-06,
+      "loss": 0.7821,
+      "step": 15946
+    },
+    {
+      "epoch": 18.162962962962965,
+      "grad_norm": 0.21110433340072632,
+      "learning_rate": 1.1224816256196453e-06,
+      "loss": 0.5808,
+      "step": 15947
+    },
+    {
+      "epoch": 18.164102564102564,
+      "grad_norm": 0.17982840538024902,
+      "learning_rate": 1.1211016122292222e-06,
+      "loss": 0.5423,
+      "step": 15948
+    },
+    {
+      "epoch": 18.165242165242166,
+      "grad_norm": 0.15705281496047974,
+      "learning_rate": 1.119722428218603e-06,
+      "loss": 0.9529,
+      "step": 15949
+    },
+    {
+      "epoch": 18.166381766381768,
+      "grad_norm": 0.16538876295089722,
+      "learning_rate": 1.1183440736356966e-06,
+      "loss": 0.5573,
+      "step": 15950
+    },
+    {
+      "epoch": 18.167521367521367,
+      "grad_norm": 0.2571662366390228,
+      "learning_rate": 1.1169665485283726e-06,
+      "loss": 0.7858,
+      "step": 15951
+    },
+    {
+      "epoch": 18.16866096866097,
+      "grad_norm": 0.16697154939174652,
+      "learning_rate": 1.115589852944482e-06,
+      "loss": 0.8273,
+      "step": 15952
+    },
+    {
+      "epoch": 18.16980056980057,
+      "grad_norm": 0.20390774309635162,
+      "learning_rate": 1.1142139869318364e-06,
+      "loss": 0.7989,
+      "step": 15953
+    },
+    {
+      "epoch": 18.17094017094017,
+      "grad_norm": 0.17538999021053314,
+      "learning_rate": 1.1128389505382225e-06,
+      "loss": 0.5698,
+      "step": 15954
+    },
+    {
+      "epoch": 18.172079772079773,
+      "grad_norm": 0.18313254415988922,
+      "learning_rate": 1.1114647438114078e-06,
+      "loss": 0.5203,
+      "step": 15955
+    },
+    {
+      "epoch": 18.173219373219375,
+      "grad_norm": 0.17053240537643433,
+      "learning_rate": 1.1100913667991125e-06,
+      "loss": 0.591,
+      "step": 15956
+    },
+    {
+      "epoch": 18.174358974358974,
+      "grad_norm": 0.19222436845302582,
+      "learning_rate": 1.1087188195490428e-06,
+      "loss": 0.7421,
+      "step": 15957
+    },
+    {
+      "epoch": 18.175498575498576,
+      "grad_norm": 0.1683763563632965,
+      "learning_rate": 1.1073471021088689e-06,
+      "loss": 0.6887,
+      "step": 15958
+    },
+    {
+      "epoch": 18.17663817663818,
+      "grad_norm": 0.20859429240226746,
+      "learning_rate": 1.105976214526236e-06,
+      "loss": 0.4182,
+      "step": 15959
+    },
+    {
+      "epoch": 18.177777777777777,
+      "grad_norm": 0.20938421785831451,
+      "learning_rate": 1.1046061568487586e-06,
+      "loss": 0.4057,
+      "step": 15960
+    },
+    {
+      "epoch": 18.17891737891738,
+      "grad_norm": 0.18100666999816895,
+      "learning_rate": 1.1032369291240214e-06,
+      "loss": 0.723,
+      "step": 15961
+    },
+    {
+      "epoch": 18.180056980056982,
+      "grad_norm": 0.21834257245063782,
+      "learning_rate": 1.1018685313995802e-06,
+      "loss": 0.6354,
+      "step": 15962
+    },
+    {
+      "epoch": 18.18119658119658,
+      "grad_norm": 0.19915148615837097,
+      "learning_rate": 1.1005009637229669e-06,
+      "loss": 0.6653,
+      "step": 15963
+    },
+    {
+      "epoch": 18.182336182336183,
+      "grad_norm": 0.21545255184173584,
+      "learning_rate": 1.099134226141682e-06,
+      "loss": 0.4773,
+      "step": 15964
+    },
+    {
+      "epoch": 18.183475783475785,
+      "grad_norm": 0.1650085300207138,
+      "learning_rate": 1.097768318703185e-06,
+      "loss": 0.5612,
+      "step": 15965
+    },
+    {
+      "epoch": 18.184615384615384,
+      "grad_norm": 0.1827668994665146,
+      "learning_rate": 1.0964032414549298e-06,
+      "loss": 0.6094,
+      "step": 15966
+    },
+    {
+      "epoch": 18.185754985754986,
+      "grad_norm": 0.217435821890831,
+      "learning_rate": 1.095038994444328e-06,
+      "loss": 0.6947,
+      "step": 15967
+    },
+    {
+      "epoch": 18.18689458689459,
+      "grad_norm": 0.16685186326503754,
+      "learning_rate": 1.0936755777187585e-06,
+      "loss": 0.9233,
+      "step": 15968
+    },
+    {
+      "epoch": 18.188034188034187,
+      "grad_norm": 0.1902848482131958,
+      "learning_rate": 1.092312991325578e-06,
+      "loss": 0.5463,
+      "step": 15969
+    },
+    {
+      "epoch": 18.18917378917379,
+      "grad_norm": 0.2106180489063263,
+      "learning_rate": 1.0909512353121154e-06,
+      "loss": 0.4301,
+      "step": 15970
+    },
+    {
+      "epoch": 18.190313390313392,
+      "grad_norm": 0.2520906925201416,
+      "learning_rate": 1.0895903097256688e-06,
+      "loss": 0.3797,
+      "step": 15971
+    },
+    {
+      "epoch": 18.19145299145299,
+      "grad_norm": 0.20214390754699707,
+      "learning_rate": 1.0882302146135004e-06,
+      "loss": 0.6817,
+      "step": 15972
+    },
+    {
+      "epoch": 18.192592592592593,
+      "grad_norm": 0.1918715387582779,
+      "learning_rate": 1.0868709500228557e-06,
+      "loss": 0.6998,
+      "step": 15973
+    },
+    {
+      "epoch": 18.193732193732195,
+      "grad_norm": 0.22887898981571198,
+      "learning_rate": 1.0855125160009416e-06,
+      "loss": 0.6342,
+      "step": 15974
+    },
+    {
+      "epoch": 18.194871794871794,
+      "grad_norm": 0.21144236624240875,
+      "learning_rate": 1.084154912594948e-06,
+      "loss": 0.4033,
+      "step": 15975
+    },
+    {
+      "epoch": 18.196011396011396,
+      "grad_norm": 0.2070854902267456,
+      "learning_rate": 1.0827981398520177e-06,
+      "loss": 0.7286,
+      "step": 15976
+    },
+    {
+      "epoch": 18.197150997151,
+      "grad_norm": 0.20363754034042358,
+      "learning_rate": 1.0814421978192825e-06,
+      "loss": 0.8095,
+      "step": 15977
+    },
+    {
+      "epoch": 18.198290598290598,
+      "grad_norm": 0.22051745653152466,
+      "learning_rate": 1.0800870865438407e-06,
+      "loss": 0.7489,
+      "step": 15978
+    },
+    {
+      "epoch": 18.1994301994302,
+      "grad_norm": 0.2056119292974472,
+      "learning_rate": 1.0787328060727493e-06,
+      "loss": 0.5298,
+      "step": 15979
+    },
+    {
+      "epoch": 18.200569800569802,
+      "grad_norm": 0.1733495444059372,
+      "learning_rate": 1.077379356453051e-06,
+      "loss": 0.6862,
+      "step": 15980
+    },
+    {
+      "epoch": 18.2017094017094,
+      "grad_norm": 0.19605806469917297,
+      "learning_rate": 1.0760267377317556e-06,
+      "loss": 0.6083,
+      "step": 15981
+    },
+    {
+      "epoch": 18.202849002849003,
+      "grad_norm": 0.27621230483055115,
+      "learning_rate": 1.0746749499558478e-06,
+      "loss": 0.6252,
+      "step": 15982
+    },
+    {
+      "epoch": 18.203988603988606,
+      "grad_norm": 0.22263580560684204,
+      "learning_rate": 1.0733239931722705e-06,
+      "loss": 0.7787,
+      "step": 15983
+    },
+    {
+      "epoch": 18.205128205128204,
+      "grad_norm": 0.19939032196998596,
+      "learning_rate": 1.0719738674279473e-06,
+      "loss": 0.5776,
+      "step": 15984
+    },
+    {
+      "epoch": 18.206267806267807,
+      "grad_norm": 0.27290910482406616,
+      "learning_rate": 1.070624572769774e-06,
+      "loss": 0.5814,
+      "step": 15985
+    },
+    {
+      "epoch": 18.20740740740741,
+      "grad_norm": 0.16844192147254944,
+      "learning_rate": 1.0692761092446213e-06,
+      "loss": 0.6483,
+      "step": 15986
+    },
+    {
+      "epoch": 18.208547008547008,
+      "grad_norm": 0.19200433790683746,
+      "learning_rate": 1.0679284768993103e-06,
+      "loss": 0.7458,
+      "step": 15987
+    },
+    {
+      "epoch": 18.20968660968661,
+      "grad_norm": 0.22204038500785828,
+      "learning_rate": 1.0665816757806618e-06,
+      "loss": 0.5282,
+      "step": 15988
+    },
+    {
+      "epoch": 18.210826210826212,
+      "grad_norm": 0.15707382559776306,
+      "learning_rate": 1.0652357059354494e-06,
+      "loss": 0.8963,
+      "step": 15989
+    },
+    {
+      "epoch": 18.21196581196581,
+      "grad_norm": 0.23402263224124908,
+      "learning_rate": 1.0638905674104193e-06,
+      "loss": 0.8085,
+      "step": 15990
+    },
+    {
+      "epoch": 18.213105413105414,
+      "grad_norm": 0.1872255951166153,
+      "learning_rate": 1.062546260252295e-06,
+      "loss": 0.701,
+      "step": 15991
+    },
+    {
+      "epoch": 18.214245014245016,
+      "grad_norm": 0.2242417186498642,
+      "learning_rate": 1.0612027845077698e-06,
+      "loss": 0.7828,
+      "step": 15992
+    },
+    {
+      "epoch": 18.215384615384615,
+      "grad_norm": 0.1795329749584198,
+      "learning_rate": 1.0598601402235037e-06,
+      "loss": 0.6698,
+      "step": 15993
+    },
+    {
+      "epoch": 18.216524216524217,
+      "grad_norm": 0.24393922090530396,
+      "learning_rate": 1.0585183274461287e-06,
+      "loss": 0.7277,
+      "step": 15994
+    },
+    {
+      "epoch": 18.21766381766382,
+      "grad_norm": 0.2110724300146103,
+      "learning_rate": 1.0571773462222517e-06,
+      "loss": 0.6454,
+      "step": 15995
+    },
+    {
+      "epoch": 18.218803418803418,
+      "grad_norm": 0.21045121550559998,
+      "learning_rate": 1.055837196598447e-06,
+      "loss": 0.6849,
+      "step": 15996
+    },
+    {
+      "epoch": 18.21994301994302,
+      "grad_norm": 0.1899309754371643,
+      "learning_rate": 1.0544978786212662e-06,
+      "loss": 0.7113,
+      "step": 15997
+    },
+    {
+      "epoch": 18.221082621082623,
+      "grad_norm": 0.19039730727672577,
+      "learning_rate": 1.0531593923372218e-06,
+      "loss": 0.7073,
+      "step": 15998
+    },
+    {
+      "epoch": 18.22222222222222,
+      "grad_norm": 0.16976770758628845,
+      "learning_rate": 1.0518217377928046e-06,
+      "loss": 0.7008,
+      "step": 15999
+    },
+    {
+      "epoch": 18.223361823361824,
+      "grad_norm": 0.17399312555789948,
+      "learning_rate": 1.0504849150344776e-06,
+      "loss": 0.6882,
+      "step": 16000
+    },
+    {
+      "epoch": 18.224501424501426,
+      "grad_norm": 0.19433003664016724,
+      "learning_rate": 1.0491489241086754e-06,
+      "loss": 0.7706,
+      "step": 16001
+    },
+    {
+      "epoch": 18.225641025641025,
+      "grad_norm": 0.20473644137382507,
+      "learning_rate": 1.047813765061792e-06,
+      "loss": 0.7283,
+      "step": 16002
+    },
+    {
+      "epoch": 18.226780626780627,
+      "grad_norm": 0.3713143467903137,
+      "learning_rate": 1.0464794379402065e-06,
+      "loss": 0.7069,
+      "step": 16003
+    },
+    {
+      "epoch": 18.22792022792023,
+      "grad_norm": 0.21117597818374634,
+      "learning_rate": 1.0451459427902599e-06,
+      "loss": 0.5685,
+      "step": 16004
+    },
+    {
+      "epoch": 18.22905982905983,
+      "grad_norm": 0.21739910542964935,
+      "learning_rate": 1.0438132796582762e-06,
+      "loss": 0.6592,
+      "step": 16005
+    },
+    {
+      "epoch": 18.23019943019943,
+      "grad_norm": 0.15645365417003632,
+      "learning_rate": 1.0424814485905321e-06,
+      "loss": 0.7862,
+      "step": 16006
+    },
+    {
+      "epoch": 18.231339031339033,
+      "grad_norm": 0.24789197742938995,
+      "learning_rate": 1.0411504496332935e-06,
+      "loss": 0.3764,
+      "step": 16007
+    },
+    {
+      "epoch": 18.23247863247863,
+      "grad_norm": 0.1837555170059204,
+      "learning_rate": 1.0398202828327847e-06,
+      "loss": 0.5905,
+      "step": 16008
+    },
+    {
+      "epoch": 18.233618233618234,
+      "grad_norm": 0.16961321234703064,
+      "learning_rate": 1.0384909482352074e-06,
+      "loss": 0.7501,
+      "step": 16009
+    },
+    {
+      "epoch": 18.234757834757836,
+      "grad_norm": 0.16449250280857086,
+      "learning_rate": 1.0371624458867357e-06,
+      "loss": 0.7763,
+      "step": 16010
+    },
+    {
+      "epoch": 18.235897435897435,
+      "grad_norm": 0.17696818709373474,
+      "learning_rate": 1.0358347758335106e-06,
+      "loss": 0.6562,
+      "step": 16011
+    },
+    {
+      "epoch": 18.237037037037037,
+      "grad_norm": 0.2112455666065216,
+      "learning_rate": 1.0345079381216483e-06,
+      "loss": 0.686,
+      "step": 16012
+    },
+    {
+      "epoch": 18.23817663817664,
+      "grad_norm": 0.1881578415632248,
+      "learning_rate": 1.0331819327972253e-06,
+      "loss": 0.8405,
+      "step": 16013
+    },
+    {
+      "epoch": 18.23931623931624,
+      "grad_norm": 0.21206188201904297,
+      "learning_rate": 1.0318567599063051e-06,
+      "loss": 0.545,
+      "step": 16014
+    },
+    {
+      "epoch": 18.24045584045584,
+      "grad_norm": 0.21515263617038727,
+      "learning_rate": 1.0305324194949117e-06,
+      "loss": 0.576,
+      "step": 16015
+    },
+    {
+      "epoch": 18.241595441595443,
+      "grad_norm": 0.1821409910917282,
+      "learning_rate": 1.0292089116090475e-06,
+      "loss": 0.6085,
+      "step": 16016
+    },
+    {
+      "epoch": 18.242735042735042,
+      "grad_norm": 0.22497190535068512,
+      "learning_rate": 1.0278862362946728e-06,
+      "loss": 0.5394,
+      "step": 16017
+    },
+    {
+      "epoch": 18.243874643874644,
+      "grad_norm": 0.1855476349592209,
+      "learning_rate": 1.0265643935977342e-06,
+      "loss": 0.7609,
+      "step": 16018
+    },
+    {
+      "epoch": 18.245014245014247,
+      "grad_norm": 0.2256733924150467,
+      "learning_rate": 1.0252433835641422e-06,
+      "loss": 0.7753,
+      "step": 16019
+    },
+    {
+      "epoch": 18.246153846153845,
+      "grad_norm": 0.17753781378269196,
+      "learning_rate": 1.0239232062397797e-06,
+      "loss": 0.7024,
+      "step": 16020
+    },
+    {
+      "epoch": 18.247293447293448,
+      "grad_norm": 0.18134057521820068,
+      "learning_rate": 1.022603861670493e-06,
+      "loss": 0.7823,
+      "step": 16021
+    },
+    {
+      "epoch": 18.24843304843305,
+      "grad_norm": 0.18681129813194275,
+      "learning_rate": 1.0212853499021153e-06,
+      "loss": 0.7798,
+      "step": 16022
+    },
+    {
+      "epoch": 18.24957264957265,
+      "grad_norm": 0.24428966641426086,
+      "learning_rate": 1.019967670980443e-06,
+      "loss": 0.6506,
+      "step": 16023
+    },
+    {
+      "epoch": 18.25071225071225,
+      "grad_norm": 0.16551734507083893,
+      "learning_rate": 1.0186508249512339e-06,
+      "loss": 0.5754,
+      "step": 16024
+    },
+    {
+      "epoch": 18.251851851851853,
+      "grad_norm": 0.21288277208805084,
+      "learning_rate": 1.0173348118602322e-06,
+      "loss": 0.544,
+      "step": 16025
+    },
+    {
+      "epoch": 18.252991452991452,
+      "grad_norm": 0.2549050748348236,
+      "learning_rate": 1.0160196317531428e-06,
+      "loss": 0.4273,
+      "step": 16026
+    },
+    {
+      "epoch": 18.254131054131054,
+      "grad_norm": 0.1870443969964981,
+      "learning_rate": 1.0147052846756544e-06,
+      "loss": 0.5073,
+      "step": 16027
+    },
+    {
+      "epoch": 18.255270655270657,
+      "grad_norm": 0.22737272083759308,
+      "learning_rate": 1.0133917706734053e-06,
+      "loss": 0.747,
+      "step": 16028
+    },
+    {
+      "epoch": 18.256410256410255,
+      "grad_norm": 0.164210706949234,
+      "learning_rate": 1.0120790897920256e-06,
+      "loss": 0.5032,
+      "step": 16029
+    },
+    {
+      "epoch": 18.257549857549858,
+      "grad_norm": 0.20671352744102478,
+      "learning_rate": 1.0107672420771042e-06,
+      "loss": 0.5299,
+      "step": 16030
+    },
+    {
+      "epoch": 18.25868945868946,
+      "grad_norm": 0.18295066058635712,
+      "learning_rate": 1.0094562275742125e-06,
+      "loss": 0.8214,
+      "step": 16031
+    },
+    {
+      "epoch": 18.25982905982906,
+      "grad_norm": 0.1918344795703888,
+      "learning_rate": 1.0081460463288727e-06,
+      "loss": 0.6617,
+      "step": 16032
+    },
+    {
+      "epoch": 18.26096866096866,
+      "grad_norm": 0.19031167030334473,
+      "learning_rate": 1.0068366983866013e-06,
+      "loss": 0.5399,
+      "step": 16033
+    },
+    {
+      "epoch": 18.262108262108264,
+      "grad_norm": 0.18421906232833862,
+      "learning_rate": 1.0055281837928754e-06,
+      "loss": 0.7753,
+      "step": 16034
+    },
+    {
+      "epoch": 18.263247863247862,
+      "grad_norm": 0.18231862783432007,
+      "learning_rate": 1.0042205025931396e-06,
+      "loss": 0.8264,
+      "step": 16035
+    },
+    {
+      "epoch": 18.264387464387465,
+      "grad_norm": 0.27754512429237366,
+      "learning_rate": 1.0029136548328127e-06,
+      "loss": 0.4332,
+      "step": 16036
+    },
+    {
+      "epoch": 18.265527065527067,
+      "grad_norm": 0.25456663966178894,
+      "learning_rate": 1.0016076405572865e-06,
+      "loss": 0.5675,
+      "step": 16037
+    },
+    {
+      "epoch": 18.266666666666666,
+      "grad_norm": 0.2550671100616455,
+      "learning_rate": 1.0003024598119248e-06,
+      "loss": 0.5403,
+      "step": 16038
+    },
+    {
+      "epoch": 18.267806267806268,
+      "grad_norm": 0.23232153058052063,
+      "learning_rate": 9.989981126420605e-07,
+      "loss": 0.3538,
+      "step": 16039
+    },
+    {
+      "epoch": 18.26894586894587,
+      "grad_norm": 0.1822681427001953,
+      "learning_rate": 9.976945990929909e-07,
+      "loss": 0.6123,
+      "step": 16040
+    },
+    {
+      "epoch": 18.27008547008547,
+      "grad_norm": 0.20538312196731567,
+      "learning_rate": 9.963919192099963e-07,
+      "loss": 0.8579,
+      "step": 16041
+    },
+    {
+      "epoch": 18.27122507122507,
+      "grad_norm": 0.19148221611976624,
+      "learning_rate": 9.950900730383184e-07,
+      "loss": 0.415,
+      "step": 16042
+    },
+    {
+      "epoch": 18.272364672364674,
+      "grad_norm": 0.199871227145195,
+      "learning_rate": 9.937890606231764e-07,
+      "loss": 0.6397,
+      "step": 16043
+    },
+    {
+      "epoch": 18.273504273504273,
+      "grad_norm": 0.19247028231620789,
+      "learning_rate": 9.924888820097567e-07,
+      "loss": 0.606,
+      "step": 16044
+    },
+    {
+      "epoch": 18.274643874643875,
+      "grad_norm": 0.20238713920116425,
+      "learning_rate": 9.911895372432227e-07,
+      "loss": 0.5232,
+      "step": 16045
+    },
+    {
+      "epoch": 18.275783475783477,
+      "grad_norm": 0.19953593611717224,
+      "learning_rate": 9.898910263687e-07,
+      "loss": 0.8065,
+      "step": 16046
+    },
+    {
+      "epoch": 18.276923076923076,
+      "grad_norm": 0.1754782646894455,
+      "learning_rate": 9.885933494312883e-07,
+      "loss": 0.443,
+      "step": 16047
+    },
+    {
+      "epoch": 18.27806267806268,
+      "grad_norm": 0.20692458748817444,
+      "learning_rate": 9.872965064760597e-07,
+      "loss": 0.5389,
+      "step": 16048
+    },
+    {
+      "epoch": 18.27920227920228,
+      "grad_norm": 0.25308653712272644,
+      "learning_rate": 9.860004975480618e-07,
+      "loss": 0.8961,
+      "step": 16049
+    },
+    {
+      "epoch": 18.28034188034188,
+      "grad_norm": 0.19349783658981323,
+      "learning_rate": 9.847053226923058e-07,
+      "loss": 0.6028,
+      "step": 16050
+    },
+    {
+      "epoch": 18.28148148148148,
+      "grad_norm": 0.20703546702861786,
+      "learning_rate": 9.83410981953775e-07,
+      "loss": 0.6376,
+      "step": 16051
+    },
+    {
+      "epoch": 18.282621082621084,
+      "grad_norm": 0.19268111884593964,
+      "learning_rate": 9.821174753774254e-07,
+      "loss": 0.574,
+      "step": 16052
+    },
+    {
+      "epoch": 18.283760683760683,
+      "grad_norm": 0.19388440251350403,
+      "learning_rate": 9.808248030081845e-07,
+      "loss": 0.573,
+      "step": 16053
+    },
+    {
+      "epoch": 18.284900284900285,
+      "grad_norm": 0.23397889733314514,
+      "learning_rate": 9.79532964890953e-07,
+      "loss": 0.806,
+      "step": 16054
+    },
+    {
+      "epoch": 18.286039886039887,
+      "grad_norm": 0.23620925843715668,
+      "learning_rate": 9.782419610705973e-07,
+      "loss": 0.6473,
+      "step": 16055
+    },
+    {
+      "epoch": 18.287179487179486,
+      "grad_norm": 0.18553614616394043,
+      "learning_rate": 9.769517915919596e-07,
+      "loss": 0.7546,
+      "step": 16056
+    },
+    {
+      "epoch": 18.28831908831909,
+      "grad_norm": 0.17249667644500732,
+      "learning_rate": 9.756624564998513e-07,
+      "loss": 0.7413,
+      "step": 16057
+    },
+    {
+      "epoch": 18.28945868945869,
+      "grad_norm": 0.19826942682266235,
+      "learning_rate": 9.743739558390503e-07,
+      "loss": 0.809,
+      "step": 16058
+    },
+    {
+      "epoch": 18.29059829059829,
+      "grad_norm": 0.18703773617744446,
+      "learning_rate": 9.730862896543124e-07,
+      "loss": 0.6999,
+      "step": 16059
+    },
+    {
+      "epoch": 18.291737891737892,
+      "grad_norm": 0.1930650770664215,
+      "learning_rate": 9.717994579903634e-07,
+      "loss": 0.4993,
+      "step": 16060
+    },
+    {
+      "epoch": 18.292877492877494,
+      "grad_norm": 0.18729305267333984,
+      "learning_rate": 9.705134608918975e-07,
+      "loss": 0.7114,
+      "step": 16061
+    },
+    {
+      "epoch": 18.294017094017093,
+      "grad_norm": 0.22105178236961365,
+      "learning_rate": 9.692282984035794e-07,
+      "loss": 0.5916,
+      "step": 16062
+    },
+    {
+      "epoch": 18.295156695156695,
+      "grad_norm": 0.23766843974590302,
+      "learning_rate": 9.679439705700482e-07,
+      "loss": 0.637,
+      "step": 16063
+    },
+    {
+      "epoch": 18.296296296296298,
+      "grad_norm": 0.1865086406469345,
+      "learning_rate": 9.666604774359101e-07,
+      "loss": 0.8526,
+      "step": 16064
+    },
+    {
+      "epoch": 18.297435897435896,
+      "grad_norm": 0.19988276064395905,
+      "learning_rate": 9.65377819045743e-07,
+      "loss": 0.8234,
+      "step": 16065
+    },
+    {
+      "epoch": 18.2985754985755,
+      "grad_norm": 0.17610132694244385,
+      "learning_rate": 9.640959954441032e-07,
+      "loss": 0.6586,
+      "step": 16066
+    },
+    {
+      "epoch": 18.2997150997151,
+      "grad_norm": 0.21880441904067993,
+      "learning_rate": 9.628150066755076e-07,
+      "loss": 0.7334,
+      "step": 16067
+    },
+    {
+      "epoch": 18.3008547008547,
+      "grad_norm": 0.19010087847709656,
+      "learning_rate": 9.615348527844514e-07,
+      "loss": 0.8148,
+      "step": 16068
+    },
+    {
+      "epoch": 18.301994301994302,
+      "grad_norm": 0.21880176663398743,
+      "learning_rate": 9.602555338153934e-07,
+      "loss": 0.6192,
+      "step": 16069
+    },
+    {
+      "epoch": 18.303133903133904,
+      "grad_norm": 0.20157532393932343,
+      "learning_rate": 9.589770498127704e-07,
+      "loss": 0.7407,
+      "step": 16070
+    },
+    {
+      "epoch": 18.304273504273503,
+      "grad_norm": 0.1672639548778534,
+      "learning_rate": 9.576994008209883e-07,
+      "loss": 0.4369,
+      "step": 16071
+    },
+    {
+      "epoch": 18.305413105413106,
+      "grad_norm": 0.1705726683139801,
+      "learning_rate": 9.564225868844257e-07,
+      "loss": 0.6773,
+      "step": 16072
+    },
+    {
+      "epoch": 18.306552706552708,
+      "grad_norm": 0.18530426919460297,
+      "learning_rate": 9.55146608047422e-07,
+      "loss": 0.5391,
+      "step": 16073
+    },
+    {
+      "epoch": 18.307692307692307,
+      "grad_norm": 0.25975337624549866,
+      "learning_rate": 9.538714643543e-07,
+      "loss": 0.6085,
+      "step": 16074
+    },
+    {
+      "epoch": 18.30883190883191,
+      "grad_norm": 0.2506848871707916,
+      "learning_rate": 9.525971558493524e-07,
+      "loss": 0.5404,
+      "step": 16075
+    },
+    {
+      "epoch": 18.30997150997151,
+      "grad_norm": 0.1733851432800293,
+      "learning_rate": 9.513236825768323e-07,
+      "loss": 0.5343,
+      "step": 16076
+    },
+    {
+      "epoch": 18.31111111111111,
+      "grad_norm": 0.17692172527313232,
+      "learning_rate": 9.500510445809768e-07,
+      "loss": 0.5883,
+      "step": 16077
+    },
+    {
+      "epoch": 18.312250712250712,
+      "grad_norm": 0.16592204570770264,
+      "learning_rate": 9.487792419059865e-07,
+      "loss": 0.8538,
+      "step": 16078
+    },
+    {
+      "epoch": 18.313390313390315,
+      "grad_norm": 0.18896038830280304,
+      "learning_rate": 9.475082745960345e-07,
+      "loss": 0.7501,
+      "step": 16079
+    },
+    {
+      "epoch": 18.314529914529913,
+      "grad_norm": 0.21196994185447693,
+      "learning_rate": 9.46238142695266e-07,
+      "loss": 0.7132,
+      "step": 16080
+    },
+    {
+      "epoch": 18.315669515669516,
+      "grad_norm": 0.18229085206985474,
+      "learning_rate": 9.449688462477929e-07,
+      "loss": 0.641,
+      "step": 16081
+    },
+    {
+      "epoch": 18.316809116809118,
+      "grad_norm": 0.2193417251110077,
+      "learning_rate": 9.437003852977022e-07,
+      "loss": 0.7161,
+      "step": 16082
+    },
+    {
+      "epoch": 18.317948717948717,
+      "grad_norm": 0.2101801633834839,
+      "learning_rate": 9.424327598890531e-07,
+      "loss": 0.5073,
+      "step": 16083
+    },
+    {
+      "epoch": 18.31908831908832,
+      "grad_norm": 0.21685943007469177,
+      "learning_rate": 9.41165970065877e-07,
+      "loss": 0.5648,
+      "step": 16084
+    },
+    {
+      "epoch": 18.32022792022792,
+      "grad_norm": 0.23681126534938812,
+      "learning_rate": 9.399000158721638e-07,
+      "loss": 0.6195,
+      "step": 16085
+    },
+    {
+      "epoch": 18.32136752136752,
+      "grad_norm": 0.16676515340805054,
+      "learning_rate": 9.386348973518893e-07,
+      "loss": 0.8512,
+      "step": 16086
+    },
+    {
+      "epoch": 18.322507122507123,
+      "grad_norm": 0.20879307389259338,
+      "learning_rate": 9.373706145489935e-07,
+      "loss": 0.5226,
+      "step": 16087
+    },
+    {
+      "epoch": 18.323646723646725,
+      "grad_norm": 0.2310570925474167,
+      "learning_rate": 9.361071675073912e-07,
+      "loss": 0.5879,
   ],
   "logging_steps": 1,
@@ -111327,7 +113427,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
|
| 112626 |
+
"step": 16088
|
| 112627 |
+
},
|
| 112628 |
+
{
|
| 112629 |
+
"epoch": 18.324786324786324,
|
| 112630 |
+
"grad_norm": 0.25332340598106384,
|
| 112631 |
+
"learning_rate": 9.348445562709613e-07,
|
| 112632 |
+
"loss": 0.4888,
|
| 112633 |
+
"step": 16089
|
| 112634 |
+
},
|
| 112635 |
+
{
|
| 112636 |
+
"epoch": 18.325925925925926,
|
| 112637 |
+
"grad_norm": 0.18049627542495728,
|
| 112638 |
+
"learning_rate": 9.335827808835574e-07,
|
| 112639 |
+
"loss": 0.816,
|
| 112640 |
+
"step": 16090
|
| 112641 |
+
},
|
| 112642 |
+
{
|
| 112643 |
+
"epoch": 18.32706552706553,
|
| 112644 |
+
"grad_norm": 0.2308613657951355,
|
| 112645 |
+
"learning_rate": 9.323218413890111e-07,
|
| 112646 |
+
"loss": 0.6842,
|
| 112647 |
+
"step": 16091
|
| 112648 |
+
},
|
| 112649 |
+
{
|
| 112650 |
+
"epoch": 18.328205128205127,
|
| 112651 |
+
"grad_norm": 0.1703282594680786,
|
| 112652 |
+
"learning_rate": 9.310617378311126e-07,
|
| 112653 |
+
"loss": 0.7388,
|
| 112654 |
+
"step": 16092
|
| 112655 |
+
},
|
| 112656 |
+
{
|
| 112657 |
+
"epoch": 18.32934472934473,
|
| 112658 |
+
"grad_norm": 0.20733553171157837,
|
| 112659 |
+
"learning_rate": 9.298024702536268e-07,
|
| 112660 |
+
"loss": 0.515,
|
| 112661 |
+
"step": 16093
|
| 112662 |
+
},
|
| 112663 |
+
{
|
| 112664 |
+
"epoch": 18.33048433048433,
|
| 112665 |
+
"grad_norm": 0.20539535582065582,
|
| 112666 |
+
"learning_rate": 9.285440387002964e-07,
|
| 112667 |
+
"loss": 0.7314,
|
| 112668 |
+
"step": 16094
|
| 112669 |
+
},
|
| 112670 |
+
{
|
| 112671 |
+
"epoch": 18.33162393162393,
|
| 112672 |
+
"grad_norm": 0.2035544216632843,
|
| 112673 |
+
"learning_rate": 9.272864432148282e-07,
|
| 112674 |
+
"loss": 0.8127,
|
| 112675 |
+
"step": 16095
|
| 112676 |
+
},
|
| 112677 |
+
{
|
| 112678 |
+
"epoch": 18.332763532763533,
|
| 112679 |
+
"grad_norm": 0.15647606551647186,
|
| 112680 |
+
"learning_rate": 9.260296838409038e-07,
|
| 112681 |
+
"loss": 0.7105,
|
| 112682 |
+
"step": 16096
|
| 112683 |
+
},
|
| 112684 |
+
{
|
| 112685 |
+
"epoch": 18.333903133903135,
|
| 112686 |
+
"grad_norm": 0.19675278663635254,
|
| 112687 |
+
"learning_rate": 9.24773760622169e-07,
|
| 112688 |
+
"loss": 0.604,
|
| 112689 |
+
"step": 16097
|
| 112690 |
+
},
|
| 112691 |
+
{
|
| 112692 |
+
"epoch": 18.335042735042734,
|
| 112693 |
+
"grad_norm": 0.2259322851896286,
|
| 112694 |
+
"learning_rate": 9.2351867360225e-07,
|
| 112695 |
+
"loss": 0.5047,
|
| 112696 |
+
"step": 16098
|
| 112697 |
+
},
|
| 112698 |
+
{
|
| 112699 |
+
"epoch": 18.336182336182336,
|
| 112700 |
+
"grad_norm": 0.17692503333091736,
|
| 112701 |
+
"learning_rate": 9.222644228247368e-07,
|
| 112702 |
+
"loss": 0.7321,
|
| 112703 |
+
"step": 16099
|
| 112704 |
+
},
|
| 112705 |
+
{
|
| 112706 |
+
"epoch": 18.33732193732194,
|
| 112707 |
+
"grad_norm": 0.18350818753242493,
|
| 112708 |
+
"learning_rate": 9.210110083331947e-07,
|
| 112709 |
+
"loss": 0.8457,
|
| 112710 |
+
"step": 16100
|
| 112711 |
+
},
|
| 112712 |
+
{
|
| 112713 |
+
"epoch": 18.338461538461537,
|
| 112714 |
+
"grad_norm": 0.19642199575901031,
|
| 112715 |
+
"learning_rate": 9.197584301711582e-07,
|
| 112716 |
+
"loss": 0.7522,
|
| 112717 |
+
"step": 16101
|
| 112718 |
+
},
|
| 112719 |
+
{
|
| 112720 |
+
"epoch": 18.33960113960114,
|
| 112721 |
+
"grad_norm": 0.22106897830963135,
|
| 112722 |
+
"learning_rate": 9.185066883821341e-07,
|
| 112723 |
+
"loss": 0.6094,
|
| 112724 |
+
"step": 16102
|
| 112725 |
+
},
|
| 112726 |
+
{
|
| 112727 |
+
"epoch": 18.340740740740742,
|
| 112728 |
+
"grad_norm": 0.1990758329629898,
|
| 112729 |
+
"learning_rate": 9.172557830095935e-07,
|
| 112730 |
+
"loss": 0.5999,
|
| 112731 |
+
"step": 16103
|
| 112732 |
+
},
|
| 112733 |
+
{
|
| 112734 |
+
"epoch": 18.34188034188034,
|
| 112735 |
+
"grad_norm": 0.17298246920108795,
|
| 112736 |
+
"learning_rate": 9.160057140969902e-07,
|
| 112737 |
+
"loss": 0.8371,
|
| 112738 |
+
"step": 16104
|
| 112739 |
+
},
|
| 112740 |
+
{
|
| 112741 |
+
"epoch": 18.343019943019943,
|
| 112742 |
+
"grad_norm": 0.17569628357887268,
|
| 112743 |
+
"learning_rate": 9.147564816877369e-07,
|
| 112744 |
+
"loss": 0.6274,
|
| 112745 |
+
"step": 16105
|
| 112746 |
+
},
|
| 112747 |
+
{
|
| 112748 |
+
"epoch": 18.344159544159545,
|
| 112749 |
+
"grad_norm": 0.17075328528881073,
|
| 112750 |
+
"learning_rate": 9.13508085825232e-07,
|
| 112751 |
+
"loss": 0.7926,
|
| 112752 |
+
"step": 16106
|
| 112753 |
+
},
|
| 112754 |
+
{
|
| 112755 |
+
"epoch": 18.345299145299144,
|
| 112756 |
+
"grad_norm": 0.19348692893981934,
|
| 112757 |
+
"learning_rate": 9.122605265528244e-07,
|
| 112758 |
+
"loss": 0.6492,
|
| 112759 |
+
"step": 16107
|
| 112760 |
+
},
|
| 112761 |
+
{
|
| 112762 |
+
"epoch": 18.346438746438746,
|
| 112763 |
+
"grad_norm": 0.17550738155841827,
|
| 112764 |
+
"learning_rate": 9.110138039138488e-07,
|
| 112765 |
+
"loss": 0.6892,
|
| 112766 |
+
"step": 16108
|
| 112767 |
+
},
|
| 112768 |
+
{
|
| 112769 |
+
"epoch": 18.34757834757835,
|
| 112770 |
+
"grad_norm": 0.1994296759366989,
|
| 112771 |
+
"learning_rate": 9.097679179516095e-07,
|
| 112772 |
+
"loss": 0.6749,
|
| 112773 |
+
"step": 16109
|
| 112774 |
+
},
|
| 112775 |
+
{
|
| 112776 |
+
"epoch": 18.348717948717947,
|
| 112777 |
+
"grad_norm": 0.25115880370140076,
|
| 112778 |
+
"learning_rate": 9.085228687093799e-07,
|
| 112779 |
+
"loss": 0.6444,
|
| 112780 |
+
"step": 16110
|
| 112781 |
+
},
|
| 112782 |
+
{
|
| 112783 |
+
"epoch": 18.34985754985755,
|
| 112784 |
+
"grad_norm": 0.2618429958820343,
|
| 112785 |
+
"learning_rate": 9.072786562304036e-07,
|
| 112786 |
+
"loss": 0.4291,
|
| 112787 |
+
"step": 16111
|
| 112788 |
+
},
|
| 112789 |
+
{
|
| 112790 |
+
"epoch": 18.350997150997152,
|
| 112791 |
+
"grad_norm": 0.2156478464603424,
|
| 112792 |
+
"learning_rate": 9.060352805578931e-07,
|
| 112793 |
+
"loss": 0.6608,
|
| 112794 |
+
"step": 16112
|
| 112795 |
+
},
|
| 112796 |
+
{
|
| 112797 |
+
"epoch": 18.35213675213675,
|
| 112798 |
+
"grad_norm": 0.18937750160694122,
|
| 112799 |
+
"learning_rate": 9.047927417350388e-07,
|
| 112800 |
+
"loss": 0.6339,
|
| 112801 |
+
"step": 16113
|
| 112802 |
+
},
|
| 112803 |
+
{
|
| 112804 |
+
"epoch": 18.353276353276353,
|
| 112805 |
+
"grad_norm": 0.18046444654464722,
|
| 112806 |
+
"learning_rate": 9.035510398049923e-07,
|
| 112807 |
+
"loss": 0.6544,
|
| 112808 |
+
"step": 16114
|
| 112809 |
+
},
|
| 112810 |
+
{
|
| 112811 |
+
"epoch": 18.354415954415956,
|
| 112812 |
+
"grad_norm": 0.18438094854354858,
|
| 112813 |
+
"learning_rate": 9.023101748108859e-07,
|
| 112814 |
+
"loss": 0.5341,
|
| 112815 |
+
"step": 16115
|
| 112816 |
+
},
|
| 112817 |
+
{
|
| 112818 |
+
"epoch": 18.355555555555554,
|
| 112819 |
+
"grad_norm": 0.2069292813539505,
|
| 112820 |
+
"learning_rate": 9.010701467958127e-07,
|
| 112821 |
+
"loss": 0.6938,
|
| 112822 |
+
"step": 16116
|
| 112823 |
+
},
|
| 112824 |
+
{
|
| 112825 |
+
"epoch": 18.356695156695157,
|
| 112826 |
+
"grad_norm": 0.1813022941350937,
|
| 112827 |
+
"learning_rate": 8.998309558028467e-07,
|
| 112828 |
+
"loss": 0.7141,
|
| 112829 |
+
"step": 16117
|
| 112830 |
+
},
|
| 112831 |
+
{
|
| 112832 |
+
"epoch": 18.35783475783476,
|
| 112833 |
+
"grad_norm": 0.186764195561409,
|
| 112834 |
+
"learning_rate": 8.985926018750312e-07,
|
| 112835 |
+
"loss": 0.7199,
|
| 112836 |
+
"step": 16118
|
| 112837 |
+
},
|
| 112838 |
+
{
|
| 112839 |
+
"epoch": 18.358974358974358,
|
| 112840 |
+
"grad_norm": 0.1632576286792755,
|
| 112841 |
+
"learning_rate": 8.973550850553709e-07,
|
| 112842 |
+
"loss": 0.6903,
|
| 112843 |
+
"step": 16119
|
| 112844 |
+
},
|
| 112845 |
+
{
|
| 112846 |
+
"epoch": 18.36011396011396,
|
| 112847 |
+
"grad_norm": 0.1646881252527237,
|
| 112848 |
+
"learning_rate": 8.961184053868449e-07,
|
| 112849 |
+
"loss": 0.6153,
|
| 112850 |
+
"step": 16120
|
| 112851 |
+
},
|
| 112852 |
+
{
|
| 112853 |
+
"epoch": 18.361253561253562,
|
| 112854 |
+
"grad_norm": 0.21068136394023895,
|
| 112855 |
+
"learning_rate": 8.948825629124219e-07,
|
| 112856 |
+
"loss": 0.7535,
|
| 112857 |
+
"step": 16121
|
| 112858 |
+
},
|
| 112859 |
+
{
|
| 112860 |
+
"epoch": 18.36239316239316,
|
| 112861 |
+
"grad_norm": 0.16106244921684265,
|
| 112862 |
+
"learning_rate": 8.936475576750119e-07,
|
| 112863 |
+
"loss": 0.6235,
|
| 112864 |
+
"step": 16122
|
| 112865 |
+
},
|
| 112866 |
+
{
|
| 112867 |
+
"epoch": 18.363532763532763,
|
| 112868 |
+
"grad_norm": 0.19227463006973267,
|
| 112869 |
+
"learning_rate": 8.924133897175168e-07,
|
| 112870 |
+
"loss": 0.7561,
|
| 112871 |
+
"step": 16123
|
| 112872 |
+
},
|
| 112873 |
+
{
|
| 112874 |
+
"epoch": 18.364672364672366,
|
| 112875 |
+
"grad_norm": 0.18547876179218292,
|
| 112876 |
+
"learning_rate": 8.911800590827996e-07,
|
| 112877 |
+
"loss": 0.6382,
|
| 112878 |
+
"step": 16124
|
| 112879 |
+
},
|
| 112880 |
+
{
|
| 112881 |
+
"epoch": 18.365811965811965,
|
| 112882 |
+
"grad_norm": 0.21618832647800446,
|
| 112883 |
+
"learning_rate": 8.899475658137007e-07,
|
| 112884 |
+
"loss": 0.7581,
|
| 112885 |
+
"step": 16125
|
| 112886 |
+
},
|
| 112887 |
+
{
|
| 112888 |
+
"epoch": 18.366951566951567,
|
| 112889 |
+
"grad_norm": 0.19110552966594696,
|
| 112890 |
+
"learning_rate": 8.887159099530251e-07,
|
| 112891 |
+
"loss": 0.6661,
|
| 112892 |
+
"step": 16126
|
| 112893 |
+
},
|
| 112894 |
+
{
|
| 112895 |
+
"epoch": 18.36809116809117,
|
| 112896 |
+
"grad_norm": 0.2559528648853302,
|
| 112897 |
+
"learning_rate": 8.874850915435495e-07,
|
| 112898 |
+
"loss": 0.7246,
|
| 112899 |
+
"step": 16127
|
| 112900 |
+
},
|
| 112901 |
+
{
|
| 112902 |
+
"epoch": 18.369230769230768,
|
| 112903 |
+
"grad_norm": 0.2422475963830948,
|
| 112904 |
+
"learning_rate": 8.862551106280287e-07,
|
| 112905 |
+
"loss": 0.361,
|
| 112906 |
+
"step": 16128
|
| 112907 |
+
},
|
| 112908 |
+
{
|
| 112909 |
+
"epoch": 18.37037037037037,
|
| 112910 |
+
"grad_norm": 0.19314159452915192,
|
| 112911 |
+
"learning_rate": 8.850259672491839e-07,
|
| 112912 |
+
"loss": 0.4311,
|
| 112913 |
+
"step": 16129
|
| 112914 |
+
},
|
| 112915 |
+
{
|
| 112916 |
+
"epoch": 18.371509971509973,
|
| 112917 |
+
"grad_norm": 0.19628053903579712,
|
| 112918 |
+
"learning_rate": 8.837976614496978e-07,
|
| 112919 |
+
"loss": 0.6354,
|
| 112920 |
+
"step": 16130
|
| 112921 |
+
},
|
| 112922 |
+
{
|
| 112923 |
+
"epoch": 18.37264957264957,
|
| 112924 |
+
"grad_norm": 0.19356375932693481,
|
| 112925 |
+
"learning_rate": 8.825701932722391e-07,
|
| 112926 |
+
"loss": 0.8193,
|
| 112927 |
+
"step": 16131
|
| 112928 |
+
},
|
| 112929 |
+
{
|
| 112930 |
+
"epoch": 18.373789173789174,
|
| 112931 |
+
"grad_norm": 0.19120517373085022,
|
| 112932 |
+
"learning_rate": 8.813435627594457e-07,
|
| 112933 |
+
"loss": 0.7391,
|
| 112934 |
+
"step": 16132
|
| 112935 |
+
},
|
| 112936 |
+
{
|
| 112937 |
+
"epoch": 18.374928774928776,
|
| 112938 |
+
"grad_norm": 0.20273134112358093,
|
| 112939 |
+
"learning_rate": 8.801177699539142e-07,
|
| 112940 |
+
"loss": 0.6134,
|
| 112941 |
+
"step": 16133
|
| 112942 |
+
},
|
| 112943 |
+
{
|
| 112944 |
+
"epoch": 18.376068376068375,
|
| 112945 |
+
"grad_norm": 0.24117746949195862,
|
| 112946 |
+
"learning_rate": 8.788928148982217e-07,
|
| 112947 |
+
"loss": 0.6725,
|
| 112948 |
+
"step": 16134
|
| 112949 |
+
},
|
| 112950 |
+
{
|
| 112951 |
+
"epoch": 18.377207977207977,
|
| 112952 |
+
"grad_norm": 0.22128960490226746,
|
| 112953 |
+
"learning_rate": 8.776686976349147e-07,
|
| 112954 |
+
"loss": 0.6186,
|
| 112955 |
+
"step": 16135
|
| 112956 |
+
},
|
| 112957 |
+
{
|
| 112958 |
+
"epoch": 18.37834757834758,
|
| 112959 |
+
"grad_norm": 0.2156917154788971,
|
| 112960 |
+
"learning_rate": 8.764454182065146e-07,
|
| 112961 |
+
"loss": 0.6418,
|
| 112962 |
+
"step": 16136
|
| 112963 |
+
},
|
| 112964 |
+
{
|
| 112965 |
+
"epoch": 18.379487179487178,
|
| 112966 |
+
"grad_norm": 0.224776491522789,
|
| 112967 |
+
"learning_rate": 8.752229766555015e-07,
|
| 112968 |
+
"loss": 0.6653,
|
| 112969 |
+
"step": 16137
|
| 112970 |
+
},
|
| 112971 |
+
{
|
| 112972 |
+
"epoch": 18.38062678062678,
|
| 112973 |
+
"grad_norm": 0.18596409261226654,
|
| 112974 |
+
"learning_rate": 8.740013730243357e-07,
|
| 112975 |
+
"loss": 0.6871,
|
| 112976 |
+
"step": 16138
|
| 112977 |
+
},
|
| 112978 |
+
{
|
| 112979 |
+
"epoch": 18.381766381766383,
|
| 112980 |
+
"grad_norm": 0.21921518445014954,
|
| 112981 |
+
"learning_rate": 8.727806073554528e-07,
|
| 112982 |
+
"loss": 0.5403,
|
| 112983 |
+
"step": 16139
|
| 112984 |
+
},
|
| 112985 |
+
{
|
| 112986 |
+
"epoch": 18.38290598290598,
|
| 112987 |
+
"grad_norm": 0.16866829991340637,
|
| 112988 |
+
"learning_rate": 8.715606796912495e-07,
|
| 112989 |
+
"loss": 0.7576,
|
| 112990 |
+
"step": 16140
|
| 112991 |
+
},
|
| 112992 |
+
{
|
| 112993 |
+
"epoch": 18.384045584045584,
|
| 112994 |
+
"grad_norm": 0.18714411556720734,
|
| 112995 |
+
"learning_rate": 8.703415900740974e-07,
|
| 112996 |
+
"loss": 0.9033,
|
| 112997 |
+
"step": 16141
|
| 112998 |
+
},
|
| 112999 |
+
{
|
| 113000 |
+
"epoch": 18.385185185185186,
|
| 113001 |
+
"grad_norm": 0.19236190617084503,
|
| 113002 |
+
"learning_rate": 8.691233385463321e-07,
|
| 113003 |
+
"loss": 0.6799,
|
| 113004 |
+
"step": 16142
|
| 113005 |
+
},
|
| 113006 |
+
{
|
| 113007 |
+
"epoch": 18.386324786324785,
|
| 113008 |
+
"grad_norm": 0.1837623119354248,
|
| 113009 |
+
"learning_rate": 8.679059251502835e-07,
|
| 113010 |
+
"loss": 0.5914,
|
| 113011 |
+
"step": 16143
|
| 113012 |
+
},
|
| 113013 |
+
{
|
| 113014 |
+
"epoch": 18.387464387464387,
|
| 113015 |
+
"grad_norm": 0.19275638461112976,
|
| 113016 |
+
"learning_rate": 8.66689349928218e-07,
|
| 113017 |
+
"loss": 0.544,
|
| 113018 |
+
"step": 16144
|
| 113019 |
+
},
|
| 113020 |
+
{
|
| 113021 |
+
"epoch": 18.38860398860399,
|
| 113022 |
+
"grad_norm": 0.1795569509267807,
|
| 113023 |
+
"learning_rate": 8.654736129224017e-07,
|
| 113024 |
+
"loss": 0.8339,
|
| 113025 |
+
"step": 16145
|
| 113026 |
+
},
|
| 113027 |
+
{
|
| 113028 |
+
"epoch": 18.38974358974359,
|
| 113029 |
+
"grad_norm": 0.18885968625545502,
|
| 113030 |
+
"learning_rate": 8.642587141750563e-07,
|
| 113031 |
+
"loss": 0.4927,
|
| 113032 |
+
"step": 16146
|
| 113033 |
+
},
|
| 113034 |
+
{
|
| 113035 |
+
"epoch": 18.39088319088319,
|
| 113036 |
+
"grad_norm": 0.20421624183654785,
|
| 113037 |
+
"learning_rate": 8.630446537283815e-07,
|
| 113038 |
+
"loss": 0.5268,
|
| 113039 |
+
"step": 16147
|
| 113040 |
+
},
|
| 113041 |
+
{
|
| 113042 |
+
"epoch": 18.392022792022793,
|
| 113043 |
+
"grad_norm": 0.17060433328151703,
|
| 113044 |
+
"learning_rate": 8.618314316245407e-07,
|
| 113045 |
+
"loss": 0.7207,
|
| 113046 |
+
"step": 16148
|
| 113047 |
+
},
|
| 113048 |
+
{
|
| 113049 |
+
"epoch": 18.39316239316239,
|
| 113050 |
+
"grad_norm": 0.25263509154319763,
|
| 113051 |
+
"learning_rate": 8.606190479056725e-07,
|
| 113052 |
+
"loss": 0.6565,
|
| 113053 |
+
"step": 16149
|
| 113054 |
+
},
|
| 113055 |
+
{
|
| 113056 |
+
"epoch": 18.394301994301994,
|
| 113057 |
+
"grad_norm": 0.17055436968803406,
|
| 113058 |
+
"learning_rate": 8.594075026138904e-07,
|
| 113059 |
+
"loss": 0.5902,
|
| 113060 |
+
"step": 16150
|
| 113061 |
+
},
|
| 113062 |
+
{
|
| 113063 |
+
"epoch": 18.395441595441596,
|
| 113064 |
+
"grad_norm": 0.21823427081108093,
|
| 113065 |
+
"learning_rate": 8.581967957912746e-07,
|
| 113066 |
+
"loss": 0.6674,
|
| 113067 |
+
"step": 16151
|
| 113068 |
+
},
|
| 113069 |
+
{
|
| 113070 |
+
"epoch": 18.396581196581195,
|
| 113071 |
+
"grad_norm": 0.20258358120918274,
|
| 113072 |
+
"learning_rate": 8.569869274798719e-07,
|
| 113073 |
+
"loss": 0.6395,
|
| 113074 |
+
"step": 16152
|
| 113075 |
+
},
|
| 113076 |
+
{
|
| 113077 |
+
"epoch": 18.397720797720797,
|
| 113078 |
+
"grad_norm": 0.1745457798242569,
|
| 113079 |
+
"learning_rate": 8.557778977217046e-07,
|
| 113080 |
+
"loss": 0.8352,
|
| 113081 |
+
"step": 16153
|
| 113082 |
+
},
|
| 113083 |
+
{
|
| 113084 |
+
"epoch": 18.3988603988604,
|
| 113085 |
+
"grad_norm": 0.1940583735704422,
|
| 113086 |
+
"learning_rate": 8.545697065587694e-07,
|
| 113087 |
+
"loss": 0.6205,
|
| 113088 |
+
"step": 16154
|
| 113089 |
+
},
|
| 113090 |
+
{
|
| 113091 |
+
"epoch": 18.4,
|
| 113092 |
+
"grad_norm": 0.20821359753608704,
|
| 113093 |
+
"learning_rate": 8.533623540330327e-07,
|
| 113094 |
+
"loss": 0.6508,
|
| 113095 |
+
"step": 16155
|
| 113096 |
+
},
|
| 113097 |
+
{
|
| 113098 |
+
"epoch": 18.4011396011396,
|
| 113099 |
+
"grad_norm": 0.2047506719827652,
|
| 113100 |
+
"learning_rate": 8.521558401864193e-07,
|
| 113101 |
+
"loss": 0.7315,
|
| 113102 |
+
"step": 16156
|
| 113103 |
+
},
|
| 113104 |
+
{
|
| 113105 |
+
"epoch": 18.402279202279203,
|
| 113106 |
+
"grad_norm": 0.16641731560230255,
|
| 113107 |
+
"learning_rate": 8.509501650608432e-07,
|
| 113108 |
+
"loss": 0.6271,
|
| 113109 |
+
"step": 16157
|
| 113110 |
+
},
|
| 113111 |
+
{
|
| 113112 |
+
"epoch": 18.403418803418802,
|
| 113113 |
+
"grad_norm": 0.23004211485385895,
|
| 113114 |
+
"learning_rate": 8.497453286981788e-07,
|
| 113115 |
+
"loss": 0.5899,
|
| 113116 |
+
"step": 16158
|
| 113117 |
+
},
|
| 113118 |
+
{
|
| 113119 |
+
"epoch": 18.404558404558404,
|
| 113120 |
+
"grad_norm": 0.1965126395225525,
|
| 113121 |
+
"learning_rate": 8.485413311402734e-07,
|
| 113122 |
+
"loss": 0.5774,
|
| 113123 |
+
"step": 16159
|
| 113124 |
+
},
|
| 113125 |
+
{
|
| 113126 |
+
"epoch": 18.405698005698007,
|
| 113127 |
+
"grad_norm": 0.2303241342306137,
|
| 113128 |
+
"learning_rate": 8.473381724289409e-07,
|
| 113129 |
+
"loss": 0.5625,
|
| 113130 |
+
"step": 16160
|
| 113131 |
+
},
|
| 113132 |
+
{
|
| 113133 |
+
"epoch": 18.406837606837605,
|
| 113134 |
+
"grad_norm": 0.19273196160793304,
|
| 113135 |
+
"learning_rate": 8.461358526059754e-07,
|
| 113136 |
+
"loss": 0.5605,
|
| 113137 |
+
"step": 16161
|
| 113138 |
+
},
|
| 113139 |
+
{
|
| 113140 |
+
"epoch": 18.407977207977208,
|
| 113141 |
+
"grad_norm": 0.2125997543334961,
|
| 113142 |
+
"learning_rate": 8.449343717131325e-07,
|
| 113143 |
+
"loss": 0.6399,
|
| 113144 |
+
"step": 16162
|
| 113145 |
+
},
|
| 113146 |
+
{
|
| 113147 |
+
"epoch": 18.40911680911681,
|
| 113148 |
+
"grad_norm": 0.1999417543411255,
|
| 113149 |
+
"learning_rate": 8.437337297921511e-07,
|
| 113150 |
+
"loss": 0.7604,
|
| 113151 |
+
"step": 16163
|
| 113152 |
+
},
|
| 113153 |
+
{
|
| 113154 |
+
"epoch": 18.41025641025641,
|
| 113155 |
+
"grad_norm": 0.16549277305603027,
|
| 113156 |
+
"learning_rate": 8.425339268847199e-07,
|
| 113157 |
+
"loss": 0.7765,
|
| 113158 |
+
"step": 16164
|
| 113159 |
+
},
|
| 113160 |
+
{
|
| 113161 |
+
"epoch": 18.41139601139601,
|
| 113162 |
+
"grad_norm": 0.19712743163108826,
|
| 113163 |
+
"learning_rate": 8.413349630325223e-07,
|
| 113164 |
+
"loss": 0.4729,
|
| 113165 |
+
"step": 16165
|
| 113166 |
+
},
|
| 113167 |
+
{
|
| 113168 |
+
"epoch": 18.412535612535613,
|
| 113169 |
+
"grad_norm": 0.19830380380153656,
|
| 113170 |
+
"learning_rate": 8.401368382772029e-07,
|
| 113171 |
+
"loss": 0.637,
|
| 113172 |
+
"step": 16166
|
| 113173 |
+
},
|
| 113174 |
+
{
|
| 113175 |
+
"epoch": 18.413675213675212,
|
| 113176 |
+
"grad_norm": 0.23345257341861725,
|
| 113177 |
+
"learning_rate": 8.389395526603644e-07,
|
| 113178 |
+
"loss": 0.4638,
|
| 113179 |
+
"step": 16167
|
| 113180 |
+
},
|
| 113181 |
+
{
|
| 113182 |
+
"epoch": 18.414814814814815,
|
| 113183 |
+
"grad_norm": 0.20206978917121887,
|
| 113184 |
+
"learning_rate": 8.377431062236013e-07,
|
| 113185 |
+
"loss": 0.7629,
|
| 113186 |
+
"step": 16168
|
| 113187 |
+
},
|
| 113188 |
+
{
|
| 113189 |
+
"epoch": 18.415954415954417,
|
| 113190 |
+
"grad_norm": 0.2112087458372116,
|
| 113191 |
+
"learning_rate": 8.365474990084638e-07,
|
| 113192 |
+
"loss": 0.5297,
|
| 113193 |
+
"step": 16169
|
| 113194 |
+
},
|
| 113195 |
+
{
|
| 113196 |
+
"epoch": 18.417094017094016,
|
| 113197 |
+
"grad_norm": 0.1696016639471054,
|
| 113198 |
+
"learning_rate": 8.353527310564879e-07,
|
| 113199 |
+
"loss": 0.6227,
|
| 113200 |
+
"step": 16170
|
| 113201 |
+
},
|
| 113202 |
+
{
|
| 113203 |
+
"epoch": 18.418233618233618,
|
| 113204 |
+
"grad_norm": 0.2516838014125824,
|
| 113205 |
+
"learning_rate": 8.341588024091602e-07,
|
| 113206 |
+
"loss": 0.6091,
|
| 113207 |
+
"step": 16171
|
| 113208 |
+
},
|
| 113209 |
+
{
|
| 113210 |
+
"epoch": 18.41937321937322,
|
| 113211 |
+
"grad_norm": 0.21054531633853912,
|
| 113212 |
+
"learning_rate": 8.329657131079527e-07,
|
| 113213 |
+
"loss": 0.6746,
|
| 113214 |
+
"step": 16172
|
| 113215 |
+
},
|
| 113216 |
+
{
|
| 113217 |
+
"epoch": 18.42051282051282,
|
| 113218 |
+
"grad_norm": 0.17801140248775482,
|
| 113219 |
+
"learning_rate": 8.317734631943047e-07,
|
| 113220 |
+
"loss": 0.6221,
|
| 113221 |
+
"step": 16173
|
| 113222 |
+
},
|
| 113223 |
+
{
|
| 113224 |
+
"epoch": 18.42165242165242,
|
| 113225 |
+
"grad_norm": 0.21043559908866882,
|
| 113226 |
+
"learning_rate": 8.30582052709633e-07,
|
| 113227 |
+
"loss": 0.5761,
|
| 113228 |
+
"step": 16174
|
| 113229 |
+
},
|
| 113230 |
+
{
|
| 113231 |
+
"epoch": 18.422792022792024,
|
| 113232 |
+
"grad_norm": 0.17518368363380432,
|
| 113233 |
+
"learning_rate": 8.293914816953046e-07,
|
| 113234 |
+
"loss": 0.828,
|
| 113235 |
+
"step": 16175
|
| 113236 |
+
},
|
| 113237 |
+
{
|
| 113238 |
+
"epoch": 18.423931623931622,
|
| 113239 |
+
"grad_norm": 0.18897578120231628,
|
| 113240 |
+
"learning_rate": 8.282017501926837e-07,
|
| 113241 |
+
"loss": 0.5563,
|
| 113242 |
+
"step": 16176
|
| 113243 |
+
},
|
| 113244 |
+
{
|
| 113245 |
+
"epoch": 18.425071225071225,
|
| 113246 |
+
"grad_norm": 0.19218704104423523,
|
| 113247 |
+
"learning_rate": 8.270128582430925e-07,
|
| 113248 |
+
"loss": 0.554,
|
| 113249 |
+
"step": 16177
|
| 113250 |
+
},
|
| 113251 |
+
{
|
| 113252 |
+
"epoch": 18.426210826210827,
|
| 113253 |
+
"grad_norm": 0.19123661518096924,
|
| 113254 |
+
"learning_rate": 8.258248058878148e-07,
|
| 113255 |
+
"loss": 0.8325,
|
| 113256 |
+
"step": 16178
|
| 113257 |
+
},
|
| 113258 |
+
{
|
| 113259 |
+
"epoch": 18.427350427350426,
|
| 113260 |
+
"grad_norm": 0.19908234477043152,
|
| 113261 |
+
"learning_rate": 8.246375931681232e-07,
|
| 113262 |
+
"loss": 0.5215,
|
| 113263 |
+
"step": 16179
|
| 113264 |
+
},
|
| 113265 |
+
{
|
| 113266 |
+
"epoch": 18.428490028490028,
|
| 113267 |
+
"grad_norm": 0.18487827479839325,
|
| 113268 |
+
"learning_rate": 8.234512201252487e-07,
|
| 113269 |
+
"loss": 0.6166,
|
| 113270 |
+
"step": 16180
|
| 113271 |
+
},
|
| 113272 |
+
{
|
| 113273 |
+
"epoch": 18.42962962962963,
|
| 113274 |
+
"grad_norm": 0.21101725101470947,
|
| 113275 |
+
"learning_rate": 8.222656868004053e-07,
|
| 113276 |
+
"loss": 0.6704,
|
| 113277 |
+
"step": 16181
|
| 113278 |
+
},
|
| 113279 |
+
{
|
| 113280 |
+
"epoch": 18.43076923076923,
|
| 113281 |
+
"grad_norm": 0.19184231758117676,
|
| 113282 |
+
"learning_rate": 8.210809932347575e-07,
|
| 113283 |
+
"loss": 0.6684,
|
| 113284 |
+
"step": 16182
|
| 113285 |
+
},
|
| 113286 |
+
{
|
| 113287 |
+
"epoch": 18.43190883190883,
|
| 113288 |
+
"grad_norm": 0.24409416317939758,
|
| 113289 |
+
"learning_rate": 8.19897139469461e-07,
|
| 113290 |
+
"loss": 0.4151,
|
| 113291 |
+
"step": 16183
|
| 113292 |
+
},
|
| 113293 |
+
{
|
| 113294 |
+
"epoch": 18.433048433048434,
|
| 113295 |
+
"grad_norm": 0.18449606001377106,
|
| 113296 |
+
"learning_rate": 8.187141255456304e-07,
|
| 113297 |
+
"loss": 0.5247,
|
| 113298 |
+
"step": 16184
|
| 113299 |
+
},
|
| 113300 |
+
{
|
| 113301 |
+
"epoch": 18.434188034188033,
|
| 113302 |
+
"grad_norm": 0.21148008108139038,
|
| 113303 |
+
"learning_rate": 8.175319515043606e-07,
|
| 113304 |
+
"loss": 0.7081,
|
| 113305 |
+
"step": 16185
|
| 113306 |
+
},
|
| 113307 |
+
{
|
| 113308 |
+
"epoch": 18.435327635327635,
|
| 113309 |
+
"grad_norm": 0.21608728170394897,
|
| 113310 |
+
"learning_rate": 8.163506173867047e-07,
|
| 113311 |
+
"loss": 0.8131,
|
| 113312 |
+
"step": 16186
|
| 113313 |
+
},
|
| 113314 |
+
{
|
| 113315 |
+
"epoch": 18.436467236467237,
|
| 113316 |
+
"grad_norm": 0.19518068432807922,
|
| 113317 |
+
"learning_rate": 8.151701232336967e-07,
|
| 113318 |
+
"loss": 0.7202,
|
| 113319 |
+
"step": 16187
|
| 113320 |
+
},
|
| 113321 |
+
{
|
| 113322 |
+
"epoch": 18.437606837606836,
|
| 113323 |
+
"grad_norm": 0.2155269980430603,
|
| 113324 |
+
"learning_rate": 8.139904690863426e-07,
|
| 113325 |
+
"loss": 0.7246,
|
| 113326 |
+
"step": 16188
|
| 113327 |
+
},
|
| 113328 |
+
{
|
| 113329 |
+
"epoch": 18.43874643874644,
|
| 113330 |
+
"grad_norm": 0.15008096396923065,
|
| 113331 |
+
"learning_rate": 8.128116549856097e-07,
|
| 113332 |
+
"loss": 0.593,
|
| 113333 |
+
"step": 16189
|
| 113334 |
+
},
|
| 113335 |
+
{
|
| 113336 |
+
"epoch": 18.43988603988604,
|
| 113337 |
+
"grad_norm": 0.2401498258113861,
|
| 113338 |
+
"learning_rate": 8.116336809724428e-07,
|
| 113339 |
+
"loss": 0.4578,
|
| 113340 |
+
"step": 16190
|
| 113341 |
+
},
|
| 113342 |
+
{
|
| 113343 |
+
"epoch": 18.44102564102564,
|
| 113344 |
+
"grad_norm": 0.1776338815689087,
|
| 113345 |
+
"learning_rate": 8.104565470877568e-07,
|
| 113346 |
+
"loss": 0.8418,
|
| 113347 |
+
"step": 16191
|
| 113348 |
+
},
|
| 113349 |
+
{
|
| 113350 |
+
"epoch": 18.442165242165242,
|
| 113351 |
+
"grad_norm": 0.2272671014070511,
|
| 113352 |
+
"learning_rate": 8.09280253372438e-07,
|
| 113353 |
+
"loss": 0.5513,
|
| 113354 |
+
"step": 16192
|
| 113355 |
+
},
|
| 113356 |
+
{
|
| 113357 |
+
"epoch": 18.443304843304844,
|
| 113358 |
+
"grad_norm": 0.19338665902614594,
|
| 113359 |
+
"learning_rate": 8.081047998673375e-07,
|
| 113360 |
+
"loss": 0.7093,
|
| 113361 |
+
"step": 16193
|
| 113362 |
+
},
|
| 113363 |
+
{
|
| 113364 |
+
"epoch": 18.444444444444443,
|
| 113365 |
+
"grad_norm": 0.17604541778564453,
|
| 113366 |
+
"learning_rate": 8.069301866132861e-07,
|
| 113367 |
+
"loss": 0.8846,
|
| 113368 |
+
"step": 16194
|
| 113369 |
+
},
|
| 113370 |
+
{
|
| 113371 |
+
"epoch": 18.445584045584045,
|
| 113372 |
+
"grad_norm": 0.21153387427330017,
|
| 113373 |
+
"learning_rate": 8.057564136510792e-07,
|
| 113374 |
+
"loss": 0.4936,
|
| 113375 |
+
"step": 16195
|
| 113376 |
+
},
|
| 113377 |
+
{
|
| 113378 |
+
"epoch": 18.446723646723648,
|
| 113379 |
+
"grad_norm": 0.1837536096572876,
|
| 113380 |
+
"learning_rate": 8.045834810214898e-07,
|
| 113381 |
+
"loss": 0.7118,
|
| 113382 |
+
"step": 16196
|
| 113383 |
+
},
|
| 113384 |
+
{
|
| 113385 |
+
"epoch": 18.447863247863246,
|
| 113386 |
+
"grad_norm": 0.2227238118648529,
|
| 113387 |
+
"learning_rate": 8.034113887652517e-07,
|
| 113388 |
+
"loss": 0.7348,
|
| 113389 |
+
"step": 16197
|
| 113390 |
+
},
|
| 113391 |
+
{
|
| 113392 |
+
"epoch": 18.44900284900285,
|
| 113393 |
+
"grad_norm": 0.20759250223636627,
|
| 113394 |
+
"learning_rate": 8.02240136923077e-07,
|
| 113395 |
+
"loss": 0.6929,
|
| 113396 |
+
"step": 16198
|
| 113397 |
+
},
|
| 113398 |
+
{
|
| 113399 |
+
"epoch": 18.45014245014245,
|
| 113400 |
+
"grad_norm": 0.18193592131137848,
|
| 113401 |
+
"learning_rate": 8.010697255356469e-07,
|
| 113402 |
+
"loss": 0.7964,
|
| 113403 |
+
"step": 16199
|
| 113404 |
+
},
|
| 113405 |
+
{
|
| 113406 |
+
"epoch": 18.45128205128205,
|
| 113407 |
+
"grad_norm": 0.1874343603849411,
|
| 113408 |
+
"learning_rate": 7.999001546436152e-07,
|
| 113409 |
+
"loss": 0.6776,
|
| 113410 |
+
"step": 16200
|
| 113411 |
}
|
| 113412 |
],
|
| 113413 |
"logging_steps": 1,
|
|
|
|
      "attributes": {}
    }
  },
+  "total_flos": 9.057731143409787e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
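For reference, the records added above are entries of the "log_history" array in trainer_state.json, the file this part of the commit updates: each entry carries the epoch, gradient norm, learning rate, and loss logged at one optimizer step, and over this window the learning rate anneals from about 9.73e-07 down to 8.00e-07. A minimal sketch of how one might summarize the window after downloading the checkpoint (the local path and the step cutoff are illustrative assumptions, not part of the commit):

import json

# Minimal sketch: summarize the logging window shown in the diff above.
# Path is an assumption; point it at wherever the checkpoint was downloaded.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Each training record has the shape seen in the diff:
# {"epoch": ..., "grad_norm": ..., "learning_rate": ..., "loss": ..., "step": ...}
window = [e for e in state["log_history"] if "loss" in e and e["step"] >= 16059]

mean_loss = sum(e["loss"] for e in window) / len(window)
last = window[-1]
print(f"steps {window[0]['step']}..{last['step']}: "
      f"mean loss {mean_loss:.4f}, final lr {last['learning_rate']:.3e}")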