Upload 10 files
Browse files- model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +2103 -3
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 46336400
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f87a907da662f0f7cd1c78bc7e116dc34bf7bc822bd88d0d8be318cb9b6c530
|
| 3 |
size 46336400
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 92717818
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3540ab6a715c86446c0fcc17747212409c242e583821d6556df5df779c3b4fbc
|
| 3 |
size 92717818
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36bbf17e45bd87663cd98ff4d6027892aa4320c31d67540c8ee33c1d805a30c7
|
| 3 |
size 14244
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a971a8dd2d90d918014f25aed8de35f62e388573fdd5a7706b6d6fe96f8fb76
|
| 3 |
size 1064
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -10508,6 +10508,2106 @@
|
|
| 10508 |
"learning_rate": 0.0008358565005963849,
|
| 10509 |
"loss": 2.7849,
|
| 10510 |
"step": 15000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10511 |
}
|
| 10512 |
],
|
| 10513 |
"logging_steps": 10,
|
|
@@ -10527,7 +12627,7 @@
|
|
| 10527 |
"attributes": {}
|
| 10528 |
}
|
| 10529 |
},
|
| 10530 |
-
"total_flos":
|
| 10531 |
"train_batch_size": 64,
|
| 10532 |
"trial_name": null,
|
| 10533 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 14.86035710599649,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 18000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 10508 |
"learning_rate": 0.0008358565005963849,
|
| 10509 |
"loss": 2.7849,
|
| 10510 |
"step": 15000
|
| 10511 |
+
},
|
| 10512 |
+
{
|
| 10513 |
+
"epoch": 12.393023015791103,
|
| 10514 |
+
"grad_norm": 0.5857816338539124,
|
| 10515 |
+
"learning_rate": 0.0008356729975227085,
|
| 10516 |
+
"loss": 2.7702,
|
| 10517 |
+
"step": 15010
|
| 10518 |
+
},
|
| 10519 |
+
{
|
| 10520 |
+
"epoch": 12.401279801837134,
|
| 10521 |
+
"grad_norm": 0.5820302367210388,
|
| 10522 |
+
"learning_rate": 0.000835489494449032,
|
| 10523 |
+
"loss": 2.7862,
|
| 10524 |
+
"step": 15020
|
| 10525 |
+
},
|
| 10526 |
+
{
|
| 10527 |
+
"epoch": 12.409536587883167,
|
| 10528 |
+
"grad_norm": 0.6015300750732422,
|
| 10529 |
+
"learning_rate": 0.0008353059913753556,
|
| 10530 |
+
"loss": 2.7827,
|
| 10531 |
+
"step": 15030
|
| 10532 |
+
},
|
| 10533 |
+
{
|
| 10534 |
+
"epoch": 12.417793373929198,
|
| 10535 |
+
"grad_norm": 0.5810590386390686,
|
| 10536 |
+
"learning_rate": 0.000835122488301679,
|
| 10537 |
+
"loss": 2.7848,
|
| 10538 |
+
"step": 15040
|
| 10539 |
+
},
|
| 10540 |
+
{
|
| 10541 |
+
"epoch": 12.42605015997523,
|
| 10542 |
+
"grad_norm": 0.525604784488678,
|
| 10543 |
+
"learning_rate": 0.0008349389852280026,
|
| 10544 |
+
"loss": 2.7693,
|
| 10545 |
+
"step": 15050
|
| 10546 |
+
},
|
| 10547 |
+
{
|
| 10548 |
+
"epoch": 12.43430694602126,
|
| 10549 |
+
"grad_norm": 0.5634535551071167,
|
| 10550 |
+
"learning_rate": 0.0008347554821543261,
|
| 10551 |
+
"loss": 2.782,
|
| 10552 |
+
"step": 15060
|
| 10553 |
+
},
|
| 10554 |
+
{
|
| 10555 |
+
"epoch": 12.442563732067292,
|
| 10556 |
+
"grad_norm": 0.5564500689506531,
|
| 10557 |
+
"learning_rate": 0.0008345719790806497,
|
| 10558 |
+
"loss": 2.7656,
|
| 10559 |
+
"step": 15070
|
| 10560 |
+
},
|
| 10561 |
+
{
|
| 10562 |
+
"epoch": 12.450820518113325,
|
| 10563 |
+
"grad_norm": 0.570466160774231,
|
| 10564 |
+
"learning_rate": 0.0008343884760069731,
|
| 10565 |
+
"loss": 2.7781,
|
| 10566 |
+
"step": 15080
|
| 10567 |
+
},
|
| 10568 |
+
{
|
| 10569 |
+
"epoch": 12.459077304159356,
|
| 10570 |
+
"grad_norm": 0.5621691942214966,
|
| 10571 |
+
"learning_rate": 0.0008342049729332967,
|
| 10572 |
+
"loss": 2.774,
|
| 10573 |
+
"step": 15090
|
| 10574 |
+
},
|
| 10575 |
+
{
|
| 10576 |
+
"epoch": 12.467334090205387,
|
| 10577 |
+
"grad_norm": 0.5975548624992371,
|
| 10578 |
+
"learning_rate": 0.0008340214698596202,
|
| 10579 |
+
"loss": 2.7771,
|
| 10580 |
+
"step": 15100
|
| 10581 |
+
},
|
| 10582 |
+
{
|
| 10583 |
+
"epoch": 12.475590876251418,
|
| 10584 |
+
"grad_norm": 0.5807538628578186,
|
| 10585 |
+
"learning_rate": 0.0008338379667859437,
|
| 10586 |
+
"loss": 2.7786,
|
| 10587 |
+
"step": 15110
|
| 10588 |
+
},
|
| 10589 |
+
{
|
| 10590 |
+
"epoch": 12.483847662297451,
|
| 10591 |
+
"grad_norm": 0.61223965883255,
|
| 10592 |
+
"learning_rate": 0.0008336544637122671,
|
| 10593 |
+
"loss": 2.7769,
|
| 10594 |
+
"step": 15120
|
| 10595 |
+
},
|
| 10596 |
+
{
|
| 10597 |
+
"epoch": 12.492104448343483,
|
| 10598 |
+
"grad_norm": 0.5965583324432373,
|
| 10599 |
+
"learning_rate": 0.0008334709606385907,
|
| 10600 |
+
"loss": 2.777,
|
| 10601 |
+
"step": 15130
|
| 10602 |
+
},
|
| 10603 |
+
{
|
| 10604 |
+
"epoch": 12.500361234389514,
|
| 10605 |
+
"grad_norm": 0.5752902626991272,
|
| 10606 |
+
"learning_rate": 0.0008332874575649142,
|
| 10607 |
+
"loss": 2.7799,
|
| 10608 |
+
"step": 15140
|
| 10609 |
+
},
|
| 10610 |
+
{
|
| 10611 |
+
"epoch": 12.508618020435545,
|
| 10612 |
+
"grad_norm": 0.5716910362243652,
|
| 10613 |
+
"learning_rate": 0.0008331039544912378,
|
| 10614 |
+
"loss": 2.7791,
|
| 10615 |
+
"step": 15150
|
| 10616 |
+
},
|
| 10617 |
+
{
|
| 10618 |
+
"epoch": 12.516874806481576,
|
| 10619 |
+
"grad_norm": 0.6005849242210388,
|
| 10620 |
+
"learning_rate": 0.0008329204514175612,
|
| 10621 |
+
"loss": 2.7852,
|
| 10622 |
+
"step": 15160
|
| 10623 |
+
},
|
| 10624 |
+
{
|
| 10625 |
+
"epoch": 12.52513159252761,
|
| 10626 |
+
"grad_norm": 0.5944279432296753,
|
| 10627 |
+
"learning_rate": 0.0008327369483438848,
|
| 10628 |
+
"loss": 2.7757,
|
| 10629 |
+
"step": 15170
|
| 10630 |
+
},
|
| 10631 |
+
{
|
| 10632 |
+
"epoch": 12.53338837857364,
|
| 10633 |
+
"grad_norm": 0.6027126908302307,
|
| 10634 |
+
"learning_rate": 0.0008325534452702083,
|
| 10635 |
+
"loss": 2.7776,
|
| 10636 |
+
"step": 15180
|
| 10637 |
+
},
|
| 10638 |
+
{
|
| 10639 |
+
"epoch": 12.541645164619672,
|
| 10640 |
+
"grad_norm": 0.6317790746688843,
|
| 10641 |
+
"learning_rate": 0.0008323699421965319,
|
| 10642 |
+
"loss": 2.773,
|
| 10643 |
+
"step": 15190
|
| 10644 |
+
},
|
| 10645 |
+
{
|
| 10646 |
+
"epoch": 12.549901950665703,
|
| 10647 |
+
"grad_norm": 0.5477875471115112,
|
| 10648 |
+
"learning_rate": 0.0008321864391228553,
|
| 10649 |
+
"loss": 2.7858,
|
| 10650 |
+
"step": 15200
|
| 10651 |
+
},
|
| 10652 |
+
{
|
| 10653 |
+
"epoch": 12.558158736711736,
|
| 10654 |
+
"grad_norm": 0.5689815282821655,
|
| 10655 |
+
"learning_rate": 0.0008320029360491789,
|
| 10656 |
+
"loss": 2.7735,
|
| 10657 |
+
"step": 15210
|
| 10658 |
+
},
|
| 10659 |
+
{
|
| 10660 |
+
"epoch": 12.566415522757767,
|
| 10661 |
+
"grad_norm": 0.6152288317680359,
|
| 10662 |
+
"learning_rate": 0.0008318194329755024,
|
| 10663 |
+
"loss": 2.7855,
|
| 10664 |
+
"step": 15220
|
| 10665 |
+
},
|
| 10666 |
+
{
|
| 10667 |
+
"epoch": 12.574672308803798,
|
| 10668 |
+
"grad_norm": 0.5703557133674622,
|
| 10669 |
+
"learning_rate": 0.000831635929901826,
|
| 10670 |
+
"loss": 2.7674,
|
| 10671 |
+
"step": 15230
|
| 10672 |
+
},
|
| 10673 |
+
{
|
| 10674 |
+
"epoch": 12.58292909484983,
|
| 10675 |
+
"grad_norm": 0.6394575834274292,
|
| 10676 |
+
"learning_rate": 0.0008314524268281493,
|
| 10677 |
+
"loss": 2.7703,
|
| 10678 |
+
"step": 15240
|
| 10679 |
+
},
|
| 10680 |
+
{
|
| 10681 |
+
"epoch": 12.59118588089586,
|
| 10682 |
+
"grad_norm": 0.5734837055206299,
|
| 10683 |
+
"learning_rate": 0.0008312689237544729,
|
| 10684 |
+
"loss": 2.7765,
|
| 10685 |
+
"step": 15250
|
| 10686 |
+
},
|
| 10687 |
+
{
|
| 10688 |
+
"epoch": 12.599442666941894,
|
| 10689 |
+
"grad_norm": 0.594292402267456,
|
| 10690 |
+
"learning_rate": 0.0008310854206807964,
|
| 10691 |
+
"loss": 2.7674,
|
| 10692 |
+
"step": 15260
|
| 10693 |
+
},
|
| 10694 |
+
{
|
| 10695 |
+
"epoch": 12.607699452987925,
|
| 10696 |
+
"grad_norm": 0.5458073616027832,
|
| 10697 |
+
"learning_rate": 0.0008309019176071199,
|
| 10698 |
+
"loss": 2.7778,
|
| 10699 |
+
"step": 15270
|
| 10700 |
+
},
|
| 10701 |
+
{
|
| 10702 |
+
"epoch": 12.615956239033956,
|
| 10703 |
+
"grad_norm": 0.5974953174591064,
|
| 10704 |
+
"learning_rate": 0.0008307184145334434,
|
| 10705 |
+
"loss": 2.771,
|
| 10706 |
+
"step": 15280
|
| 10707 |
+
},
|
| 10708 |
+
{
|
| 10709 |
+
"epoch": 12.624213025079987,
|
| 10710 |
+
"grad_norm": 0.6053661108016968,
|
| 10711 |
+
"learning_rate": 0.000830534911459767,
|
| 10712 |
+
"loss": 2.7737,
|
| 10713 |
+
"step": 15290
|
| 10714 |
+
},
|
| 10715 |
+
{
|
| 10716 |
+
"epoch": 12.632469811126018,
|
| 10717 |
+
"grad_norm": 0.5710778832435608,
|
| 10718 |
+
"learning_rate": 0.0008303514083860905,
|
| 10719 |
+
"loss": 2.7705,
|
| 10720 |
+
"step": 15300
|
| 10721 |
+
},
|
| 10722 |
+
{
|
| 10723 |
+
"epoch": 12.640726597172051,
|
| 10724 |
+
"grad_norm": 0.5878491401672363,
|
| 10725 |
+
"learning_rate": 0.000830167905312414,
|
| 10726 |
+
"loss": 2.7832,
|
| 10727 |
+
"step": 15310
|
| 10728 |
+
},
|
| 10729 |
+
{
|
| 10730 |
+
"epoch": 12.648983383218082,
|
| 10731 |
+
"grad_norm": 0.5833500623703003,
|
| 10732 |
+
"learning_rate": 0.0008299844022387375,
|
| 10733 |
+
"loss": 2.7734,
|
| 10734 |
+
"step": 15320
|
| 10735 |
+
},
|
| 10736 |
+
{
|
| 10737 |
+
"epoch": 12.657240169264114,
|
| 10738 |
+
"grad_norm": 0.5963436961174011,
|
| 10739 |
+
"learning_rate": 0.0008298008991650611,
|
| 10740 |
+
"loss": 2.7795,
|
| 10741 |
+
"step": 15330
|
| 10742 |
+
},
|
| 10743 |
+
{
|
| 10744 |
+
"epoch": 12.665496955310145,
|
| 10745 |
+
"grad_norm": 0.6217861175537109,
|
| 10746 |
+
"learning_rate": 0.0008296173960913846,
|
| 10747 |
+
"loss": 2.7715,
|
| 10748 |
+
"step": 15340
|
| 10749 |
+
},
|
| 10750 |
+
{
|
| 10751 |
+
"epoch": 12.673753741356178,
|
| 10752 |
+
"grad_norm": 0.546258807182312,
|
| 10753 |
+
"learning_rate": 0.0008294338930177081,
|
| 10754 |
+
"loss": 2.7821,
|
| 10755 |
+
"step": 15350
|
| 10756 |
+
},
|
| 10757 |
+
{
|
| 10758 |
+
"epoch": 12.682010527402209,
|
| 10759 |
+
"grad_norm": 0.6429739594459534,
|
| 10760 |
+
"learning_rate": 0.0008292503899440316,
|
| 10761 |
+
"loss": 2.7808,
|
| 10762 |
+
"step": 15360
|
| 10763 |
+
},
|
| 10764 |
+
{
|
| 10765 |
+
"epoch": 12.69026731344824,
|
| 10766 |
+
"grad_norm": 0.6150422096252441,
|
| 10767 |
+
"learning_rate": 0.0008290668868703551,
|
| 10768 |
+
"loss": 2.7709,
|
| 10769 |
+
"step": 15370
|
| 10770 |
+
},
|
| 10771 |
+
{
|
| 10772 |
+
"epoch": 12.698524099494271,
|
| 10773 |
+
"grad_norm": 0.5569972991943359,
|
| 10774 |
+
"learning_rate": 0.0008288833837966786,
|
| 10775 |
+
"loss": 2.7778,
|
| 10776 |
+
"step": 15380
|
| 10777 |
+
},
|
| 10778 |
+
{
|
| 10779 |
+
"epoch": 12.706780885540303,
|
| 10780 |
+
"grad_norm": 0.5828894972801208,
|
| 10781 |
+
"learning_rate": 0.0008286998807230021,
|
| 10782 |
+
"loss": 2.7719,
|
| 10783 |
+
"step": 15390
|
| 10784 |
+
},
|
| 10785 |
+
{
|
| 10786 |
+
"epoch": 12.715037671586336,
|
| 10787 |
+
"grad_norm": 0.5625948309898376,
|
| 10788 |
+
"learning_rate": 0.0008285163776493256,
|
| 10789 |
+
"loss": 2.7635,
|
| 10790 |
+
"step": 15400
|
| 10791 |
+
},
|
| 10792 |
+
{
|
| 10793 |
+
"epoch": 12.723294457632367,
|
| 10794 |
+
"grad_norm": 0.6146851778030396,
|
| 10795 |
+
"learning_rate": 0.0008283328745756492,
|
| 10796 |
+
"loss": 2.791,
|
| 10797 |
+
"step": 15410
|
| 10798 |
+
},
|
| 10799 |
+
{
|
| 10800 |
+
"epoch": 12.731551243678398,
|
| 10801 |
+
"grad_norm": 0.5903885364532471,
|
| 10802 |
+
"learning_rate": 0.0008281493715019727,
|
| 10803 |
+
"loss": 2.7738,
|
| 10804 |
+
"step": 15420
|
| 10805 |
+
},
|
| 10806 |
+
{
|
| 10807 |
+
"epoch": 12.73980802972443,
|
| 10808 |
+
"grad_norm": 0.5333955883979797,
|
| 10809 |
+
"learning_rate": 0.0008279658684282962,
|
| 10810 |
+
"loss": 2.7711,
|
| 10811 |
+
"step": 15430
|
| 10812 |
+
},
|
| 10813 |
+
{
|
| 10814 |
+
"epoch": 12.748064815770462,
|
| 10815 |
+
"grad_norm": 0.5588700175285339,
|
| 10816 |
+
"learning_rate": 0.0008277823653546197,
|
| 10817 |
+
"loss": 2.7776,
|
| 10818 |
+
"step": 15440
|
| 10819 |
+
},
|
| 10820 |
+
{
|
| 10821 |
+
"epoch": 12.756321601816493,
|
| 10822 |
+
"grad_norm": 0.6176479458808899,
|
| 10823 |
+
"learning_rate": 0.0008275988622809433,
|
| 10824 |
+
"loss": 2.7769,
|
| 10825 |
+
"step": 15450
|
| 10826 |
+
},
|
| 10827 |
+
{
|
| 10828 |
+
"epoch": 12.764578387862525,
|
| 10829 |
+
"grad_norm": 0.5709108114242554,
|
| 10830 |
+
"learning_rate": 0.0008274153592072668,
|
| 10831 |
+
"loss": 2.7691,
|
| 10832 |
+
"step": 15460
|
| 10833 |
+
},
|
| 10834 |
+
{
|
| 10835 |
+
"epoch": 12.772835173908556,
|
| 10836 |
+
"grad_norm": 0.5612215995788574,
|
| 10837 |
+
"learning_rate": 0.0008272318561335903,
|
| 10838 |
+
"loss": 2.7771,
|
| 10839 |
+
"step": 15470
|
| 10840 |
+
},
|
| 10841 |
+
{
|
| 10842 |
+
"epoch": 12.781091959954587,
|
| 10843 |
+
"grad_norm": 0.582386314868927,
|
| 10844 |
+
"learning_rate": 0.0008270483530599138,
|
| 10845 |
+
"loss": 2.7688,
|
| 10846 |
+
"step": 15480
|
| 10847 |
+
},
|
| 10848 |
+
{
|
| 10849 |
+
"epoch": 12.78934874600062,
|
| 10850 |
+
"grad_norm": 0.5977119207382202,
|
| 10851 |
+
"learning_rate": 0.0008268648499862373,
|
| 10852 |
+
"loss": 2.776,
|
| 10853 |
+
"step": 15490
|
| 10854 |
+
},
|
| 10855 |
+
{
|
| 10856 |
+
"epoch": 12.797605532046651,
|
| 10857 |
+
"grad_norm": 0.5754312872886658,
|
| 10858 |
+
"learning_rate": 0.0008266813469125608,
|
| 10859 |
+
"loss": 2.761,
|
| 10860 |
+
"step": 15500
|
| 10861 |
+
},
|
| 10862 |
+
{
|
| 10863 |
+
"epoch": 12.805862318092682,
|
| 10864 |
+
"grad_norm": 0.56341552734375,
|
| 10865 |
+
"learning_rate": 0.0008264978438388842,
|
| 10866 |
+
"loss": 2.7812,
|
| 10867 |
+
"step": 15510
|
| 10868 |
+
},
|
| 10869 |
+
{
|
| 10870 |
+
"epoch": 12.814119104138713,
|
| 10871 |
+
"grad_norm": 0.5888708829879761,
|
| 10872 |
+
"learning_rate": 0.0008263143407652078,
|
| 10873 |
+
"loss": 2.7708,
|
| 10874 |
+
"step": 15520
|
| 10875 |
+
},
|
| 10876 |
+
{
|
| 10877 |
+
"epoch": 12.822375890184745,
|
| 10878 |
+
"grad_norm": 0.5750503540039062,
|
| 10879 |
+
"learning_rate": 0.0008261308376915313,
|
| 10880 |
+
"loss": 2.7895,
|
| 10881 |
+
"step": 15530
|
| 10882 |
+
},
|
| 10883 |
+
{
|
| 10884 |
+
"epoch": 12.830632676230778,
|
| 10885 |
+
"grad_norm": 0.5679807662963867,
|
| 10886 |
+
"learning_rate": 0.0008259473346178549,
|
| 10887 |
+
"loss": 2.7826,
|
| 10888 |
+
"step": 15540
|
| 10889 |
+
},
|
| 10890 |
+
{
|
| 10891 |
+
"epoch": 12.838889462276809,
|
| 10892 |
+
"grad_norm": 0.5332905054092407,
|
| 10893 |
+
"learning_rate": 0.0008257638315441783,
|
| 10894 |
+
"loss": 2.774,
|
| 10895 |
+
"step": 15550
|
| 10896 |
+
},
|
| 10897 |
+
{
|
| 10898 |
+
"epoch": 12.84714624832284,
|
| 10899 |
+
"grad_norm": 0.5367740392684937,
|
| 10900 |
+
"learning_rate": 0.0008255803284705019,
|
| 10901 |
+
"loss": 2.7621,
|
| 10902 |
+
"step": 15560
|
| 10903 |
+
},
|
| 10904 |
+
{
|
| 10905 |
+
"epoch": 12.855403034368871,
|
| 10906 |
+
"grad_norm": 0.6053501963615417,
|
| 10907 |
+
"learning_rate": 0.0008253968253968254,
|
| 10908 |
+
"loss": 2.7633,
|
| 10909 |
+
"step": 15570
|
| 10910 |
+
},
|
| 10911 |
+
{
|
| 10912 |
+
"epoch": 12.863659820414904,
|
| 10913 |
+
"grad_norm": 0.5788416862487793,
|
| 10914 |
+
"learning_rate": 0.000825213322323149,
|
| 10915 |
+
"loss": 2.7689,
|
| 10916 |
+
"step": 15580
|
| 10917 |
+
},
|
| 10918 |
+
{
|
| 10919 |
+
"epoch": 12.871916606460935,
|
| 10920 |
+
"grad_norm": 0.5835745334625244,
|
| 10921 |
+
"learning_rate": 0.0008250298192494724,
|
| 10922 |
+
"loss": 2.7746,
|
| 10923 |
+
"step": 15590
|
| 10924 |
+
},
|
| 10925 |
+
{
|
| 10926 |
+
"epoch": 12.880173392506967,
|
| 10927 |
+
"grad_norm": 0.6038824915885925,
|
| 10928 |
+
"learning_rate": 0.000824846316175796,
|
| 10929 |
+
"loss": 2.7778,
|
| 10930 |
+
"step": 15600
|
| 10931 |
+
},
|
| 10932 |
+
{
|
| 10933 |
+
"epoch": 12.888430178552998,
|
| 10934 |
+
"grad_norm": 0.5711358785629272,
|
| 10935 |
+
"learning_rate": 0.0008246628131021195,
|
| 10936 |
+
"loss": 2.7828,
|
| 10937 |
+
"step": 15610
|
| 10938 |
+
},
|
| 10939 |
+
{
|
| 10940 |
+
"epoch": 12.89668696459903,
|
| 10941 |
+
"grad_norm": 0.6088118553161621,
|
| 10942 |
+
"learning_rate": 0.0008244793100284431,
|
| 10943 |
+
"loss": 2.7833,
|
| 10944 |
+
"step": 15620
|
| 10945 |
+
},
|
| 10946 |
+
{
|
| 10947 |
+
"epoch": 12.904943750645062,
|
| 10948 |
+
"grad_norm": 0.6028804183006287,
|
| 10949 |
+
"learning_rate": 0.0008242958069547664,
|
| 10950 |
+
"loss": 2.7823,
|
| 10951 |
+
"step": 15630
|
| 10952 |
+
},
|
| 10953 |
+
{
|
| 10954 |
+
"epoch": 12.913200536691093,
|
| 10955 |
+
"grad_norm": 0.5889461636543274,
|
| 10956 |
+
"learning_rate": 0.00082411230388109,
|
| 10957 |
+
"loss": 2.7751,
|
| 10958 |
+
"step": 15640
|
| 10959 |
+
},
|
| 10960 |
+
{
|
| 10961 |
+
"epoch": 12.921457322737124,
|
| 10962 |
+
"grad_norm": 0.5903311967849731,
|
| 10963 |
+
"learning_rate": 0.0008239288008074135,
|
| 10964 |
+
"loss": 2.7712,
|
| 10965 |
+
"step": 15650
|
| 10966 |
+
},
|
| 10967 |
+
{
|
| 10968 |
+
"epoch": 12.929714108783156,
|
| 10969 |
+
"grad_norm": 0.5665178894996643,
|
| 10970 |
+
"learning_rate": 0.000823745297733737,
|
| 10971 |
+
"loss": 2.7639,
|
| 10972 |
+
"step": 15660
|
| 10973 |
+
},
|
| 10974 |
+
{
|
| 10975 |
+
"epoch": 12.937970894829188,
|
| 10976 |
+
"grad_norm": 0.5634979605674744,
|
| 10977 |
+
"learning_rate": 0.0008235617946600605,
|
| 10978 |
+
"loss": 2.7664,
|
| 10979 |
+
"step": 15670
|
| 10980 |
+
},
|
| 10981 |
+
{
|
| 10982 |
+
"epoch": 12.94622768087522,
|
| 10983 |
+
"grad_norm": 0.5990162491798401,
|
| 10984 |
+
"learning_rate": 0.0008233782915863841,
|
| 10985 |
+
"loss": 2.778,
|
| 10986 |
+
"step": 15680
|
| 10987 |
+
},
|
| 10988 |
+
{
|
| 10989 |
+
"epoch": 12.95448446692125,
|
| 10990 |
+
"grad_norm": 0.558689296245575,
|
| 10991 |
+
"learning_rate": 0.0008231947885127076,
|
| 10992 |
+
"loss": 2.7743,
|
| 10993 |
+
"step": 15690
|
| 10994 |
+
},
|
| 10995 |
+
{
|
| 10996 |
+
"epoch": 12.962741252967282,
|
| 10997 |
+
"grad_norm": 0.546913206577301,
|
| 10998 |
+
"learning_rate": 0.0008230112854390311,
|
| 10999 |
+
"loss": 2.7657,
|
| 11000 |
+
"step": 15700
|
| 11001 |
+
},
|
| 11002 |
+
{
|
| 11003 |
+
"epoch": 12.970998039013313,
|
| 11004 |
+
"grad_norm": 0.6025896072387695,
|
| 11005 |
+
"learning_rate": 0.0008228277823653546,
|
| 11006 |
+
"loss": 2.768,
|
| 11007 |
+
"step": 15710
|
| 11008 |
+
},
|
| 11009 |
+
{
|
| 11010 |
+
"epoch": 12.979254825059346,
|
| 11011 |
+
"grad_norm": 0.5498492121696472,
|
| 11012 |
+
"learning_rate": 0.0008226442792916782,
|
| 11013 |
+
"loss": 2.7725,
|
| 11014 |
+
"step": 15720
|
| 11015 |
+
},
|
| 11016 |
+
{
|
| 11017 |
+
"epoch": 12.987511611105377,
|
| 11018 |
+
"grad_norm": 0.6049798130989075,
|
| 11019 |
+
"learning_rate": 0.0008224607762180017,
|
| 11020 |
+
"loss": 2.7694,
|
| 11021 |
+
"step": 15730
|
| 11022 |
+
},
|
| 11023 |
+
{
|
| 11024 |
+
"epoch": 12.995768397151409,
|
| 11025 |
+
"grad_norm": 0.5635313987731934,
|
| 11026 |
+
"learning_rate": 0.0008222772731443252,
|
| 11027 |
+
"loss": 2.7658,
|
| 11028 |
+
"step": 15740
|
| 11029 |
+
},
|
| 11030 |
+
{
|
| 11031 |
+
"epoch": 13.003302714418412,
|
| 11032 |
+
"grad_norm": 0.6339975595474243,
|
| 11033 |
+
"learning_rate": 0.0008220937700706487,
|
| 11034 |
+
"loss": 2.5294,
|
| 11035 |
+
"step": 15750
|
| 11036 |
+
},
|
| 11037 |
+
{
|
| 11038 |
+
"epoch": 13.011559500464445,
|
| 11039 |
+
"grad_norm": 0.5738035440444946,
|
| 11040 |
+
"learning_rate": 0.0008219102669969723,
|
| 11041 |
+
"loss": 2.768,
|
| 11042 |
+
"step": 15760
|
| 11043 |
+
},
|
| 11044 |
+
{
|
| 11045 |
+
"epoch": 13.019816286510476,
|
| 11046 |
+
"grad_norm": 0.6072455644607544,
|
| 11047 |
+
"learning_rate": 0.0008217267639232957,
|
| 11048 |
+
"loss": 2.7673,
|
| 11049 |
+
"step": 15770
|
| 11050 |
+
},
|
| 11051 |
+
{
|
| 11052 |
+
"epoch": 13.028073072556507,
|
| 11053 |
+
"grad_norm": 0.5527037978172302,
|
| 11054 |
+
"learning_rate": 0.0008215432608496192,
|
| 11055 |
+
"loss": 2.7617,
|
| 11056 |
+
"step": 15780
|
| 11057 |
+
},
|
| 11058 |
+
{
|
| 11059 |
+
"epoch": 13.036329858602539,
|
| 11060 |
+
"grad_norm": 0.5809829831123352,
|
| 11061 |
+
"learning_rate": 0.0008213597577759427,
|
| 11062 |
+
"loss": 2.7681,
|
| 11063 |
+
"step": 15790
|
| 11064 |
+
},
|
| 11065 |
+
{
|
| 11066 |
+
"epoch": 13.04458664464857,
|
| 11067 |
+
"grad_norm": 0.6070693135261536,
|
| 11068 |
+
"learning_rate": 0.0008211762547022663,
|
| 11069 |
+
"loss": 2.7738,
|
| 11070 |
+
"step": 15800
|
| 11071 |
+
},
|
| 11072 |
+
{
|
| 11073 |
+
"epoch": 13.052843430694603,
|
| 11074 |
+
"grad_norm": 0.6138054132461548,
|
| 11075 |
+
"learning_rate": 0.0008209927516285898,
|
| 11076 |
+
"loss": 2.7669,
|
| 11077 |
+
"step": 15810
|
| 11078 |
+
},
|
| 11079 |
+
{
|
| 11080 |
+
"epoch": 13.061100216740634,
|
| 11081 |
+
"grad_norm": 0.5852046012878418,
|
| 11082 |
+
"learning_rate": 0.0008208092485549133,
|
| 11083 |
+
"loss": 2.757,
|
| 11084 |
+
"step": 15820
|
| 11085 |
+
},
|
| 11086 |
+
{
|
| 11087 |
+
"epoch": 13.069357002786665,
|
| 11088 |
+
"grad_norm": 0.5757762789726257,
|
| 11089 |
+
"learning_rate": 0.0008206257454812368,
|
| 11090 |
+
"loss": 2.7668,
|
| 11091 |
+
"step": 15830
|
| 11092 |
+
},
|
| 11093 |
+
{
|
| 11094 |
+
"epoch": 13.077613788832696,
|
| 11095 |
+
"grad_norm": 0.5901942253112793,
|
| 11096 |
+
"learning_rate": 0.0008204422424075604,
|
| 11097 |
+
"loss": 2.7672,
|
| 11098 |
+
"step": 15840
|
| 11099 |
+
},
|
| 11100 |
+
{
|
| 11101 |
+
"epoch": 13.085870574878728,
|
| 11102 |
+
"grad_norm": 0.6006907224655151,
|
| 11103 |
+
"learning_rate": 0.0008202587393338839,
|
| 11104 |
+
"loss": 2.7778,
|
| 11105 |
+
"step": 15850
|
| 11106 |
+
},
|
| 11107 |
+
{
|
| 11108 |
+
"epoch": 13.09412736092476,
|
| 11109 |
+
"grad_norm": 0.6453226208686829,
|
| 11110 |
+
"learning_rate": 0.0008200752362602074,
|
| 11111 |
+
"loss": 2.7627,
|
| 11112 |
+
"step": 15860
|
| 11113 |
+
},
|
| 11114 |
+
{
|
| 11115 |
+
"epoch": 13.102384146970792,
|
| 11116 |
+
"grad_norm": 0.5957190990447998,
|
| 11117 |
+
"learning_rate": 0.0008198917331865309,
|
| 11118 |
+
"loss": 2.7666,
|
| 11119 |
+
"step": 15870
|
| 11120 |
+
},
|
| 11121 |
+
{
|
| 11122 |
+
"epoch": 13.110640933016823,
|
| 11123 |
+
"grad_norm": 0.5730419754981995,
|
| 11124 |
+
"learning_rate": 0.0008197082301128545,
|
| 11125 |
+
"loss": 2.7531,
|
| 11126 |
+
"step": 15880
|
| 11127 |
+
},
|
| 11128 |
+
{
|
| 11129 |
+
"epoch": 13.118897719062854,
|
| 11130 |
+
"grad_norm": 0.5487211346626282,
|
| 11131 |
+
"learning_rate": 0.000819524727039178,
|
| 11132 |
+
"loss": 2.762,
|
| 11133 |
+
"step": 15890
|
| 11134 |
+
},
|
| 11135 |
+
{
|
| 11136 |
+
"epoch": 13.127154505108887,
|
| 11137 |
+
"grad_norm": 0.605567216873169,
|
| 11138 |
+
"learning_rate": 0.0008193412239655014,
|
| 11139 |
+
"loss": 2.7731,
|
| 11140 |
+
"step": 15900
|
| 11141 |
+
},
|
| 11142 |
+
{
|
| 11143 |
+
"epoch": 13.135411291154918,
|
| 11144 |
+
"grad_norm": 0.6126657128334045,
|
| 11145 |
+
"learning_rate": 0.0008191577208918249,
|
| 11146 |
+
"loss": 2.7694,
|
| 11147 |
+
"step": 15910
|
| 11148 |
+
},
|
| 11149 |
+
{
|
| 11150 |
+
"epoch": 13.14366807720095,
|
| 11151 |
+
"grad_norm": 0.5657448172569275,
|
| 11152 |
+
"learning_rate": 0.0008189742178181485,
|
| 11153 |
+
"loss": 2.7749,
|
| 11154 |
+
"step": 15920
|
| 11155 |
+
},
|
| 11156 |
+
{
|
| 11157 |
+
"epoch": 13.15192486324698,
|
| 11158 |
+
"grad_norm": 0.5270867347717285,
|
| 11159 |
+
"learning_rate": 0.000818790714744472,
|
| 11160 |
+
"loss": 2.7536,
|
| 11161 |
+
"step": 15930
|
| 11162 |
+
},
|
| 11163 |
+
{
|
| 11164 |
+
"epoch": 13.160181649293012,
|
| 11165 |
+
"grad_norm": 0.5588110685348511,
|
| 11166 |
+
"learning_rate": 0.0008186072116707955,
|
| 11167 |
+
"loss": 2.7682,
|
| 11168 |
+
"step": 15940
|
| 11169 |
+
},
|
| 11170 |
+
{
|
| 11171 |
+
"epoch": 13.168438435339045,
|
| 11172 |
+
"grad_norm": 0.5444889664649963,
|
| 11173 |
+
"learning_rate": 0.000818423708597119,
|
| 11174 |
+
"loss": 2.7521,
|
| 11175 |
+
"step": 15950
|
| 11176 |
+
},
|
| 11177 |
+
{
|
| 11178 |
+
"epoch": 13.176695221385076,
|
| 11179 |
+
"grad_norm": 0.5641809105873108,
|
| 11180 |
+
"learning_rate": 0.0008182402055234426,
|
| 11181 |
+
"loss": 2.7611,
|
| 11182 |
+
"step": 15960
|
| 11183 |
+
},
|
| 11184 |
+
{
|
| 11185 |
+
"epoch": 13.184952007431107,
|
| 11186 |
+
"grad_norm": 0.572223424911499,
|
| 11187 |
+
"learning_rate": 0.0008180567024497661,
|
| 11188 |
+
"loss": 2.7601,
|
| 11189 |
+
"step": 15970
|
| 11190 |
+
},
|
| 11191 |
+
{
|
| 11192 |
+
"epoch": 13.193208793477138,
|
| 11193 |
+
"grad_norm": 0.6031824946403503,
|
| 11194 |
+
"learning_rate": 0.0008178731993760896,
|
| 11195 |
+
"loss": 2.7721,
|
| 11196 |
+
"step": 15980
|
| 11197 |
+
},
|
| 11198 |
+
{
|
| 11199 |
+
"epoch": 13.201465579523171,
|
| 11200 |
+
"grad_norm": 0.5700808167457581,
|
| 11201 |
+
"learning_rate": 0.0008176896963024131,
|
| 11202 |
+
"loss": 2.7592,
|
| 11203 |
+
"step": 15990
|
| 11204 |
+
},
|
| 11205 |
+
{
|
| 11206 |
+
"epoch": 13.209722365569203,
|
| 11207 |
+
"grad_norm": 0.5641034245491028,
|
| 11208 |
+
"learning_rate": 0.0008175061932287367,
|
| 11209 |
+
"loss": 2.7622,
|
| 11210 |
+
"step": 16000
|
| 11211 |
+
},
|
| 11212 |
+
{
|
| 11213 |
+
"epoch": 13.217979151615234,
|
| 11214 |
+
"grad_norm": 0.5520575642585754,
|
| 11215 |
+
"learning_rate": 0.0008173226901550602,
|
| 11216 |
+
"loss": 2.7675,
|
| 11217 |
+
"step": 16010
|
| 11218 |
+
},
|
| 11219 |
+
{
|
| 11220 |
+
"epoch": 13.226235937661265,
|
| 11221 |
+
"grad_norm": 0.6408013701438904,
|
| 11222 |
+
"learning_rate": 0.0008171391870813837,
|
| 11223 |
+
"loss": 2.7563,
|
| 11224 |
+
"step": 16020
|
| 11225 |
+
},
|
| 11226 |
+
{
|
| 11227 |
+
"epoch": 13.234492723707296,
|
| 11228 |
+
"grad_norm": 0.5883532762527466,
|
| 11229 |
+
"learning_rate": 0.0008169556840077071,
|
| 11230 |
+
"loss": 2.7659,
|
| 11231 |
+
"step": 16030
|
| 11232 |
+
},
|
| 11233 |
+
{
|
| 11234 |
+
"epoch": 13.24274950975333,
|
| 11235 |
+
"grad_norm": 0.5909677147865295,
|
| 11236 |
+
"learning_rate": 0.0008167721809340307,
|
| 11237 |
+
"loss": 2.7637,
|
| 11238 |
+
"step": 16040
|
| 11239 |
+
},
|
| 11240 |
+
{
|
| 11241 |
+
"epoch": 13.25100629579936,
|
| 11242 |
+
"grad_norm": 0.5736511945724487,
|
| 11243 |
+
"learning_rate": 0.0008165886778603541,
|
| 11244 |
+
"loss": 2.7526,
|
| 11245 |
+
"step": 16050
|
| 11246 |
+
},
|
| 11247 |
+
{
|
| 11248 |
+
"epoch": 13.259263081845392,
|
| 11249 |
+
"grad_norm": 0.5763906240463257,
|
| 11250 |
+
"learning_rate": 0.0008164051747866776,
|
| 11251 |
+
"loss": 2.7717,
|
| 11252 |
+
"step": 16060
|
| 11253 |
+
},
|
| 11254 |
+
{
|
| 11255 |
+
"epoch": 13.267519867891423,
|
| 11256 |
+
"grad_norm": 0.5538901090621948,
|
| 11257 |
+
"learning_rate": 0.0008162216717130012,
|
| 11258 |
+
"loss": 2.7666,
|
| 11259 |
+
"step": 16070
|
| 11260 |
+
},
|
| 11261 |
+
{
|
| 11262 |
+
"epoch": 13.275776653937454,
|
| 11263 |
+
"grad_norm": 0.6136773824691772,
|
| 11264 |
+
"learning_rate": 0.0008160381686393247,
|
| 11265 |
+
"loss": 2.7473,
|
| 11266 |
+
"step": 16080
|
| 11267 |
+
},
|
| 11268 |
+
{
|
| 11269 |
+
"epoch": 13.284033439983487,
|
| 11270 |
+
"grad_norm": 0.5361644625663757,
|
| 11271 |
+
"learning_rate": 0.0008158546655656482,
|
| 11272 |
+
"loss": 2.7581,
|
| 11273 |
+
"step": 16090
|
| 11274 |
+
},
|
| 11275 |
+
{
|
| 11276 |
+
"epoch": 13.292290226029518,
|
| 11277 |
+
"grad_norm": 0.5708776116371155,
|
| 11278 |
+
"learning_rate": 0.0008156711624919717,
|
| 11279 |
+
"loss": 2.758,
|
| 11280 |
+
"step": 16100
|
| 11281 |
+
},
|
| 11282 |
+
{
|
| 11283 |
+
"epoch": 13.30054701207555,
|
| 11284 |
+
"grad_norm": 0.5603443384170532,
|
| 11285 |
+
"learning_rate": 0.0008154876594182953,
|
| 11286 |
+
"loss": 2.7581,
|
| 11287 |
+
"step": 16110
|
| 11288 |
+
},
|
| 11289 |
+
{
|
| 11290 |
+
"epoch": 13.30880379812158,
|
| 11291 |
+
"grad_norm": 0.56572026014328,
|
| 11292 |
+
"learning_rate": 0.0008153041563446188,
|
| 11293 |
+
"loss": 2.7618,
|
| 11294 |
+
"step": 16120
|
| 11295 |
+
},
|
| 11296 |
+
{
|
| 11297 |
+
"epoch": 13.317060584167614,
|
| 11298 |
+
"grad_norm": 0.6429868936538696,
|
| 11299 |
+
"learning_rate": 0.0008151206532709423,
|
| 11300 |
+
"loss": 2.7641,
|
| 11301 |
+
"step": 16130
|
| 11302 |
+
},
|
| 11303 |
+
{
|
| 11304 |
+
"epoch": 13.325317370213645,
|
| 11305 |
+
"grad_norm": 0.6419973969459534,
|
| 11306 |
+
"learning_rate": 0.0008149371501972658,
|
| 11307 |
+
"loss": 2.7685,
|
| 11308 |
+
"step": 16140
|
| 11309 |
+
},
|
| 11310 |
+
{
|
| 11311 |
+
"epoch": 13.333574156259676,
|
| 11312 |
+
"grad_norm": 0.6389254331588745,
|
| 11313 |
+
"learning_rate": 0.0008147536471235894,
|
| 11314 |
+
"loss": 2.7607,
|
| 11315 |
+
"step": 16150
|
| 11316 |
+
},
|
| 11317 |
+
{
|
| 11318 |
+
"epoch": 13.341830942305707,
|
| 11319 |
+
"grad_norm": 0.5886973142623901,
|
| 11320 |
+
"learning_rate": 0.0008145701440499128,
|
| 11321 |
+
"loss": 2.7545,
|
| 11322 |
+
"step": 16160
|
| 11323 |
+
},
|
| 11324 |
+
{
|
| 11325 |
+
"epoch": 13.350087728351738,
|
| 11326 |
+
"grad_norm": 0.6030955910682678,
|
| 11327 |
+
"learning_rate": 0.0008143866409762363,
|
| 11328 |
+
"loss": 2.7703,
|
| 11329 |
+
"step": 16170
|
| 11330 |
+
},
|
| 11331 |
+
{
|
| 11332 |
+
"epoch": 13.358344514397771,
|
| 11333 |
+
"grad_norm": 0.5616552233695984,
|
| 11334 |
+
"learning_rate": 0.0008142031379025598,
|
| 11335 |
+
"loss": 2.763,
|
| 11336 |
+
"step": 16180
|
| 11337 |
+
},
|
| 11338 |
+
{
|
| 11339 |
+
"epoch": 13.366601300443802,
|
| 11340 |
+
"grad_norm": 0.6055645942687988,
|
| 11341 |
+
"learning_rate": 0.0008140196348288834,
|
| 11342 |
+
"loss": 2.7692,
|
| 11343 |
+
"step": 16190
|
| 11344 |
+
},
|
| 11345 |
+
{
|
| 11346 |
+
"epoch": 13.374858086489834,
|
| 11347 |
+
"grad_norm": 0.6489038467407227,
|
| 11348 |
+
"learning_rate": 0.0008138361317552069,
|
| 11349 |
+
"loss": 2.7599,
|
| 11350 |
+
"step": 16200
|
| 11351 |
+
},
|
| 11352 |
+
{
|
| 11353 |
+
"epoch": 13.383114872535865,
|
| 11354 |
+
"grad_norm": 0.5819231867790222,
|
| 11355 |
+
"learning_rate": 0.0008136526286815304,
|
| 11356 |
+
"loss": 2.7609,
|
| 11357 |
+
"step": 16210
|
| 11358 |
+
},
|
| 11359 |
+
{
|
| 11360 |
+
"epoch": 13.391371658581898,
|
| 11361 |
+
"grad_norm": 0.6634029150009155,
|
| 11362 |
+
"learning_rate": 0.0008134691256078539,
|
| 11363 |
+
"loss": 2.7662,
|
| 11364 |
+
"step": 16220
|
| 11365 |
+
},
|
| 11366 |
+
{
|
| 11367 |
+
"epoch": 13.399628444627929,
|
| 11368 |
+
"grad_norm": 0.591643750667572,
|
| 11369 |
+
"learning_rate": 0.0008132856225341775,
|
| 11370 |
+
"loss": 2.762,
|
| 11371 |
+
"step": 16230
|
| 11372 |
+
},
|
| 11373 |
+
{
|
| 11374 |
+
"epoch": 13.40788523067396,
|
| 11375 |
+
"grad_norm": 0.5705656409263611,
|
| 11376 |
+
"learning_rate": 0.000813102119460501,
|
| 11377 |
+
"loss": 2.7483,
|
| 11378 |
+
"step": 16240
|
| 11379 |
+
},
|
| 11380 |
+
{
|
| 11381 |
+
"epoch": 13.416142016719991,
|
| 11382 |
+
"grad_norm": 0.597012996673584,
|
| 11383 |
+
"learning_rate": 0.0008129186163868245,
|
| 11384 |
+
"loss": 2.77,
|
| 11385 |
+
"step": 16250
|
| 11386 |
+
},
|
| 11387 |
+
{
|
| 11388 |
+
"epoch": 13.424398802766023,
|
| 11389 |
+
"grad_norm": 0.6613156795501709,
|
| 11390 |
+
"learning_rate": 0.000812735113313148,
|
| 11391 |
+
"loss": 2.7585,
|
| 11392 |
+
"step": 16260
|
| 11393 |
+
},
|
| 11394 |
+
{
|
| 11395 |
+
"epoch": 13.432655588812056,
|
| 11396 |
+
"grad_norm": 0.5879620313644409,
|
| 11397 |
+
"learning_rate": 0.0008125516102394716,
|
| 11398 |
+
"loss": 2.7628,
|
| 11399 |
+
"step": 16270
|
| 11400 |
+
},
|
| 11401 |
+
{
|
| 11402 |
+
"epoch": 13.440912374858087,
|
| 11403 |
+
"grad_norm": 0.5478769540786743,
|
| 11404 |
+
"learning_rate": 0.0008123681071657951,
|
| 11405 |
+
"loss": 2.7576,
|
| 11406 |
+
"step": 16280
|
| 11407 |
+
},
|
| 11408 |
+
{
|
| 11409 |
+
"epoch": 13.449169160904118,
|
| 11410 |
+
"grad_norm": 0.5667441487312317,
|
| 11411 |
+
"learning_rate": 0.0008121846040921185,
|
| 11412 |
+
"loss": 2.7507,
|
| 11413 |
+
"step": 16290
|
| 11414 |
+
},
|
| 11415 |
+
{
|
| 11416 |
+
"epoch": 13.45742594695015,
|
| 11417 |
+
"grad_norm": 0.5518149733543396,
|
| 11418 |
+
"learning_rate": 0.000812001101018442,
|
| 11419 |
+
"loss": 2.7519,
|
| 11420 |
+
"step": 16300
|
| 11421 |
+
},
|
| 11422 |
+
{
|
| 11423 |
+
"epoch": 13.465682732996182,
|
| 11424 |
+
"grad_norm": 0.6225078701972961,
|
| 11425 |
+
"learning_rate": 0.0008118175979447656,
|
| 11426 |
+
"loss": 2.7632,
|
| 11427 |
+
"step": 16310
|
| 11428 |
+
},
|
| 11429 |
+
{
|
| 11430 |
+
"epoch": 13.473939519042213,
|
| 11431 |
+
"grad_norm": 0.587518572807312,
|
| 11432 |
+
"learning_rate": 0.0008116340948710891,
|
| 11433 |
+
"loss": 2.7691,
|
| 11434 |
+
"step": 16320
|
| 11435 |
+
},
|
| 11436 |
+
{
|
| 11437 |
+
"epoch": 13.482196305088245,
|
| 11438 |
+
"grad_norm": 0.5977440476417542,
|
| 11439 |
+
"learning_rate": 0.0008114505917974126,
|
| 11440 |
+
"loss": 2.7503,
|
| 11441 |
+
"step": 16330
|
| 11442 |
+
},
|
| 11443 |
+
{
|
| 11444 |
+
"epoch": 13.490453091134276,
|
| 11445 |
+
"grad_norm": 0.5600082278251648,
|
| 11446 |
+
"learning_rate": 0.0008112670887237361,
|
| 11447 |
+
"loss": 2.7622,
|
| 11448 |
+
"step": 16340
|
| 11449 |
+
},
|
| 11450 |
+
{
|
| 11451 |
+
"epoch": 13.498709877180307,
|
| 11452 |
+
"grad_norm": 0.560407817363739,
|
| 11453 |
+
"learning_rate": 0.0008110835856500597,
|
| 11454 |
+
"loss": 2.7697,
|
| 11455 |
+
"step": 16350
|
| 11456 |
+
},
|
| 11457 |
+
{
|
| 11458 |
+
"epoch": 13.50696666322634,
|
| 11459 |
+
"grad_norm": 0.5983129143714905,
|
| 11460 |
+
"learning_rate": 0.0008109000825763832,
|
| 11461 |
+
"loss": 2.7498,
|
| 11462 |
+
"step": 16360
|
| 11463 |
+
},
|
| 11464 |
+
{
|
| 11465 |
+
"epoch": 13.515223449272371,
|
| 11466 |
+
"grad_norm": 0.5887816548347473,
|
| 11467 |
+
"learning_rate": 0.0008107165795027067,
|
| 11468 |
+
"loss": 2.7707,
|
| 11469 |
+
"step": 16370
|
| 11470 |
+
},
|
| 11471 |
+
{
|
| 11472 |
+
"epoch": 13.523480235318402,
|
| 11473 |
+
"grad_norm": 0.5647554993629456,
|
| 11474 |
+
"learning_rate": 0.0008105330764290302,
|
| 11475 |
+
"loss": 2.7625,
|
| 11476 |
+
"step": 16380
|
| 11477 |
+
},
|
| 11478 |
+
{
|
| 11479 |
+
"epoch": 13.531737021364433,
|
| 11480 |
+
"grad_norm": 0.551149845123291,
|
| 11481 |
+
"learning_rate": 0.0008103495733553538,
|
| 11482 |
+
"loss": 2.7564,
|
| 11483 |
+
"step": 16390
|
| 11484 |
+
},
|
| 11485 |
+
{
|
| 11486 |
+
"epoch": 13.539993807410465,
|
| 11487 |
+
"grad_norm": 0.568866491317749,
|
| 11488 |
+
"learning_rate": 0.0008101660702816773,
|
| 11489 |
+
"loss": 2.751,
|
| 11490 |
+
"step": 16400
|
| 11491 |
+
},
|
| 11492 |
+
{
|
| 11493 |
+
"epoch": 13.548250593456498,
|
| 11494 |
+
"grad_norm": 0.5884142518043518,
|
| 11495 |
+
"learning_rate": 0.0008099825672080008,
|
| 11496 |
+
"loss": 2.7613,
|
| 11497 |
+
"step": 16410
|
| 11498 |
+
},
|
| 11499 |
+
{
|
| 11500 |
+
"epoch": 13.556507379502529,
|
| 11501 |
+
"grad_norm": 0.5169154405593872,
|
| 11502 |
+
"learning_rate": 0.0008097990641343242,
|
| 11503 |
+
"loss": 2.7544,
|
| 11504 |
+
"step": 16420
|
| 11505 |
+
},
|
| 11506 |
+
{
|
| 11507 |
+
"epoch": 13.56476416554856,
|
| 11508 |
+
"grad_norm": 0.6176019310951233,
|
| 11509 |
+
"learning_rate": 0.0008096155610606478,
|
| 11510 |
+
"loss": 2.7581,
|
| 11511 |
+
"step": 16430
|
| 11512 |
+
},
|
| 11513 |
+
{
|
| 11514 |
+
"epoch": 13.573020951594591,
|
| 11515 |
+
"grad_norm": 0.6097131967544556,
|
| 11516 |
+
"learning_rate": 0.0008094320579869712,
|
| 11517 |
+
"loss": 2.7634,
|
| 11518 |
+
"step": 16440
|
| 11519 |
+
},
|
| 11520 |
+
{
|
| 11521 |
+
"epoch": 13.581277737640624,
|
| 11522 |
+
"grad_norm": 0.6191734075546265,
|
| 11523 |
+
"learning_rate": 0.0008092485549132948,
|
| 11524 |
+
"loss": 2.758,
|
| 11525 |
+
"step": 16450
|
| 11526 |
+
},
|
| 11527 |
+
{
|
| 11528 |
+
"epoch": 13.589534523686655,
|
| 11529 |
+
"grad_norm": 0.5689364075660706,
|
| 11530 |
+
"learning_rate": 0.0008090650518396183,
|
| 11531 |
+
"loss": 2.7614,
|
| 11532 |
+
"step": 16460
|
| 11533 |
+
},
|
| 11534 |
+
{
|
| 11535 |
+
"epoch": 13.597791309732687,
|
| 11536 |
+
"grad_norm": 0.6023751497268677,
|
| 11537 |
+
"learning_rate": 0.0008088815487659419,
|
| 11538 |
+
"loss": 2.775,
|
| 11539 |
+
"step": 16470
|
| 11540 |
+
},
|
| 11541 |
+
{
|
| 11542 |
+
"epoch": 13.606048095778718,
|
| 11543 |
+
"grad_norm": 0.5691829323768616,
|
| 11544 |
+
"learning_rate": 0.0008086980456922653,
|
| 11545 |
+
"loss": 2.7631,
|
| 11546 |
+
"step": 16480
|
| 11547 |
+
},
|
| 11548 |
+
{
|
| 11549 |
+
"epoch": 13.614304881824749,
|
| 11550 |
+
"grad_norm": 0.5723507404327393,
|
| 11551 |
+
"learning_rate": 0.0008085145426185889,
|
| 11552 |
+
"loss": 2.7529,
|
| 11553 |
+
"step": 16490
|
| 11554 |
+
},
|
| 11555 |
+
{
|
| 11556 |
+
"epoch": 13.622561667870782,
|
| 11557 |
+
"grad_norm": 0.5878212451934814,
|
| 11558 |
+
"learning_rate": 0.0008083310395449124,
|
| 11559 |
+
"loss": 2.7629,
|
| 11560 |
+
"step": 16500
|
| 11561 |
+
},
|
| 11562 |
+
{
|
| 11563 |
+
"epoch": 13.630818453916813,
|
| 11564 |
+
"grad_norm": 0.651943564414978,
|
| 11565 |
+
"learning_rate": 0.000808147536471236,
|
| 11566 |
+
"loss": 2.7539,
|
| 11567 |
+
"step": 16510
|
| 11568 |
+
},
|
| 11569 |
+
{
|
| 11570 |
+
"epoch": 13.639075239962844,
|
| 11571 |
+
"grad_norm": 0.6334558129310608,
|
| 11572 |
+
"learning_rate": 0.0008079640333975594,
|
| 11573 |
+
"loss": 2.7562,
|
| 11574 |
+
"step": 16520
|
| 11575 |
+
},
|
| 11576 |
+
{
|
| 11577 |
+
"epoch": 13.647332026008876,
|
| 11578 |
+
"grad_norm": 0.6675853133201599,
|
| 11579 |
+
"learning_rate": 0.000807780530323883,
|
| 11580 |
+
"loss": 2.7666,
|
| 11581 |
+
"step": 16530
|
| 11582 |
+
},
|
| 11583 |
+
{
|
| 11584 |
+
"epoch": 13.655588812054908,
|
| 11585 |
+
"grad_norm": 0.5692960023880005,
|
| 11586 |
+
"learning_rate": 0.0008075970272502065,
|
| 11587 |
+
"loss": 2.7527,
|
| 11588 |
+
"step": 16540
|
| 11589 |
+
},
|
| 11590 |
+
{
|
| 11591 |
+
"epoch": 13.66384559810094,
|
| 11592 |
+
"grad_norm": 0.5518311858177185,
|
| 11593 |
+
"learning_rate": 0.00080741352417653,
|
| 11594 |
+
"loss": 2.7626,
|
| 11595 |
+
"step": 16550
|
| 11596 |
+
},
|
| 11597 |
+
{
|
| 11598 |
+
"epoch": 13.67210238414697,
|
| 11599 |
+
"grad_norm": 0.6077815890312195,
|
| 11600 |
+
"learning_rate": 0.0008072300211028534,
|
| 11601 |
+
"loss": 2.7613,
|
| 11602 |
+
"step": 16560
|
| 11603 |
+
},
|
| 11604 |
+
{
|
| 11605 |
+
"epoch": 13.680359170193002,
|
| 11606 |
+
"grad_norm": 0.5508092641830444,
|
| 11607 |
+
"learning_rate": 0.000807046518029177,
|
| 11608 |
+
"loss": 2.7574,
|
| 11609 |
+
"step": 16570
|
| 11610 |
+
},
|
| 11611 |
+
{
|
| 11612 |
+
"epoch": 13.688615956239033,
|
| 11613 |
+
"grad_norm": 0.5735660791397095,
|
| 11614 |
+
"learning_rate": 0.0008068630149555005,
|
| 11615 |
+
"loss": 2.7573,
|
| 11616 |
+
"step": 16580
|
| 11617 |
+
},
|
| 11618 |
+
{
|
| 11619 |
+
"epoch": 13.696872742285066,
|
| 11620 |
+
"grad_norm": 0.5603192448616028,
|
| 11621 |
+
"learning_rate": 0.0008066795118818241,
|
| 11622 |
+
"loss": 2.7544,
|
| 11623 |
+
"step": 16590
|
| 11624 |
+
},
|
| 11625 |
+
{
|
| 11626 |
+
"epoch": 13.705129528331097,
|
| 11627 |
+
"grad_norm": 0.556424081325531,
|
| 11628 |
+
"learning_rate": 0.0008064960088081475,
|
| 11629 |
+
"loss": 2.756,
|
| 11630 |
+
"step": 16600
|
| 11631 |
+
},
|
| 11632 |
+
{
|
| 11633 |
+
"epoch": 13.713386314377129,
|
| 11634 |
+
"grad_norm": 0.5140565037727356,
|
| 11635 |
+
"learning_rate": 0.0008063125057344711,
|
| 11636 |
+
"loss": 2.7666,
|
| 11637 |
+
"step": 16610
|
| 11638 |
+
},
|
| 11639 |
+
{
|
| 11640 |
+
"epoch": 13.72164310042316,
|
| 11641 |
+
"grad_norm": 0.5534517168998718,
|
| 11642 |
+
"learning_rate": 0.0008061290026607946,
|
| 11643 |
+
"loss": 2.7569,
|
| 11644 |
+
"step": 16620
|
| 11645 |
+
},
|
| 11646 |
+
{
|
| 11647 |
+
"epoch": 13.729899886469191,
|
| 11648 |
+
"grad_norm": 0.6492647528648376,
|
| 11649 |
+
"learning_rate": 0.0008059454995871181,
|
| 11650 |
+
"loss": 2.7567,
|
| 11651 |
+
"step": 16630
|
| 11652 |
+
},
|
| 11653 |
+
{
|
| 11654 |
+
"epoch": 13.738156672515224,
|
| 11655 |
+
"grad_norm": 0.5888465642929077,
|
| 11656 |
+
"learning_rate": 0.0008057619965134416,
|
| 11657 |
+
"loss": 2.7451,
|
| 11658 |
+
"step": 16640
|
| 11659 |
+
},
|
| 11660 |
+
{
|
| 11661 |
+
"epoch": 13.746413458561255,
|
| 11662 |
+
"grad_norm": 0.6425179243087769,
|
| 11663 |
+
"learning_rate": 0.0008055784934397651,
|
| 11664 |
+
"loss": 2.7575,
|
| 11665 |
+
"step": 16650
|
| 11666 |
+
},
|
| 11667 |
+
{
|
| 11668 |
+
"epoch": 13.754670244607286,
|
| 11669 |
+
"grad_norm": 0.5842881202697754,
|
| 11670 |
+
"learning_rate": 0.0008053949903660887,
|
| 11671 |
+
"loss": 2.756,
|
| 11672 |
+
"step": 16660
|
| 11673 |
+
},
|
| 11674 |
+
{
|
| 11675 |
+
"epoch": 13.762927030653318,
|
| 11676 |
+
"grad_norm": 0.5675920248031616,
|
| 11677 |
+
"learning_rate": 0.0008052114872924122,
|
| 11678 |
+
"loss": 2.761,
|
| 11679 |
+
"step": 16670
|
| 11680 |
+
},
|
| 11681 |
+
{
|
| 11682 |
+
"epoch": 13.77118381669935,
|
| 11683 |
+
"grad_norm": 0.532641589641571,
|
| 11684 |
+
"learning_rate": 0.0008050279842187356,
|
| 11685 |
+
"loss": 2.7615,
|
| 11686 |
+
"step": 16680
|
| 11687 |
+
},
|
| 11688 |
+
{
|
| 11689 |
+
"epoch": 13.779440602745382,
|
| 11690 |
+
"grad_norm": 0.5731536149978638,
|
| 11691 |
+
"learning_rate": 0.0008048444811450591,
|
| 11692 |
+
"loss": 2.7562,
|
| 11693 |
+
"step": 16690
|
| 11694 |
+
},
|
| 11695 |
+
{
|
| 11696 |
+
"epoch": 13.787697388791413,
|
| 11697 |
+
"grad_norm": 0.567754328250885,
|
| 11698 |
+
"learning_rate": 0.0008046609780713827,
|
| 11699 |
+
"loss": 2.7479,
|
| 11700 |
+
"step": 16700
|
| 11701 |
+
},
|
| 11702 |
+
{
|
| 11703 |
+
"epoch": 13.795954174837444,
|
| 11704 |
+
"grad_norm": 0.524221658706665,
|
| 11705 |
+
"learning_rate": 0.0008044774749977062,
|
| 11706 |
+
"loss": 2.7509,
|
| 11707 |
+
"step": 16710
|
| 11708 |
+
},
|
| 11709 |
+
{
|
| 11710 |
+
"epoch": 13.804210960883475,
|
| 11711 |
+
"grad_norm": 0.5846814513206482,
|
| 11712 |
+
"learning_rate": 0.0008042939719240297,
|
| 11713 |
+
"loss": 2.7475,
|
| 11714 |
+
"step": 16720
|
| 11715 |
+
},
|
| 11716 |
+
{
|
| 11717 |
+
"epoch": 13.812467746929508,
|
| 11718 |
+
"grad_norm": 0.5527751445770264,
|
| 11719 |
+
"learning_rate": 0.0008041104688503532,
|
| 11720 |
+
"loss": 2.7608,
|
| 11721 |
+
"step": 16730
|
| 11722 |
+
},
|
| 11723 |
+
{
|
| 11724 |
+
"epoch": 13.82072453297554,
|
| 11725 |
+
"grad_norm": 0.6005294919013977,
|
| 11726 |
+
"learning_rate": 0.0008039269657766768,
|
| 11727 |
+
"loss": 2.7499,
|
| 11728 |
+
"step": 16740
|
| 11729 |
+
},
|
| 11730 |
+
{
|
| 11731 |
+
"epoch": 13.82898131902157,
|
| 11732 |
+
"grad_norm": 0.5409100651741028,
|
| 11733 |
+
"learning_rate": 0.0008037434627030003,
|
| 11734 |
+
"loss": 2.7473,
|
| 11735 |
+
"step": 16750
|
| 11736 |
+
},
|
| 11737 |
+
{
|
| 11738 |
+
"epoch": 13.837238105067602,
|
| 11739 |
+
"grad_norm": 0.5972150564193726,
|
| 11740 |
+
"learning_rate": 0.0008035599596293238,
|
| 11741 |
+
"loss": 2.7597,
|
| 11742 |
+
"step": 16760
|
| 11743 |
+
},
|
| 11744 |
+
{
|
| 11745 |
+
"epoch": 13.845494891113635,
|
| 11746 |
+
"grad_norm": 0.5449065566062927,
|
| 11747 |
+
"learning_rate": 0.0008033764565556473,
|
| 11748 |
+
"loss": 2.7471,
|
| 11749 |
+
"step": 16770
|
| 11750 |
+
},
|
| 11751 |
+
{
|
| 11752 |
+
"epoch": 13.853751677159666,
|
| 11753 |
+
"grad_norm": 0.5764107704162598,
|
| 11754 |
+
"learning_rate": 0.0008031929534819709,
|
| 11755 |
+
"loss": 2.7548,
|
| 11756 |
+
"step": 16780
|
| 11757 |
+
},
|
| 11758 |
+
{
|
| 11759 |
+
"epoch": 13.862008463205697,
|
| 11760 |
+
"grad_norm": 0.5843521356582642,
|
| 11761 |
+
"learning_rate": 0.0008030094504082944,
|
| 11762 |
+
"loss": 2.7527,
|
| 11763 |
+
"step": 16790
|
| 11764 |
+
},
|
| 11765 |
+
{
|
| 11766 |
+
"epoch": 13.870265249251728,
|
| 11767 |
+
"grad_norm": 0.5988937020301819,
|
| 11768 |
+
"learning_rate": 0.0008028259473346179,
|
| 11769 |
+
"loss": 2.7538,
|
| 11770 |
+
"step": 16800
|
| 11771 |
+
},
|
| 11772 |
+
{
|
| 11773 |
+
"epoch": 13.87852203529776,
|
| 11774 |
+
"grad_norm": 0.5904337763786316,
|
| 11775 |
+
"learning_rate": 0.0008026424442609413,
|
| 11776 |
+
"loss": 2.7502,
|
| 11777 |
+
"step": 16810
|
| 11778 |
+
},
|
| 11779 |
+
{
|
| 11780 |
+
"epoch": 13.886778821343793,
|
| 11781 |
+
"grad_norm": 0.5412918329238892,
|
| 11782 |
+
"learning_rate": 0.0008024589411872649,
|
| 11783 |
+
"loss": 2.7522,
|
| 11784 |
+
"step": 16820
|
| 11785 |
+
},
|
| 11786 |
+
{
|
| 11787 |
+
"epoch": 13.895035607389824,
|
| 11788 |
+
"grad_norm": 0.5681438446044922,
|
| 11789 |
+
"learning_rate": 0.0008022754381135883,
|
| 11790 |
+
"loss": 2.7576,
|
| 11791 |
+
"step": 16830
|
| 11792 |
+
},
|
| 11793 |
+
{
|
| 11794 |
+
"epoch": 13.903292393435855,
|
| 11795 |
+
"grad_norm": 0.5728694796562195,
|
| 11796 |
+
"learning_rate": 0.0008020919350399119,
|
| 11797 |
+
"loss": 2.7549,
|
| 11798 |
+
"step": 16840
|
| 11799 |
+
},
|
| 11800 |
+
{
|
| 11801 |
+
"epoch": 13.911549179481886,
|
| 11802 |
+
"grad_norm": 0.5923236608505249,
|
| 11803 |
+
"learning_rate": 0.0008019084319662354,
|
| 11804 |
+
"loss": 2.7553,
|
| 11805 |
+
"step": 16850
|
| 11806 |
+
},
|
| 11807 |
+
{
|
| 11808 |
+
"epoch": 13.919805965527917,
|
| 11809 |
+
"grad_norm": 0.5946152210235596,
|
| 11810 |
+
"learning_rate": 0.000801724928892559,
|
| 11811 |
+
"loss": 2.7457,
|
| 11812 |
+
"step": 16860
|
| 11813 |
+
},
|
| 11814 |
+
{
|
| 11815 |
+
"epoch": 13.92806275157395,
|
| 11816 |
+
"grad_norm": 0.5166122913360596,
|
| 11817 |
+
"learning_rate": 0.0008015414258188824,
|
| 11818 |
+
"loss": 2.7488,
|
| 11819 |
+
"step": 16870
|
| 11820 |
+
},
|
| 11821 |
+
{
|
| 11822 |
+
"epoch": 13.936319537619982,
|
| 11823 |
+
"grad_norm": 0.5555543303489685,
|
| 11824 |
+
"learning_rate": 0.000801357922745206,
|
| 11825 |
+
"loss": 2.7606,
|
| 11826 |
+
"step": 16880
|
| 11827 |
+
},
|
| 11828 |
+
{
|
| 11829 |
+
"epoch": 13.944576323666013,
|
| 11830 |
+
"grad_norm": 0.5452257990837097,
|
| 11831 |
+
"learning_rate": 0.0008011744196715295,
|
| 11832 |
+
"loss": 2.7558,
|
| 11833 |
+
"step": 16890
|
| 11834 |
+
},
|
| 11835 |
+
{
|
| 11836 |
+
"epoch": 13.952833109712044,
|
| 11837 |
+
"grad_norm": 0.5303358435630798,
|
| 11838 |
+
"learning_rate": 0.0008009909165978531,
|
| 11839 |
+
"loss": 2.7481,
|
| 11840 |
+
"step": 16900
|
| 11841 |
+
},
|
| 11842 |
+
{
|
| 11843 |
+
"epoch": 13.961089895758077,
|
| 11844 |
+
"grad_norm": 0.5449009537696838,
|
| 11845 |
+
"learning_rate": 0.0008008074135241765,
|
| 11846 |
+
"loss": 2.7548,
|
| 11847 |
+
"step": 16910
|
| 11848 |
+
},
|
| 11849 |
+
{
|
| 11850 |
+
"epoch": 13.969346681804108,
|
| 11851 |
+
"grad_norm": 0.5688961148262024,
|
| 11852 |
+
"learning_rate": 0.0008006239104505001,
|
| 11853 |
+
"loss": 2.7543,
|
| 11854 |
+
"step": 16920
|
| 11855 |
+
},
|
| 11856 |
+
{
|
| 11857 |
+
"epoch": 13.97760346785014,
|
| 11858 |
+
"grad_norm": 0.6097021698951721,
|
| 11859 |
+
"learning_rate": 0.0008004404073768236,
|
| 11860 |
+
"loss": 2.7521,
|
| 11861 |
+
"step": 16930
|
| 11862 |
+
},
|
| 11863 |
+
{
|
| 11864 |
+
"epoch": 13.98586025389617,
|
| 11865 |
+
"grad_norm": 0.6139764189720154,
|
| 11866 |
+
"learning_rate": 0.0008002569043031472,
|
| 11867 |
+
"loss": 2.7544,
|
| 11868 |
+
"step": 16940
|
| 11869 |
+
},
|
| 11870 |
+
{
|
| 11871 |
+
"epoch": 13.994117039942202,
|
| 11872 |
+
"grad_norm": 0.5823282599449158,
|
| 11873 |
+
"learning_rate": 0.0008000734012294705,
|
| 11874 |
+
"loss": 2.7485,
|
| 11875 |
+
"step": 16950
|
| 11876 |
+
},
|
| 11877 |
+
{
|
| 11878 |
+
"epoch": 14.001651357209207,
|
| 11879 |
+
"grad_norm": 0.5491234064102173,
|
| 11880 |
+
"learning_rate": 0.0007998898981557941,
|
| 11881 |
+
"loss": 2.5171,
|
| 11882 |
+
"step": 16960
|
| 11883 |
+
},
|
| 11884 |
+
{
|
| 11885 |
+
"epoch": 14.009908143255238,
|
| 11886 |
+
"grad_norm": 0.6469337940216064,
|
| 11887 |
+
"learning_rate": 0.0007997063950821176,
|
| 11888 |
+
"loss": 2.7401,
|
| 11889 |
+
"step": 16970
|
| 11890 |
+
},
|
| 11891 |
+
{
|
| 11892 |
+
"epoch": 14.01816492930127,
|
| 11893 |
+
"grad_norm": 0.622250497341156,
|
| 11894 |
+
"learning_rate": 0.0007995228920084412,
|
| 11895 |
+
"loss": 2.7526,
|
| 11896 |
+
"step": 16980
|
| 11897 |
+
},
|
| 11898 |
+
{
|
| 11899 |
+
"epoch": 14.0264217153473,
|
| 11900 |
+
"grad_norm": 0.6488636136054993,
|
| 11901 |
+
"learning_rate": 0.0007993393889347646,
|
| 11902 |
+
"loss": 2.7349,
|
| 11903 |
+
"step": 16990
|
| 11904 |
+
},
|
| 11905 |
+
{
|
| 11906 |
+
"epoch": 14.034678501393334,
|
| 11907 |
+
"grad_norm": 0.5935384631156921,
|
| 11908 |
+
"learning_rate": 0.0007991558858610882,
|
| 11909 |
+
"loss": 2.7503,
|
| 11910 |
+
"step": 17000
|
| 11911 |
+
},
|
| 11912 |
+
{
|
| 11913 |
+
"epoch": 14.042935287439365,
|
| 11914 |
+
"grad_norm": 0.6315668821334839,
|
| 11915 |
+
"learning_rate": 0.0007989723827874117,
|
| 11916 |
+
"loss": 2.7522,
|
| 11917 |
+
"step": 17010
|
| 11918 |
+
},
|
| 11919 |
+
{
|
| 11920 |
+
"epoch": 14.051192073485396,
|
| 11921 |
+
"grad_norm": 0.607702910900116,
|
| 11922 |
+
"learning_rate": 0.0007987888797137353,
|
| 11923 |
+
"loss": 2.7474,
|
| 11924 |
+
"step": 17020
|
| 11925 |
+
},
|
| 11926 |
+
{
|
| 11927 |
+
"epoch": 14.059448859531427,
|
| 11928 |
+
"grad_norm": 0.55247962474823,
|
| 11929 |
+
"learning_rate": 0.0007986053766400587,
|
| 11930 |
+
"loss": 2.7366,
|
| 11931 |
+
"step": 17030
|
| 11932 |
+
},
|
| 11933 |
+
{
|
| 11934 |
+
"epoch": 14.067705645577458,
|
| 11935 |
+
"grad_norm": 0.5892691016197205,
|
| 11936 |
+
"learning_rate": 0.0007984218735663823,
|
| 11937 |
+
"loss": 2.7319,
|
| 11938 |
+
"step": 17040
|
| 11939 |
+
},
|
| 11940 |
+
{
|
| 11941 |
+
"epoch": 14.075962431623491,
|
| 11942 |
+
"grad_norm": 0.5575072765350342,
|
| 11943 |
+
"learning_rate": 0.0007982383704927058,
|
| 11944 |
+
"loss": 2.753,
|
| 11945 |
+
"step": 17050
|
| 11946 |
+
},
|
| 11947 |
+
{
|
| 11948 |
+
"epoch": 14.084219217669522,
|
| 11949 |
+
"grad_norm": 0.6110917329788208,
|
| 11950 |
+
"learning_rate": 0.0007980548674190294,
|
| 11951 |
+
"loss": 2.7465,
|
| 11952 |
+
"step": 17060
|
| 11953 |
+
},
|
| 11954 |
+
{
|
| 11955 |
+
"epoch": 14.092476003715554,
|
| 11956 |
+
"grad_norm": 0.6070433855056763,
|
| 11957 |
+
"learning_rate": 0.0007978713643453528,
|
| 11958 |
+
"loss": 2.7533,
|
| 11959 |
+
"step": 17070
|
| 11960 |
+
},
|
| 11961 |
+
{
|
| 11962 |
+
"epoch": 14.100732789761585,
|
| 11963 |
+
"grad_norm": 0.5724040865898132,
|
| 11964 |
+
"learning_rate": 0.0007976878612716763,
|
| 11965 |
+
"loss": 2.7412,
|
| 11966 |
+
"step": 17080
|
| 11967 |
+
},
|
| 11968 |
+
{
|
| 11969 |
+
"epoch": 14.108989575807616,
|
| 11970 |
+
"grad_norm": 0.5734650492668152,
|
| 11971 |
+
"learning_rate": 0.0007975043581979998,
|
| 11972 |
+
"loss": 2.7417,
|
| 11973 |
+
"step": 17090
|
| 11974 |
+
},
|
| 11975 |
+
{
|
| 11976 |
+
"epoch": 14.117246361853649,
|
| 11977 |
+
"grad_norm": 0.5555775165557861,
|
| 11978 |
+
"learning_rate": 0.0007973208551243234,
|
| 11979 |
+
"loss": 2.7436,
|
| 11980 |
+
"step": 17100
|
| 11981 |
+
},
|
| 11982 |
+
{
|
| 11983 |
+
"epoch": 14.12550314789968,
|
| 11984 |
+
"grad_norm": 0.5774323344230652,
|
| 11985 |
+
"learning_rate": 0.0007971373520506468,
|
| 11986 |
+
"loss": 2.7413,
|
| 11987 |
+
"step": 17110
|
| 11988 |
+
},
|
| 11989 |
+
{
|
| 11990 |
+
"epoch": 14.133759933945711,
|
| 11991 |
+
"grad_norm": 0.6438599824905396,
|
| 11992 |
+
"learning_rate": 0.0007969538489769704,
|
| 11993 |
+
"loss": 2.7539,
|
| 11994 |
+
"step": 17120
|
| 11995 |
+
},
|
| 11996 |
+
{
|
| 11997 |
+
"epoch": 14.142016719991743,
|
| 11998 |
+
"grad_norm": 0.5561356544494629,
|
| 11999 |
+
"learning_rate": 0.0007967703459032939,
|
| 12000 |
+
"loss": 2.7405,
|
| 12001 |
+
"step": 17130
|
| 12002 |
+
},
|
| 12003 |
+
{
|
| 12004 |
+
"epoch": 14.150273506037776,
|
| 12005 |
+
"grad_norm": 0.5886418223381042,
|
| 12006 |
+
"learning_rate": 0.0007965868428296175,
|
| 12007 |
+
"loss": 2.7599,
|
| 12008 |
+
"step": 17140
|
| 12009 |
+
},
|
| 12010 |
+
{
|
| 12011 |
+
"epoch": 14.158530292083807,
|
| 12012 |
+
"grad_norm": 0.5819487571716309,
|
| 12013 |
+
"learning_rate": 0.0007964033397559409,
|
| 12014 |
+
"loss": 2.7569,
|
| 12015 |
+
"step": 17150
|
| 12016 |
+
},
|
| 12017 |
+
{
|
| 12018 |
+
"epoch": 14.166787078129838,
|
| 12019 |
+
"grad_norm": 0.5723300576210022,
|
| 12020 |
+
"learning_rate": 0.0007962198366822645,
|
| 12021 |
+
"loss": 2.7404,
|
| 12022 |
+
"step": 17160
|
| 12023 |
+
},
|
| 12024 |
+
{
|
| 12025 |
+
"epoch": 14.17504386417587,
|
| 12026 |
+
"grad_norm": 0.5738250017166138,
|
| 12027 |
+
"learning_rate": 0.000796036333608588,
|
| 12028 |
+
"loss": 2.7518,
|
| 12029 |
+
"step": 17170
|
| 12030 |
+
},
|
| 12031 |
+
{
|
| 12032 |
+
"epoch": 14.1833006502219,
|
| 12033 |
+
"grad_norm": 0.5601485967636108,
|
| 12034 |
+
"learning_rate": 0.0007958528305349116,
|
| 12035 |
+
"loss": 2.7477,
|
| 12036 |
+
"step": 17180
|
| 12037 |
+
},
|
| 12038 |
+
{
|
| 12039 |
+
"epoch": 14.191557436267933,
|
| 12040 |
+
"grad_norm": 0.5593155026435852,
|
| 12041 |
+
"learning_rate": 0.000795669327461235,
|
| 12042 |
+
"loss": 2.751,
|
| 12043 |
+
"step": 17190
|
| 12044 |
+
},
|
| 12045 |
+
{
|
| 12046 |
+
"epoch": 14.199814222313965,
|
| 12047 |
+
"grad_norm": 0.5404049158096313,
|
| 12048 |
+
"learning_rate": 0.0007954858243875585,
|
| 12049 |
+
"loss": 2.7537,
|
| 12050 |
+
"step": 17200
|
| 12051 |
+
},
|
| 12052 |
+
{
|
| 12053 |
+
"epoch": 14.208071008359996,
|
| 12054 |
+
"grad_norm": 0.5567106008529663,
|
| 12055 |
+
"learning_rate": 0.000795302321313882,
|
| 12056 |
+
"loss": 2.7499,
|
| 12057 |
+
"step": 17210
|
| 12058 |
+
},
|
| 12059 |
+
{
|
| 12060 |
+
"epoch": 14.216327794406027,
|
| 12061 |
+
"grad_norm": 0.5681931376457214,
|
| 12062 |
+
"learning_rate": 0.0007951188182402054,
|
| 12063 |
+
"loss": 2.7598,
|
| 12064 |
+
"step": 17220
|
| 12065 |
+
},
|
| 12066 |
+
{
|
| 12067 |
+
"epoch": 14.22458458045206,
|
| 12068 |
+
"grad_norm": 0.5726577639579773,
|
| 12069 |
+
"learning_rate": 0.000794935315166529,
|
| 12070 |
+
"loss": 2.7431,
|
| 12071 |
+
"step": 17230
|
| 12072 |
+
},
|
| 12073 |
+
{
|
| 12074 |
+
"epoch": 14.232841366498091,
|
| 12075 |
+
"grad_norm": 0.5552230477333069,
|
| 12076 |
+
"learning_rate": 0.0007947518120928525,
|
| 12077 |
+
"loss": 2.7498,
|
| 12078 |
+
"step": 17240
|
| 12079 |
+
},
|
| 12080 |
+
{
|
| 12081 |
+
"epoch": 14.241098152544122,
|
| 12082 |
+
"grad_norm": 0.5898513793945312,
|
| 12083 |
+
"learning_rate": 0.0007945683090191761,
|
| 12084 |
+
"loss": 2.7503,
|
| 12085 |
+
"step": 17250
|
| 12086 |
+
},
|
| 12087 |
+
{
|
| 12088 |
+
"epoch": 14.249354938590153,
|
| 12089 |
+
"grad_norm": 0.5322459936141968,
|
| 12090 |
+
"learning_rate": 0.0007943848059454995,
|
| 12091 |
+
"loss": 2.7343,
|
| 12092 |
+
"step": 17260
|
| 12093 |
+
},
|
| 12094 |
+
{
|
| 12095 |
+
"epoch": 14.257611724636185,
|
| 12096 |
+
"grad_norm": 0.62173992395401,
|
| 12097 |
+
"learning_rate": 0.0007942013028718231,
|
| 12098 |
+
"loss": 2.7424,
|
| 12099 |
+
"step": 17270
|
| 12100 |
+
},
|
| 12101 |
+
{
|
| 12102 |
+
"epoch": 14.265868510682218,
|
| 12103 |
+
"grad_norm": 0.5796912908554077,
|
| 12104 |
+
"learning_rate": 0.0007940177997981466,
|
| 12105 |
+
"loss": 2.7477,
|
| 12106 |
+
"step": 17280
|
| 12107 |
+
},
|
| 12108 |
+
{
|
| 12109 |
+
"epoch": 14.274125296728249,
|
| 12110 |
+
"grad_norm": 0.6236594915390015,
|
| 12111 |
+
"learning_rate": 0.0007938342967244702,
|
| 12112 |
+
"loss": 2.7553,
|
| 12113 |
+
"step": 17290
|
| 12114 |
+
},
|
| 12115 |
+
{
|
| 12116 |
+
"epoch": 14.28238208277428,
|
| 12117 |
+
"grad_norm": 0.5684297680854797,
|
| 12118 |
+
"learning_rate": 0.0007936507936507937,
|
| 12119 |
+
"loss": 2.7347,
|
| 12120 |
+
"step": 17300
|
| 12121 |
+
},
|
| 12122 |
+
{
|
| 12123 |
+
"epoch": 14.290638868820311,
|
| 12124 |
+
"grad_norm": 0.576805830001831,
|
| 12125 |
+
"learning_rate": 0.0007934672905771172,
|
| 12126 |
+
"loss": 2.7465,
|
| 12127 |
+
"step": 17310
|
| 12128 |
+
},
|
| 12129 |
+
{
|
| 12130 |
+
"epoch": 14.298895654866342,
|
| 12131 |
+
"grad_norm": 0.6182284951210022,
|
| 12132 |
+
"learning_rate": 0.0007932837875034407,
|
| 12133 |
+
"loss": 2.746,
|
| 12134 |
+
"step": 17320
|
| 12135 |
+
},
|
| 12136 |
+
{
|
| 12137 |
+
"epoch": 14.307152440912375,
|
| 12138 |
+
"grad_norm": 0.5486750602722168,
|
| 12139 |
+
"learning_rate": 0.0007931002844297643,
|
| 12140 |
+
"loss": 2.7496,
|
| 12141 |
+
"step": 17330
|
| 12142 |
+
},
|
| 12143 |
+
{
|
| 12144 |
+
"epoch": 14.315409226958407,
|
| 12145 |
+
"grad_norm": 0.5673812627792358,
|
| 12146 |
+
"learning_rate": 0.0007929167813560876,
|
| 12147 |
+
"loss": 2.7365,
|
| 12148 |
+
"step": 17340
|
| 12149 |
+
},
|
| 12150 |
+
{
|
| 12151 |
+
"epoch": 14.323666013004438,
|
| 12152 |
+
"grad_norm": 0.606238067150116,
|
| 12153 |
+
"learning_rate": 0.0007927332782824112,
|
| 12154 |
+
"loss": 2.7423,
|
| 12155 |
+
"step": 17350
|
| 12156 |
+
},
|
| 12157 |
+
{
|
| 12158 |
+
"epoch": 14.331922799050469,
|
| 12159 |
+
"grad_norm": 0.555072009563446,
|
| 12160 |
+
"learning_rate": 0.0007925497752087347,
|
| 12161 |
+
"loss": 2.746,
|
| 12162 |
+
"step": 17360
|
| 12163 |
+
},
|
| 12164 |
+
{
|
| 12165 |
+
"epoch": 14.340179585096502,
|
| 12166 |
+
"grad_norm": 0.5399696826934814,
|
| 12167 |
+
"learning_rate": 0.0007923662721350583,
|
| 12168 |
+
"loss": 2.7488,
|
| 12169 |
+
"step": 17370
|
| 12170 |
+
},
|
| 12171 |
+
{
|
| 12172 |
+
"epoch": 14.348436371142533,
|
| 12173 |
+
"grad_norm": 0.5781683921813965,
|
| 12174 |
+
"learning_rate": 0.0007921827690613817,
|
| 12175 |
+
"loss": 2.7525,
|
| 12176 |
+
"step": 17380
|
| 12177 |
+
},
|
| 12178 |
+
{
|
| 12179 |
+
"epoch": 14.356693157188564,
|
| 12180 |
+
"grad_norm": 0.5473909378051758,
|
| 12181 |
+
"learning_rate": 0.0007919992659877053,
|
| 12182 |
+
"loss": 2.7469,
|
| 12183 |
+
"step": 17390
|
| 12184 |
+
},
|
| 12185 |
+
{
|
| 12186 |
+
"epoch": 14.364949943234596,
|
| 12187 |
+
"grad_norm": 0.5242516398429871,
|
| 12188 |
+
"learning_rate": 0.0007918157629140288,
|
| 12189 |
+
"loss": 2.737,
|
| 12190 |
+
"step": 17400
|
| 12191 |
+
},
|
| 12192 |
+
{
|
| 12193 |
+
"epoch": 14.373206729280627,
|
| 12194 |
+
"grad_norm": 0.5968852043151855,
|
| 12195 |
+
"learning_rate": 0.0007916322598403524,
|
| 12196 |
+
"loss": 2.7457,
|
| 12197 |
+
"step": 17410
|
| 12198 |
+
},
|
| 12199 |
+
{
|
| 12200 |
+
"epoch": 14.38146351532666,
|
| 12201 |
+
"grad_norm": 0.5766412615776062,
|
| 12202 |
+
"learning_rate": 0.0007914487567666758,
|
| 12203 |
+
"loss": 2.7326,
|
| 12204 |
+
"step": 17420
|
| 12205 |
+
},
|
| 12206 |
+
{
|
| 12207 |
+
"epoch": 14.38972030137269,
|
| 12208 |
+
"grad_norm": 0.6067407131195068,
|
| 12209 |
+
"learning_rate": 0.0007912652536929994,
|
| 12210 |
+
"loss": 2.7426,
|
| 12211 |
+
"step": 17430
|
| 12212 |
+
},
|
| 12213 |
+
{
|
| 12214 |
+
"epoch": 14.397977087418722,
|
| 12215 |
+
"grad_norm": 0.6106924414634705,
|
| 12216 |
+
"learning_rate": 0.0007910817506193229,
|
| 12217 |
+
"loss": 2.7525,
|
| 12218 |
+
"step": 17440
|
| 12219 |
+
},
|
| 12220 |
+
{
|
| 12221 |
+
"epoch": 14.406233873464753,
|
| 12222 |
+
"grad_norm": 0.6435558199882507,
|
| 12223 |
+
"learning_rate": 0.0007908982475456465,
|
| 12224 |
+
"loss": 2.7428,
|
| 12225 |
+
"step": 17450
|
| 12226 |
+
},
|
| 12227 |
+
{
|
| 12228 |
+
"epoch": 14.414490659510786,
|
| 12229 |
+
"grad_norm": 0.6241771578788757,
|
| 12230 |
+
"learning_rate": 0.0007907147444719699,
|
| 12231 |
+
"loss": 2.7438,
|
| 12232 |
+
"step": 17460
|
| 12233 |
+
},
|
| 12234 |
+
{
|
| 12235 |
+
"epoch": 14.422747445556817,
|
| 12236 |
+
"grad_norm": 0.6236996054649353,
|
| 12237 |
+
"learning_rate": 0.0007905312413982934,
|
| 12238 |
+
"loss": 2.7496,
|
| 12239 |
+
"step": 17470
|
| 12240 |
+
},
|
| 12241 |
+
{
|
| 12242 |
+
"epoch": 14.431004231602849,
|
| 12243 |
+
"grad_norm": 0.6004934310913086,
|
| 12244 |
+
"learning_rate": 0.0007903477383246169,
|
| 12245 |
+
"loss": 2.7483,
|
| 12246 |
+
"step": 17480
|
| 12247 |
+
},
|
| 12248 |
+
{
|
| 12249 |
+
"epoch": 14.43926101764888,
|
| 12250 |
+
"grad_norm": 0.5864703059196472,
|
| 12251 |
+
"learning_rate": 0.0007901642352509405,
|
| 12252 |
+
"loss": 2.7421,
|
| 12253 |
+
"step": 17490
|
| 12254 |
+
},
|
| 12255 |
+
{
|
| 12256 |
+
"epoch": 14.447517803694911,
|
| 12257 |
+
"grad_norm": 0.5803243517875671,
|
| 12258 |
+
"learning_rate": 0.0007899807321772639,
|
| 12259 |
+
"loss": 2.7512,
|
| 12260 |
+
"step": 17500
|
| 12261 |
+
},
|
| 12262 |
+
{
|
| 12263 |
+
"epoch": 14.455774589740944,
|
| 12264 |
+
"grad_norm": 0.5815431475639343,
|
| 12265 |
+
"learning_rate": 0.0007897972291035875,
|
| 12266 |
+
"loss": 2.7539,
|
| 12267 |
+
"step": 17510
|
| 12268 |
+
},
|
| 12269 |
+
{
|
| 12270 |
+
"epoch": 14.464031375786975,
|
| 12271 |
+
"grad_norm": 0.5773807168006897,
|
| 12272 |
+
"learning_rate": 0.000789613726029911,
|
| 12273 |
+
"loss": 2.7282,
|
| 12274 |
+
"step": 17520
|
| 12275 |
+
},
|
| 12276 |
+
{
|
| 12277 |
+
"epoch": 14.472288161833006,
|
| 12278 |
+
"grad_norm": 0.561482846736908,
|
| 12279 |
+
"learning_rate": 0.0007894302229562346,
|
| 12280 |
+
"loss": 2.7368,
|
| 12281 |
+
"step": 17530
|
| 12282 |
+
},
|
| 12283 |
+
{
|
| 12284 |
+
"epoch": 14.480544947879038,
|
| 12285 |
+
"grad_norm": 0.6419026255607605,
|
| 12286 |
+
"learning_rate": 0.000789246719882558,
|
| 12287 |
+
"loss": 2.752,
|
| 12288 |
+
"step": 17540
|
| 12289 |
+
},
|
| 12290 |
+
{
|
| 12291 |
+
"epoch": 14.488801733925069,
|
| 12292 |
+
"grad_norm": 0.5817477107048035,
|
| 12293 |
+
"learning_rate": 0.0007890632168088816,
|
| 12294 |
+
"loss": 2.7464,
|
| 12295 |
+
"step": 17550
|
| 12296 |
+
},
|
| 12297 |
+
{
|
| 12298 |
+
"epoch": 14.497058519971102,
|
| 12299 |
+
"grad_norm": 0.6521551609039307,
|
| 12300 |
+
"learning_rate": 0.0007888797137352051,
|
| 12301 |
+
"loss": 2.7582,
|
| 12302 |
+
"step": 17560
|
| 12303 |
+
},
|
| 12304 |
+
{
|
| 12305 |
+
"epoch": 14.505315306017133,
|
| 12306 |
+
"grad_norm": 0.6004222631454468,
|
| 12307 |
+
"learning_rate": 0.0007886962106615287,
|
| 12308 |
+
"loss": 2.7429,
|
| 12309 |
+
"step": 17570
|
| 12310 |
+
},
|
| 12311 |
+
{
|
| 12312 |
+
"epoch": 14.513572092063164,
|
| 12313 |
+
"grad_norm": 0.6220718026161194,
|
| 12314 |
+
"learning_rate": 0.0007885127075878521,
|
| 12315 |
+
"loss": 2.7376,
|
| 12316 |
+
"step": 17580
|
| 12317 |
+
},
|
| 12318 |
+
{
|
| 12319 |
+
"epoch": 14.521828878109195,
|
| 12320 |
+
"grad_norm": 0.5441803336143494,
|
| 12321 |
+
"learning_rate": 0.0007883292045141757,
|
| 12322 |
+
"loss": 2.7418,
|
| 12323 |
+
"step": 17590
|
| 12324 |
+
},
|
| 12325 |
+
{
|
| 12326 |
+
"epoch": 14.530085664155228,
|
| 12327 |
+
"grad_norm": 0.5832270383834839,
|
| 12328 |
+
"learning_rate": 0.0007881457014404991,
|
| 12329 |
+
"loss": 2.7377,
|
| 12330 |
+
"step": 17600
|
| 12331 |
+
},
|
| 12332 |
+
{
|
| 12333 |
+
"epoch": 14.53834245020126,
|
| 12334 |
+
"grad_norm": 0.536746621131897,
|
| 12335 |
+
"learning_rate": 0.0007879621983668227,
|
| 12336 |
+
"loss": 2.7456,
|
| 12337 |
+
"step": 17610
|
| 12338 |
+
},
|
| 12339 |
+
{
|
| 12340 |
+
"epoch": 14.54659923624729,
|
| 12341 |
+
"grad_norm": 0.5866507887840271,
|
| 12342 |
+
"learning_rate": 0.0007877786952931461,
|
| 12343 |
+
"loss": 2.7429,
|
| 12344 |
+
"step": 17620
|
| 12345 |
+
},
|
| 12346 |
+
{
|
| 12347 |
+
"epoch": 14.554856022293322,
|
| 12348 |
+
"grad_norm": 0.5756723880767822,
|
| 12349 |
+
"learning_rate": 0.0007875951922194697,
|
| 12350 |
+
"loss": 2.743,
|
| 12351 |
+
"step": 17630
|
| 12352 |
+
},
|
| 12353 |
+
{
|
| 12354 |
+
"epoch": 14.563112808339355,
|
| 12355 |
+
"grad_norm": 0.5826034545898438,
|
| 12356 |
+
"learning_rate": 0.0007874116891457932,
|
| 12357 |
+
"loss": 2.7452,
|
| 12358 |
+
"step": 17640
|
| 12359 |
+
},
|
| 12360 |
+
{
|
| 12361 |
+
"epoch": 14.571369594385386,
|
| 12362 |
+
"grad_norm": 0.5977003574371338,
|
| 12363 |
+
"learning_rate": 0.0007872281860721168,
|
| 12364 |
+
"loss": 2.7443,
|
| 12365 |
+
"step": 17650
|
| 12366 |
+
},
|
| 12367 |
+
{
|
| 12368 |
+
"epoch": 14.579626380431417,
|
| 12369 |
+
"grad_norm": 0.551539957523346,
|
| 12370 |
+
"learning_rate": 0.0007870446829984402,
|
| 12371 |
+
"loss": 2.7434,
|
| 12372 |
+
"step": 17660
|
| 12373 |
+
},
|
| 12374 |
+
{
|
| 12375 |
+
"epoch": 14.587883166477448,
|
| 12376 |
+
"grad_norm": 0.6162058115005493,
|
| 12377 |
+
"learning_rate": 0.0007868611799247638,
|
| 12378 |
+
"loss": 2.734,
|
| 12379 |
+
"step": 17670
|
| 12380 |
+
},
|
| 12381 |
+
{
|
| 12382 |
+
"epoch": 14.59613995252348,
|
| 12383 |
+
"grad_norm": 0.5811628103256226,
|
| 12384 |
+
"learning_rate": 0.0007866776768510873,
|
| 12385 |
+
"loss": 2.7447,
|
| 12386 |
+
"step": 17680
|
| 12387 |
+
},
|
| 12388 |
+
{
|
| 12389 |
+
"epoch": 14.604396738569513,
|
| 12390 |
+
"grad_norm": 0.6103553771972656,
|
| 12391 |
+
"learning_rate": 0.0007864941737774109,
|
| 12392 |
+
"loss": 2.7472,
|
| 12393 |
+
"step": 17690
|
| 12394 |
+
},
|
| 12395 |
+
{
|
| 12396 |
+
"epoch": 14.612653524615544,
|
| 12397 |
+
"grad_norm": 0.569419264793396,
|
| 12398 |
+
"learning_rate": 0.0007863106707037343,
|
| 12399 |
+
"loss": 2.7578,
|
| 12400 |
+
"step": 17700
|
| 12401 |
+
},
|
| 12402 |
+
{
|
| 12403 |
+
"epoch": 14.620910310661575,
|
| 12404 |
+
"grad_norm": 0.6102364659309387,
|
| 12405 |
+
"learning_rate": 0.0007861271676300579,
|
| 12406 |
+
"loss": 2.7364,
|
| 12407 |
+
"step": 17710
|
| 12408 |
+
},
|
| 12409 |
+
{
|
| 12410 |
+
"epoch": 14.629167096707606,
|
| 12411 |
+
"grad_norm": 0.5832472443580627,
|
| 12412 |
+
"learning_rate": 0.0007859436645563814,
|
| 12413 |
+
"loss": 2.7449,
|
| 12414 |
+
"step": 17720
|
| 12415 |
+
},
|
| 12416 |
+
{
|
| 12417 |
+
"epoch": 14.637423882753637,
|
| 12418 |
+
"grad_norm": 0.5760400891304016,
|
| 12419 |
+
"learning_rate": 0.0007857601614827049,
|
| 12420 |
+
"loss": 2.7581,
|
| 12421 |
+
"step": 17730
|
| 12422 |
+
},
|
| 12423 |
+
{
|
| 12424 |
+
"epoch": 14.64568066879967,
|
| 12425 |
+
"grad_norm": 0.6216306686401367,
|
| 12426 |
+
"learning_rate": 0.0007855766584090283,
|
| 12427 |
+
"loss": 2.732,
|
| 12428 |
+
"step": 17740
|
| 12429 |
+
},
|
| 12430 |
+
{
|
| 12431 |
+
"epoch": 14.653937454845702,
|
| 12432 |
+
"grad_norm": 0.5639564394950867,
|
| 12433 |
+
"learning_rate": 0.0007853931553353519,
|
| 12434 |
+
"loss": 2.7582,
|
| 12435 |
+
"step": 17750
|
| 12436 |
+
},
|
| 12437 |
+
{
|
| 12438 |
+
"epoch": 14.662194240891733,
|
| 12439 |
+
"grad_norm": 0.5887823700904846,
|
| 12440 |
+
"learning_rate": 0.0007852096522616754,
|
| 12441 |
+
"loss": 2.7492,
|
| 12442 |
+
"step": 17760
|
| 12443 |
+
},
|
| 12444 |
+
{
|
| 12445 |
+
"epoch": 14.670451026937764,
|
| 12446 |
+
"grad_norm": 0.5743685364723206,
|
| 12447 |
+
"learning_rate": 0.0007850261491879988,
|
| 12448 |
+
"loss": 2.7484,
|
| 12449 |
+
"step": 17770
|
| 12450 |
+
},
|
| 12451 |
+
{
|
| 12452 |
+
"epoch": 14.678707812983795,
|
| 12453 |
+
"grad_norm": 0.6122255921363831,
|
| 12454 |
+
"learning_rate": 0.0007848426461143224,
|
| 12455 |
+
"loss": 2.7326,
|
| 12456 |
+
"step": 17780
|
| 12457 |
+
},
|
| 12458 |
+
{
|
| 12459 |
+
"epoch": 14.686964599029828,
|
| 12460 |
+
"grad_norm": 0.6089203357696533,
|
| 12461 |
+
"learning_rate": 0.0007846591430406459,
|
| 12462 |
+
"loss": 2.7411,
|
| 12463 |
+
"step": 17790
|
| 12464 |
+
},
|
| 12465 |
+
{
|
| 12466 |
+
"epoch": 14.69522138507586,
|
| 12467 |
+
"grad_norm": 0.5829803347587585,
|
| 12468 |
+
"learning_rate": 0.0007844756399669695,
|
| 12469 |
+
"loss": 2.7406,
|
| 12470 |
+
"step": 17800
|
| 12471 |
+
},
|
| 12472 |
+
{
|
| 12473 |
+
"epoch": 14.70347817112189,
|
| 12474 |
+
"grad_norm": 0.5928598642349243,
|
| 12475 |
+
"learning_rate": 0.0007842921368932929,
|
| 12476 |
+
"loss": 2.7462,
|
| 12477 |
+
"step": 17810
|
| 12478 |
+
},
|
| 12479 |
+
{
|
| 12480 |
+
"epoch": 14.711734957167922,
|
| 12481 |
+
"grad_norm": 0.6143853664398193,
|
| 12482 |
+
"learning_rate": 0.0007841086338196165,
|
| 12483 |
+
"loss": 2.7446,
|
| 12484 |
+
"step": 17820
|
| 12485 |
+
},
|
| 12486 |
+
{
|
| 12487 |
+
"epoch": 14.719991743213955,
|
| 12488 |
+
"grad_norm": 0.6457964777946472,
|
| 12489 |
+
"learning_rate": 0.00078392513074594,
|
| 12490 |
+
"loss": 2.7416,
|
| 12491 |
+
"step": 17830
|
| 12492 |
+
},
|
| 12493 |
+
{
|
| 12494 |
+
"epoch": 14.728248529259986,
|
| 12495 |
+
"grad_norm": 0.6104548573493958,
|
| 12496 |
+
"learning_rate": 0.0007837416276722636,
|
| 12497 |
+
"loss": 2.7332,
|
| 12498 |
+
"step": 17840
|
| 12499 |
+
},
|
| 12500 |
+
{
|
| 12501 |
+
"epoch": 14.736505315306017,
|
| 12502 |
+
"grad_norm": 0.5743314623832703,
|
| 12503 |
+
"learning_rate": 0.000783558124598587,
|
| 12504 |
+
"loss": 2.7459,
|
| 12505 |
+
"step": 17850
|
| 12506 |
+
},
|
| 12507 |
+
{
|
| 12508 |
+
"epoch": 14.744762101352048,
|
| 12509 |
+
"grad_norm": 0.552040159702301,
|
| 12510 |
+
"learning_rate": 0.0007833746215249105,
|
| 12511 |
+
"loss": 2.747,
|
| 12512 |
+
"step": 17860
|
| 12513 |
+
},
|
| 12514 |
+
{
|
| 12515 |
+
"epoch": 14.753018887398081,
|
| 12516 |
+
"grad_norm": 0.57485431432724,
|
| 12517 |
+
"learning_rate": 0.000783191118451234,
|
| 12518 |
+
"loss": 2.7543,
|
| 12519 |
+
"step": 17870
|
| 12520 |
+
},
|
| 12521 |
+
{
|
| 12522 |
+
"epoch": 14.761275673444112,
|
| 12523 |
+
"grad_norm": 0.5415575504302979,
|
| 12524 |
+
"learning_rate": 0.0007830076153775576,
|
| 12525 |
+
"loss": 2.7432,
|
| 12526 |
+
"step": 17880
|
| 12527 |
+
},
|
| 12528 |
+
{
|
| 12529 |
+
"epoch": 14.769532459490144,
|
| 12530 |
+
"grad_norm": 0.58236163854599,
|
| 12531 |
+
"learning_rate": 0.000782824112303881,
|
| 12532 |
+
"loss": 2.7429,
|
| 12533 |
+
"step": 17890
|
| 12534 |
+
},
|
| 12535 |
+
{
|
| 12536 |
+
"epoch": 14.777789245536175,
|
| 12537 |
+
"grad_norm": 0.5532475709915161,
|
| 12538 |
+
"learning_rate": 0.0007826406092302046,
|
| 12539 |
+
"loss": 2.7298,
|
| 12540 |
+
"step": 17900
|
| 12541 |
+
},
|
| 12542 |
+
{
|
| 12543 |
+
"epoch": 14.786046031582206,
|
| 12544 |
+
"grad_norm": 0.5620941519737244,
|
| 12545 |
+
"learning_rate": 0.0007824571061565281,
|
| 12546 |
+
"loss": 2.7398,
|
| 12547 |
+
"step": 17910
|
| 12548 |
+
},
|
| 12549 |
+
{
|
| 12550 |
+
"epoch": 14.794302817628239,
|
| 12551 |
+
"grad_norm": 0.5772944688796997,
|
| 12552 |
+
"learning_rate": 0.0007822736030828517,
|
| 12553 |
+
"loss": 2.7326,
|
| 12554 |
+
"step": 17920
|
| 12555 |
+
},
|
| 12556 |
+
{
|
| 12557 |
+
"epoch": 14.80255960367427,
|
| 12558 |
+
"grad_norm": 0.6066027879714966,
|
| 12559 |
+
"learning_rate": 0.0007820901000091751,
|
| 12560 |
+
"loss": 2.7341,
|
| 12561 |
+
"step": 17930
|
| 12562 |
+
},
|
| 12563 |
+
{
|
| 12564 |
+
"epoch": 14.810816389720301,
|
| 12565 |
+
"grad_norm": 0.5544676184654236,
|
| 12566 |
+
"learning_rate": 0.0007819065969354987,
|
| 12567 |
+
"loss": 2.7498,
|
| 12568 |
+
"step": 17940
|
| 12569 |
+
},
|
| 12570 |
+
{
|
| 12571 |
+
"epoch": 14.819073175766333,
|
| 12572 |
+
"grad_norm": 0.6160995364189148,
|
| 12573 |
+
"learning_rate": 0.0007817230938618222,
|
| 12574 |
+
"loss": 2.7362,
|
| 12575 |
+
"step": 17950
|
| 12576 |
+
},
|
| 12577 |
+
{
|
| 12578 |
+
"epoch": 14.827329961812364,
|
| 12579 |
+
"grad_norm": 0.6500398516654968,
|
| 12580 |
+
"learning_rate": 0.0007815395907881458,
|
| 12581 |
+
"loss": 2.7412,
|
| 12582 |
+
"step": 17960
|
| 12583 |
+
},
|
| 12584 |
+
{
|
| 12585 |
+
"epoch": 14.835586747858397,
|
| 12586 |
+
"grad_norm": 0.5683214068412781,
|
| 12587 |
+
"learning_rate": 0.0007813560877144692,
|
| 12588 |
+
"loss": 2.7469,
|
| 12589 |
+
"step": 17970
|
| 12590 |
+
},
|
| 12591 |
+
{
|
| 12592 |
+
"epoch": 14.843843533904428,
|
| 12593 |
+
"grad_norm": 0.5637840032577515,
|
| 12594 |
+
"learning_rate": 0.0007811725846407928,
|
| 12595 |
+
"loss": 2.7378,
|
| 12596 |
+
"step": 17980
|
| 12597 |
+
},
|
| 12598 |
+
{
|
| 12599 |
+
"epoch": 14.852100319950459,
|
| 12600 |
+
"grad_norm": 0.5927807092666626,
|
| 12601 |
+
"learning_rate": 0.0007809890815671162,
|
| 12602 |
+
"loss": 2.727,
|
| 12603 |
+
"step": 17990
|
| 12604 |
+
},
|
| 12605 |
+
{
|
| 12606 |
+
"epoch": 14.86035710599649,
|
| 12607 |
+
"grad_norm": 0.611671507358551,
|
| 12608 |
+
"learning_rate": 0.0007808055784934398,
|
| 12609 |
+
"loss": 2.7437,
|
| 12610 |
+
"step": 18000
|
| 12611 |
}
|
| 12612 |
],
|
| 12613 |
"logging_steps": 10,
|
|
|
|
| 12627 |
"attributes": {}
|
| 12628 |
}
|
| 12629 |
},
|
| 12630 |
+
"total_flos": 9.215610343472333e+16,
|
| 12631 |
"train_batch_size": 64,
|
| 12632 |
"trial_name": null,
|
| 12633 |
"trial_params": null
|