Training in progress, step 53887, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 306619286
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:011159f6c7dc96dd5ac1366209b10dab05e097df1b102941e45c82da68b59f2f
|
| 3 |
size 306619286
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 919972410
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:779761eab32018c8446b727954ef35d7120f4a3ef7c4759e47144a985a28812e
|
| 3 |
size 919972410
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25d300bc86beb0072df8e8773ec27f6e09afa7db83f06d03ffc9b42be6152925
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 5000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -35087,6 +35087,2722 @@
|
|
| 35087 |
"eval_samples_per_second": 3188.771,
|
| 35088 |
"eval_steps_per_second": 49.826,
|
| 35089 |
"step": 50000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35090 |
}
|
| 35091 |
],
|
| 35092 |
"logging_steps": 10,
|
|
@@ -35101,12 +37817,12 @@
|
|
| 35101 |
"should_evaluate": false,
|
| 35102 |
"should_log": false,
|
| 35103 |
"should_save": true,
|
| 35104 |
-
"should_training_stop":
|
| 35105 |
},
|
| 35106 |
"attributes": {}
|
| 35107 |
}
|
| 35108 |
},
|
| 35109 |
-
"total_flos":
|
| 35110 |
"train_batch_size": 8,
|
| 35111 |
"trial_name": null,
|
| 35112 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.999997680336444,
|
| 5 |
"eval_steps": 5000,
|
| 6 |
+
"global_step": 53887,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 35087 |
"eval_samples_per_second": 3188.771,
|
| 35088 |
"eval_steps_per_second": 49.826,
|
| 35089 |
"step": 50000
|
| 35090 |
+
},
|
| 35091 |
+
{
|
| 35092 |
+
"epoch": 0.928050995483615,
|
| 35093 |
+
"grad_norm": 36.09375,
|
| 35094 |
+
"learning_rate": 9.854992074001885e-06,
|
| 35095 |
+
"loss": 18.631,
|
| 35096 |
+
"step": 50010
|
| 35097 |
+
},
|
| 35098 |
+
{
|
| 35099 |
+
"epoch": 0.9282365685680949,
|
| 35100 |
+
"grad_norm": 38.5,
|
| 35101 |
+
"learning_rate": 9.854963078215844e-06,
|
| 35102 |
+
"loss": 18.7004,
|
| 35103 |
+
"step": 50020
|
| 35104 |
+
},
|
| 35105 |
+
{
|
| 35106 |
+
"epoch": 0.9284221416525748,
|
| 35107 |
+
"grad_norm": 37.15625,
|
| 35108 |
+
"learning_rate": 9.854934082429802e-06,
|
| 35109 |
+
"loss": 18.9797,
|
| 35110 |
+
"step": 50030
|
| 35111 |
+
},
|
| 35112 |
+
{
|
| 35113 |
+
"epoch": 0.9286077147370545,
|
| 35114 |
+
"grad_norm": 37.1875,
|
| 35115 |
+
"learning_rate": 9.854905086643759e-06,
|
| 35116 |
+
"loss": 18.6544,
|
| 35117 |
+
"step": 50040
|
| 35118 |
+
},
|
| 35119 |
+
{
|
| 35120 |
+
"epoch": 0.9287932878215344,
|
| 35121 |
+
"grad_norm": 36.0625,
|
| 35122 |
+
"learning_rate": 9.854876090857716e-06,
|
| 35123 |
+
"loss": 18.7909,
|
| 35124 |
+
"step": 50050
|
| 35125 |
+
},
|
| 35126 |
+
{
|
| 35127 |
+
"epoch": 0.9289788609060142,
|
| 35128 |
+
"grad_norm": 38.3125,
|
| 35129 |
+
"learning_rate": 9.854847095071675e-06,
|
| 35130 |
+
"loss": 19.1664,
|
| 35131 |
+
"step": 50060
|
| 35132 |
+
},
|
| 35133 |
+
{
|
| 35134 |
+
"epoch": 0.929164433990494,
|
| 35135 |
+
"grad_norm": 37.15625,
|
| 35136 |
+
"learning_rate": 9.854818099285631e-06,
|
| 35137 |
+
"loss": 18.7882,
|
| 35138 |
+
"step": 50070
|
| 35139 |
+
},
|
| 35140 |
+
{
|
| 35141 |
+
"epoch": 0.9293500070749738,
|
| 35142 |
+
"grad_norm": 34.5625,
|
| 35143 |
+
"learning_rate": 9.854789103499589e-06,
|
| 35144 |
+
"loss": 18.8722,
|
| 35145 |
+
"step": 50080
|
| 35146 |
+
},
|
| 35147 |
+
{
|
| 35148 |
+
"epoch": 0.9295355801594537,
|
| 35149 |
+
"grad_norm": 39.28125,
|
| 35150 |
+
"learning_rate": 9.854760107713548e-06,
|
| 35151 |
+
"loss": 18.4605,
|
| 35152 |
+
"step": 50090
|
| 35153 |
+
},
|
| 35154 |
+
{
|
| 35155 |
+
"epoch": 0.9297211532439335,
|
| 35156 |
+
"grad_norm": 36.0625,
|
| 35157 |
+
"learning_rate": 9.854731111927505e-06,
|
| 35158 |
+
"loss": 18.957,
|
| 35159 |
+
"step": 50100
|
| 35160 |
+
},
|
| 35161 |
+
{
|
| 35162 |
+
"epoch": 0.9299067263284133,
|
| 35163 |
+
"grad_norm": 36.9375,
|
| 35164 |
+
"learning_rate": 9.854702116141462e-06,
|
| 35165 |
+
"loss": 18.9987,
|
| 35166 |
+
"step": 50110
|
| 35167 |
+
},
|
| 35168 |
+
{
|
| 35169 |
+
"epoch": 0.9300922994128932,
|
| 35170 |
+
"grad_norm": 37.09375,
|
| 35171 |
+
"learning_rate": 9.85467312035542e-06,
|
| 35172 |
+
"loss": 18.3386,
|
| 35173 |
+
"step": 50120
|
| 35174 |
+
},
|
| 35175 |
+
{
|
| 35176 |
+
"epoch": 0.9302778724973729,
|
| 35177 |
+
"grad_norm": 36.78125,
|
| 35178 |
+
"learning_rate": 9.854644124569377e-06,
|
| 35179 |
+
"loss": 18.7949,
|
| 35180 |
+
"step": 50130
|
| 35181 |
+
},
|
| 35182 |
+
{
|
| 35183 |
+
"epoch": 0.9304634455818528,
|
| 35184 |
+
"grad_norm": 35.59375,
|
| 35185 |
+
"learning_rate": 9.854615128783335e-06,
|
| 35186 |
+
"loss": 18.5927,
|
| 35187 |
+
"step": 50140
|
| 35188 |
+
},
|
| 35189 |
+
{
|
| 35190 |
+
"epoch": 0.9306490186663327,
|
| 35191 |
+
"grad_norm": 35.78125,
|
| 35192 |
+
"learning_rate": 9.854586132997292e-06,
|
| 35193 |
+
"loss": 18.7013,
|
| 35194 |
+
"step": 50150
|
| 35195 |
+
},
|
| 35196 |
+
{
|
| 35197 |
+
"epoch": 0.9308345917508125,
|
| 35198 |
+
"grad_norm": 37.1875,
|
| 35199 |
+
"learning_rate": 9.85455713721125e-06,
|
| 35200 |
+
"loss": 18.6819,
|
| 35201 |
+
"step": 50160
|
| 35202 |
+
},
|
| 35203 |
+
{
|
| 35204 |
+
"epoch": 0.9310201648352923,
|
| 35205 |
+
"grad_norm": 36.3125,
|
| 35206 |
+
"learning_rate": 9.854528141425207e-06,
|
| 35207 |
+
"loss": 19.1635,
|
| 35208 |
+
"step": 50170
|
| 35209 |
+
},
|
| 35210 |
+
{
|
| 35211 |
+
"epoch": 0.9312057379197721,
|
| 35212 |
+
"grad_norm": 34.40625,
|
| 35213 |
+
"learning_rate": 9.854499145639164e-06,
|
| 35214 |
+
"loss": 18.2726,
|
| 35215 |
+
"step": 50180
|
| 35216 |
+
},
|
| 35217 |
+
{
|
| 35218 |
+
"epoch": 0.931391311004252,
|
| 35219 |
+
"grad_norm": 37.28125,
|
| 35220 |
+
"learning_rate": 9.854470149853122e-06,
|
| 35221 |
+
"loss": 18.9638,
|
| 35222 |
+
"step": 50190
|
| 35223 |
+
},
|
| 35224 |
+
{
|
| 35225 |
+
"epoch": 0.9315768840887317,
|
| 35226 |
+
"grad_norm": 35.28125,
|
| 35227 |
+
"learning_rate": 9.85444115406708e-06,
|
| 35228 |
+
"loss": 19.0953,
|
| 35229 |
+
"step": 50200
|
| 35230 |
+
},
|
| 35231 |
+
{
|
| 35232 |
+
"epoch": 0.9317624571732116,
|
| 35233 |
+
"grad_norm": 38.5,
|
| 35234 |
+
"learning_rate": 9.854412158281038e-06,
|
| 35235 |
+
"loss": 18.7024,
|
| 35236 |
+
"step": 50210
|
| 35237 |
+
},
|
| 35238 |
+
{
|
| 35239 |
+
"epoch": 0.9319480302576915,
|
| 35240 |
+
"grad_norm": 37.0,
|
| 35241 |
+
"learning_rate": 9.854383162494996e-06,
|
| 35242 |
+
"loss": 18.825,
|
| 35243 |
+
"step": 50220
|
| 35244 |
+
},
|
| 35245 |
+
{
|
| 35246 |
+
"epoch": 0.9321336033421712,
|
| 35247 |
+
"grad_norm": 36.59375,
|
| 35248 |
+
"learning_rate": 9.854354166708953e-06,
|
| 35249 |
+
"loss": 19.0501,
|
| 35250 |
+
"step": 50230
|
| 35251 |
+
},
|
| 35252 |
+
{
|
| 35253 |
+
"epoch": 0.9323191764266511,
|
| 35254 |
+
"grad_norm": 35.90625,
|
| 35255 |
+
"learning_rate": 9.85432517092291e-06,
|
| 35256 |
+
"loss": 18.4288,
|
| 35257 |
+
"step": 50240
|
| 35258 |
+
},
|
| 35259 |
+
{
|
| 35260 |
+
"epoch": 0.9325047495111309,
|
| 35261 |
+
"grad_norm": 35.90625,
|
| 35262 |
+
"learning_rate": 9.854296175136868e-06,
|
| 35263 |
+
"loss": 19.0825,
|
| 35264 |
+
"step": 50250
|
| 35265 |
+
},
|
| 35266 |
+
{
|
| 35267 |
+
"epoch": 0.9326903225956107,
|
| 35268 |
+
"grad_norm": 38.90625,
|
| 35269 |
+
"learning_rate": 9.854267179350825e-06,
|
| 35270 |
+
"loss": 19.0764,
|
| 35271 |
+
"step": 50260
|
| 35272 |
+
},
|
| 35273 |
+
{
|
| 35274 |
+
"epoch": 0.9328758956800905,
|
| 35275 |
+
"grad_norm": 37.75,
|
| 35276 |
+
"learning_rate": 9.854238183564784e-06,
|
| 35277 |
+
"loss": 18.8452,
|
| 35278 |
+
"step": 50270
|
| 35279 |
+
},
|
| 35280 |
+
{
|
| 35281 |
+
"epoch": 0.9330614687645704,
|
| 35282 |
+
"grad_norm": 34.59375,
|
| 35283 |
+
"learning_rate": 9.85420918777874e-06,
|
| 35284 |
+
"loss": 18.7928,
|
| 35285 |
+
"step": 50280
|
| 35286 |
+
},
|
| 35287 |
+
{
|
| 35288 |
+
"epoch": 0.9332470418490502,
|
| 35289 |
+
"grad_norm": 33.78125,
|
| 35290 |
+
"learning_rate": 9.854180191992697e-06,
|
| 35291 |
+
"loss": 18.1697,
|
| 35292 |
+
"step": 50290
|
| 35293 |
+
},
|
| 35294 |
+
{
|
| 35295 |
+
"epoch": 0.93343261493353,
|
| 35296 |
+
"grad_norm": 37.875,
|
| 35297 |
+
"learning_rate": 9.854151196206657e-06,
|
| 35298 |
+
"loss": 18.7602,
|
| 35299 |
+
"step": 50300
|
| 35300 |
+
},
|
| 35301 |
+
{
|
| 35302 |
+
"epoch": 0.9336181880180099,
|
| 35303 |
+
"grad_norm": 35.84375,
|
| 35304 |
+
"learning_rate": 9.854122200420614e-06,
|
| 35305 |
+
"loss": 18.7859,
|
| 35306 |
+
"step": 50310
|
| 35307 |
+
},
|
| 35308 |
+
{
|
| 35309 |
+
"epoch": 0.9338037611024897,
|
| 35310 |
+
"grad_norm": 37.0625,
|
| 35311 |
+
"learning_rate": 9.854093204634571e-06,
|
| 35312 |
+
"loss": 18.5895,
|
| 35313 |
+
"step": 50320
|
| 35314 |
+
},
|
| 35315 |
+
{
|
| 35316 |
+
"epoch": 0.9339893341869695,
|
| 35317 |
+
"grad_norm": 34.59375,
|
| 35318 |
+
"learning_rate": 9.854064208848529e-06,
|
| 35319 |
+
"loss": 18.5902,
|
| 35320 |
+
"step": 50330
|
| 35321 |
+
},
|
| 35322 |
+
{
|
| 35323 |
+
"epoch": 0.9341749072714494,
|
| 35324 |
+
"grad_norm": 37.28125,
|
| 35325 |
+
"learning_rate": 9.854035213062486e-06,
|
| 35326 |
+
"loss": 19.0408,
|
| 35327 |
+
"step": 50340
|
| 35328 |
+
},
|
| 35329 |
+
{
|
| 35330 |
+
"epoch": 0.9343604803559292,
|
| 35331 |
+
"grad_norm": 34.125,
|
| 35332 |
+
"learning_rate": 9.854006217276444e-06,
|
| 35333 |
+
"loss": 18.3564,
|
| 35334 |
+
"step": 50350
|
| 35335 |
+
},
|
| 35336 |
+
{
|
| 35337 |
+
"epoch": 0.934546053440409,
|
| 35338 |
+
"grad_norm": 38.4375,
|
| 35339 |
+
"learning_rate": 9.853977221490401e-06,
|
| 35340 |
+
"loss": 18.8423,
|
| 35341 |
+
"step": 50360
|
| 35342 |
+
},
|
| 35343 |
+
{
|
| 35344 |
+
"epoch": 0.9347316265248888,
|
| 35345 |
+
"grad_norm": 35.8125,
|
| 35346 |
+
"learning_rate": 9.85394822570436e-06,
|
| 35347 |
+
"loss": 18.3884,
|
| 35348 |
+
"step": 50370
|
| 35349 |
+
},
|
| 35350 |
+
{
|
| 35351 |
+
"epoch": 0.9349171996093687,
|
| 35352 |
+
"grad_norm": 35.4375,
|
| 35353 |
+
"learning_rate": 9.853919229918318e-06,
|
| 35354 |
+
"loss": 19.1228,
|
| 35355 |
+
"step": 50380
|
| 35356 |
+
},
|
| 35357 |
+
{
|
| 35358 |
+
"epoch": 0.9351027726938484,
|
| 35359 |
+
"grad_norm": 38.21875,
|
| 35360 |
+
"learning_rate": 9.853890234132273e-06,
|
| 35361 |
+
"loss": 18.5342,
|
| 35362 |
+
"step": 50390
|
| 35363 |
+
},
|
| 35364 |
+
{
|
| 35365 |
+
"epoch": 0.9352883457783283,
|
| 35366 |
+
"grad_norm": 35.125,
|
| 35367 |
+
"learning_rate": 9.853861238346232e-06,
|
| 35368 |
+
"loss": 19.0061,
|
| 35369 |
+
"step": 50400
|
| 35370 |
+
},
|
| 35371 |
+
{
|
| 35372 |
+
"epoch": 0.9354739188628082,
|
| 35373 |
+
"grad_norm": 36.53125,
|
| 35374 |
+
"learning_rate": 9.85383224256019e-06,
|
| 35375 |
+
"loss": 18.631,
|
| 35376 |
+
"step": 50410
|
| 35377 |
+
},
|
| 35378 |
+
{
|
| 35379 |
+
"epoch": 0.9356594919472879,
|
| 35380 |
+
"grad_norm": 37.0625,
|
| 35381 |
+
"learning_rate": 9.853803246774147e-06,
|
| 35382 |
+
"loss": 18.9214,
|
| 35383 |
+
"step": 50420
|
| 35384 |
+
},
|
| 35385 |
+
{
|
| 35386 |
+
"epoch": 0.9358450650317678,
|
| 35387 |
+
"grad_norm": 38.3125,
|
| 35388 |
+
"learning_rate": 9.853774250988105e-06,
|
| 35389 |
+
"loss": 18.8773,
|
| 35390 |
+
"step": 50430
|
| 35391 |
+
},
|
| 35392 |
+
{
|
| 35393 |
+
"epoch": 0.9360306381162476,
|
| 35394 |
+
"grad_norm": 38.125,
|
| 35395 |
+
"learning_rate": 9.853745255202062e-06,
|
| 35396 |
+
"loss": 19.1509,
|
| 35397 |
+
"step": 50440
|
| 35398 |
+
},
|
| 35399 |
+
{
|
| 35400 |
+
"epoch": 0.9362162112007274,
|
| 35401 |
+
"grad_norm": 35.28125,
|
| 35402 |
+
"learning_rate": 9.85371625941602e-06,
|
| 35403 |
+
"loss": 18.7762,
|
| 35404 |
+
"step": 50450
|
| 35405 |
+
},
|
| 35406 |
+
{
|
| 35407 |
+
"epoch": 0.9364017842852073,
|
| 35408 |
+
"grad_norm": 37.09375,
|
| 35409 |
+
"learning_rate": 9.853687263629977e-06,
|
| 35410 |
+
"loss": 18.6366,
|
| 35411 |
+
"step": 50460
|
| 35412 |
+
},
|
| 35413 |
+
{
|
| 35414 |
+
"epoch": 0.9365873573696871,
|
| 35415 |
+
"grad_norm": 37.71875,
|
| 35416 |
+
"learning_rate": 9.853658267843936e-06,
|
| 35417 |
+
"loss": 18.7425,
|
| 35418 |
+
"step": 50470
|
| 35419 |
+
},
|
| 35420 |
+
{
|
| 35421 |
+
"epoch": 0.9367729304541669,
|
| 35422 |
+
"grad_norm": 36.28125,
|
| 35423 |
+
"learning_rate": 9.853629272057893e-06,
|
| 35424 |
+
"loss": 18.5633,
|
| 35425 |
+
"step": 50480
|
| 35426 |
+
},
|
| 35427 |
+
{
|
| 35428 |
+
"epoch": 0.9369585035386467,
|
| 35429 |
+
"grad_norm": 37.65625,
|
| 35430 |
+
"learning_rate": 9.853600276271849e-06,
|
| 35431 |
+
"loss": 18.5449,
|
| 35432 |
+
"step": 50490
|
| 35433 |
+
},
|
| 35434 |
+
{
|
| 35435 |
+
"epoch": 0.9371440766231266,
|
| 35436 |
+
"grad_norm": 36.34375,
|
| 35437 |
+
"learning_rate": 9.853571280485808e-06,
|
| 35438 |
+
"loss": 19.0544,
|
| 35439 |
+
"step": 50500
|
| 35440 |
+
},
|
| 35441 |
+
{
|
| 35442 |
+
"epoch": 0.9373296497076065,
|
| 35443 |
+
"grad_norm": 37.71875,
|
| 35444 |
+
"learning_rate": 9.853542284699766e-06,
|
| 35445 |
+
"loss": 18.8355,
|
| 35446 |
+
"step": 50510
|
| 35447 |
+
},
|
| 35448 |
+
{
|
| 35449 |
+
"epoch": 0.9375152227920862,
|
| 35450 |
+
"grad_norm": 37.03125,
|
| 35451 |
+
"learning_rate": 9.853513288913723e-06,
|
| 35452 |
+
"loss": 18.7822,
|
| 35453 |
+
"step": 50520
|
| 35454 |
+
},
|
| 35455 |
+
{
|
| 35456 |
+
"epoch": 0.9377007958765661,
|
| 35457 |
+
"grad_norm": 35.71875,
|
| 35458 |
+
"learning_rate": 9.85348429312768e-06,
|
| 35459 |
+
"loss": 18.5719,
|
| 35460 |
+
"step": 50530
|
| 35461 |
+
},
|
| 35462 |
+
{
|
| 35463 |
+
"epoch": 0.9378863689610459,
|
| 35464 |
+
"grad_norm": 35.84375,
|
| 35465 |
+
"learning_rate": 9.85345529734164e-06,
|
| 35466 |
+
"loss": 18.8113,
|
| 35467 |
+
"step": 50540
|
| 35468 |
+
},
|
| 35469 |
+
{
|
| 35470 |
+
"epoch": 0.9380719420455257,
|
| 35471 |
+
"grad_norm": 37.28125,
|
| 35472 |
+
"learning_rate": 9.853426301555595e-06,
|
| 35473 |
+
"loss": 18.984,
|
| 35474 |
+
"step": 50550
|
| 35475 |
+
},
|
| 35476 |
+
{
|
| 35477 |
+
"epoch": 0.9382575151300055,
|
| 35478 |
+
"grad_norm": 35.28125,
|
| 35479 |
+
"learning_rate": 9.853397305769553e-06,
|
| 35480 |
+
"loss": 18.7287,
|
| 35481 |
+
"step": 50560
|
| 35482 |
+
},
|
| 35483 |
+
{
|
| 35484 |
+
"epoch": 0.9384430882144854,
|
| 35485 |
+
"grad_norm": 35.96875,
|
| 35486 |
+
"learning_rate": 9.853368309983512e-06,
|
| 35487 |
+
"loss": 18.6256,
|
| 35488 |
+
"step": 50570
|
| 35489 |
+
},
|
| 35490 |
+
{
|
| 35491 |
+
"epoch": 0.9386286612989652,
|
| 35492 |
+
"grad_norm": 35.96875,
|
| 35493 |
+
"learning_rate": 9.853339314197469e-06,
|
| 35494 |
+
"loss": 18.8356,
|
| 35495 |
+
"step": 50580
|
| 35496 |
+
},
|
| 35497 |
+
{
|
| 35498 |
+
"epoch": 0.938814234383445,
|
| 35499 |
+
"grad_norm": 37.75,
|
| 35500 |
+
"learning_rate": 9.853310318411426e-06,
|
| 35501 |
+
"loss": 18.5802,
|
| 35502 |
+
"step": 50590
|
| 35503 |
+
},
|
| 35504 |
+
{
|
| 35505 |
+
"epoch": 0.9389998074679249,
|
| 35506 |
+
"grad_norm": 35.84375,
|
| 35507 |
+
"learning_rate": 9.853281322625384e-06,
|
| 35508 |
+
"loss": 18.9444,
|
| 35509 |
+
"step": 50600
|
| 35510 |
+
},
|
| 35511 |
+
{
|
| 35512 |
+
"epoch": 0.9391853805524046,
|
| 35513 |
+
"grad_norm": 37.4375,
|
| 35514 |
+
"learning_rate": 9.853252326839341e-06,
|
| 35515 |
+
"loss": 18.9265,
|
| 35516 |
+
"step": 50610
|
| 35517 |
+
},
|
| 35518 |
+
{
|
| 35519 |
+
"epoch": 0.9393709536368845,
|
| 35520 |
+
"grad_norm": 35.625,
|
| 35521 |
+
"learning_rate": 9.853223331053299e-06,
|
| 35522 |
+
"loss": 19.3268,
|
| 35523 |
+
"step": 50620
|
| 35524 |
+
},
|
| 35525 |
+
{
|
| 35526 |
+
"epoch": 0.9395565267213644,
|
| 35527 |
+
"grad_norm": 35.25,
|
| 35528 |
+
"learning_rate": 9.853194335267256e-06,
|
| 35529 |
+
"loss": 18.3115,
|
| 35530 |
+
"step": 50630
|
| 35531 |
+
},
|
| 35532 |
+
{
|
| 35533 |
+
"epoch": 0.9397420998058441,
|
| 35534 |
+
"grad_norm": 37.53125,
|
| 35535 |
+
"learning_rate": 9.853165339481214e-06,
|
| 35536 |
+
"loss": 19.0,
|
| 35537 |
+
"step": 50640
|
| 35538 |
+
},
|
| 35539 |
+
{
|
| 35540 |
+
"epoch": 0.939927672890324,
|
| 35541 |
+
"grad_norm": 36.4375,
|
| 35542 |
+
"learning_rate": 9.853136343695173e-06,
|
| 35543 |
+
"loss": 18.8586,
|
| 35544 |
+
"step": 50650
|
| 35545 |
+
},
|
| 35546 |
+
{
|
| 35547 |
+
"epoch": 0.9401132459748038,
|
| 35548 |
+
"grad_norm": 37.53125,
|
| 35549 |
+
"learning_rate": 9.853107347909128e-06,
|
| 35550 |
+
"loss": 18.5812,
|
| 35551 |
+
"step": 50660
|
| 35552 |
+
},
|
| 35553 |
+
{
|
| 35554 |
+
"epoch": 0.9402988190592837,
|
| 35555 |
+
"grad_norm": 37.25,
|
| 35556 |
+
"learning_rate": 9.853078352123087e-06,
|
| 35557 |
+
"loss": 18.7525,
|
| 35558 |
+
"step": 50670
|
| 35559 |
+
},
|
| 35560 |
+
{
|
| 35561 |
+
"epoch": 0.9404843921437634,
|
| 35562 |
+
"grad_norm": 37.4375,
|
| 35563 |
+
"learning_rate": 9.853049356337045e-06,
|
| 35564 |
+
"loss": 18.8099,
|
| 35565 |
+
"step": 50680
|
| 35566 |
+
},
|
| 35567 |
+
{
|
| 35568 |
+
"epoch": 0.9406699652282433,
|
| 35569 |
+
"grad_norm": 37.875,
|
| 35570 |
+
"learning_rate": 9.853020360551002e-06,
|
| 35571 |
+
"loss": 18.8271,
|
| 35572 |
+
"step": 50690
|
| 35573 |
+
},
|
| 35574 |
+
{
|
| 35575 |
+
"epoch": 0.9408555383127232,
|
| 35576 |
+
"grad_norm": 36.59375,
|
| 35577 |
+
"learning_rate": 9.85299136476496e-06,
|
| 35578 |
+
"loss": 18.7296,
|
| 35579 |
+
"step": 50700
|
| 35580 |
+
},
|
| 35581 |
+
{
|
| 35582 |
+
"epoch": 0.9410411113972029,
|
| 35583 |
+
"grad_norm": 36.75,
|
| 35584 |
+
"learning_rate": 9.852962368978917e-06,
|
| 35585 |
+
"loss": 18.7892,
|
| 35586 |
+
"step": 50710
|
| 35587 |
+
},
|
| 35588 |
+
{
|
| 35589 |
+
"epoch": 0.9412266844816828,
|
| 35590 |
+
"grad_norm": 35.6875,
|
| 35591 |
+
"learning_rate": 9.852933373192874e-06,
|
| 35592 |
+
"loss": 18.672,
|
| 35593 |
+
"step": 50720
|
| 35594 |
+
},
|
| 35595 |
+
{
|
| 35596 |
+
"epoch": 0.9414122575661626,
|
| 35597 |
+
"grad_norm": 37.28125,
|
| 35598 |
+
"learning_rate": 9.852904377406832e-06,
|
| 35599 |
+
"loss": 18.4865,
|
| 35600 |
+
"step": 50730
|
| 35601 |
+
},
|
| 35602 |
+
{
|
| 35603 |
+
"epoch": 0.9415978306506424,
|
| 35604 |
+
"grad_norm": 36.59375,
|
| 35605 |
+
"learning_rate": 9.85287538162079e-06,
|
| 35606 |
+
"loss": 18.6127,
|
| 35607 |
+
"step": 50740
|
| 35608 |
+
},
|
| 35609 |
+
{
|
| 35610 |
+
"epoch": 0.9417834037351223,
|
| 35611 |
+
"grad_norm": 35.0625,
|
| 35612 |
+
"learning_rate": 9.852846385834748e-06,
|
| 35613 |
+
"loss": 18.6142,
|
| 35614 |
+
"step": 50750
|
| 35615 |
+
},
|
| 35616 |
+
{
|
| 35617 |
+
"epoch": 0.9419689768196021,
|
| 35618 |
+
"grad_norm": 35.71875,
|
| 35619 |
+
"learning_rate": 9.852817390048704e-06,
|
| 35620 |
+
"loss": 18.4889,
|
| 35621 |
+
"step": 50760
|
| 35622 |
+
},
|
| 35623 |
+
{
|
| 35624 |
+
"epoch": 0.9421545499040819,
|
| 35625 |
+
"grad_norm": 36.03125,
|
| 35626 |
+
"learning_rate": 9.852788394262661e-06,
|
| 35627 |
+
"loss": 18.0942,
|
| 35628 |
+
"step": 50770
|
| 35629 |
+
},
|
| 35630 |
+
{
|
| 35631 |
+
"epoch": 0.9423401229885617,
|
| 35632 |
+
"grad_norm": 38.0625,
|
| 35633 |
+
"learning_rate": 9.85275939847662e-06,
|
| 35634 |
+
"loss": 18.5683,
|
| 35635 |
+
"step": 50780
|
| 35636 |
+
},
|
| 35637 |
+
{
|
| 35638 |
+
"epoch": 0.9425256960730416,
|
| 35639 |
+
"grad_norm": 37.0625,
|
| 35640 |
+
"learning_rate": 9.852730402690578e-06,
|
| 35641 |
+
"loss": 18.7116,
|
| 35642 |
+
"step": 50790
|
| 35643 |
+
},
|
| 35644 |
+
{
|
| 35645 |
+
"epoch": 0.9427112691575213,
|
| 35646 |
+
"grad_norm": 34.65625,
|
| 35647 |
+
"learning_rate": 9.852701406904535e-06,
|
| 35648 |
+
"loss": 18.6401,
|
| 35649 |
+
"step": 50800
|
| 35650 |
+
},
|
| 35651 |
+
{
|
| 35652 |
+
"epoch": 0.9428968422420012,
|
| 35653 |
+
"grad_norm": 36.125,
|
| 35654 |
+
"learning_rate": 9.852672411118493e-06,
|
| 35655 |
+
"loss": 19.084,
|
| 35656 |
+
"step": 50810
|
| 35657 |
+
},
|
| 35658 |
+
{
|
| 35659 |
+
"epoch": 0.9430824153264811,
|
| 35660 |
+
"grad_norm": 37.34375,
|
| 35661 |
+
"learning_rate": 9.85264341533245e-06,
|
| 35662 |
+
"loss": 18.4527,
|
| 35663 |
+
"step": 50820
|
| 35664 |
+
},
|
| 35665 |
+
{
|
| 35666 |
+
"epoch": 0.9432679884109608,
|
| 35667 |
+
"grad_norm": 34.96875,
|
| 35668 |
+
"learning_rate": 9.852614419546408e-06,
|
| 35669 |
+
"loss": 18.6703,
|
| 35670 |
+
"step": 50830
|
| 35671 |
+
},
|
| 35672 |
+
{
|
| 35673 |
+
"epoch": 0.9434535614954407,
|
| 35674 |
+
"grad_norm": 39.15625,
|
| 35675 |
+
"learning_rate": 9.852585423760365e-06,
|
| 35676 |
+
"loss": 18.8782,
|
| 35677 |
+
"step": 50840
|
| 35678 |
+
},
|
| 35679 |
+
{
|
| 35680 |
+
"epoch": 0.9436391345799205,
|
| 35681 |
+
"grad_norm": 33.625,
|
| 35682 |
+
"learning_rate": 9.852556427974324e-06,
|
| 35683 |
+
"loss": 18.3635,
|
| 35684 |
+
"step": 50850
|
| 35685 |
+
},
|
| 35686 |
+
{
|
| 35687 |
+
"epoch": 0.9438247076644004,
|
| 35688 |
+
"grad_norm": 38.25,
|
| 35689 |
+
"learning_rate": 9.852527432188282e-06,
|
| 35690 |
+
"loss": 18.8192,
|
| 35691 |
+
"step": 50860
|
| 35692 |
+
},
|
| 35693 |
+
{
|
| 35694 |
+
"epoch": 0.9440102807488802,
|
| 35695 |
+
"grad_norm": 35.9375,
|
| 35696 |
+
"learning_rate": 9.852498436402237e-06,
|
| 35697 |
+
"loss": 18.9524,
|
| 35698 |
+
"step": 50870
|
| 35699 |
+
},
|
| 35700 |
+
{
|
| 35701 |
+
"epoch": 0.94419585383336,
|
| 35702 |
+
"grad_norm": 37.84375,
|
| 35703 |
+
"learning_rate": 9.852469440616196e-06,
|
| 35704 |
+
"loss": 18.9869,
|
| 35705 |
+
"step": 50880
|
| 35706 |
+
},
|
| 35707 |
+
{
|
| 35708 |
+
"epoch": 0.9443814269178399,
|
| 35709 |
+
"grad_norm": 37.4375,
|
| 35710 |
+
"learning_rate": 9.852440444830154e-06,
|
| 35711 |
+
"loss": 18.8108,
|
| 35712 |
+
"step": 50890
|
| 35713 |
+
},
|
| 35714 |
+
{
|
| 35715 |
+
"epoch": 0.9445670000023196,
|
| 35716 |
+
"grad_norm": 36.1875,
|
| 35717 |
+
"learning_rate": 9.852411449044111e-06,
|
| 35718 |
+
"loss": 18.7039,
|
| 35719 |
+
"step": 50900
|
| 35720 |
+
},
|
| 35721 |
+
{
|
| 35722 |
+
"epoch": 0.9447525730867995,
|
| 35723 |
+
"grad_norm": 38.59375,
|
| 35724 |
+
"learning_rate": 9.852382453258069e-06,
|
| 35725 |
+
"loss": 18.6205,
|
| 35726 |
+
"step": 50910
|
| 35727 |
+
},
|
| 35728 |
+
{
|
| 35729 |
+
"epoch": 0.9449381461712794,
|
| 35730 |
+
"grad_norm": 38.09375,
|
| 35731 |
+
"learning_rate": 9.852353457472026e-06,
|
| 35732 |
+
"loss": 18.9012,
|
| 35733 |
+
"step": 50920
|
| 35734 |
+
},
|
| 35735 |
+
{
|
| 35736 |
+
"epoch": 0.9451237192557591,
|
| 35737 |
+
"grad_norm": 35.03125,
|
| 35738 |
+
"learning_rate": 9.852324461685983e-06,
|
| 35739 |
+
"loss": 18.9504,
|
| 35740 |
+
"step": 50930
|
| 35741 |
+
},
|
| 35742 |
+
{
|
| 35743 |
+
"epoch": 0.945309292340239,
|
| 35744 |
+
"grad_norm": 36.78125,
|
| 35745 |
+
"learning_rate": 9.85229546589994e-06,
|
| 35746 |
+
"loss": 18.4827,
|
| 35747 |
+
"step": 50940
|
| 35748 |
+
},
|
| 35749 |
+
{
|
| 35750 |
+
"epoch": 0.9454948654247188,
|
| 35751 |
+
"grad_norm": 37.5625,
|
| 35752 |
+
"learning_rate": 9.8522664701139e-06,
|
| 35753 |
+
"loss": 19.0303,
|
| 35754 |
+
"step": 50950
|
| 35755 |
+
},
|
| 35756 |
+
{
|
| 35757 |
+
"epoch": 0.9456804385091986,
|
| 35758 |
+
"grad_norm": 38.09375,
|
| 35759 |
+
"learning_rate": 9.852237474327857e-06,
|
| 35760 |
+
"loss": 18.2305,
|
| 35761 |
+
"step": 50960
|
| 35762 |
+
},
|
| 35763 |
+
{
|
| 35764 |
+
"epoch": 0.9458660115936784,
|
| 35765 |
+
"grad_norm": 35.65625,
|
| 35766 |
+
"learning_rate": 9.852208478541815e-06,
|
| 35767 |
+
"loss": 18.5792,
|
| 35768 |
+
"step": 50970
|
| 35769 |
+
},
|
| 35770 |
+
{
|
| 35771 |
+
"epoch": 0.9460515846781583,
|
| 35772 |
+
"grad_norm": 38.46875,
|
| 35773 |
+
"learning_rate": 9.852179482755772e-06,
|
| 35774 |
+
"loss": 18.5712,
|
| 35775 |
+
"step": 50980
|
| 35776 |
+
},
|
| 35777 |
+
{
|
| 35778 |
+
"epoch": 0.9462371577626381,
|
| 35779 |
+
"grad_norm": 37.34375,
|
| 35780 |
+
"learning_rate": 9.85215048696973e-06,
|
| 35781 |
+
"loss": 18.6123,
|
| 35782 |
+
"step": 50990
|
| 35783 |
+
},
|
| 35784 |
+
{
|
| 35785 |
+
"epoch": 0.9464227308471179,
|
| 35786 |
+
"grad_norm": 37.125,
|
| 35787 |
+
"learning_rate": 9.852121491183687e-06,
|
| 35788 |
+
"loss": 18.5901,
|
| 35789 |
+
"step": 51000
|
| 35790 |
+
},
|
| 35791 |
+
{
|
| 35792 |
+
"epoch": 0.9466083039315978,
|
| 35793 |
+
"grad_norm": 36.46875,
|
| 35794 |
+
"learning_rate": 9.852092495397644e-06,
|
| 35795 |
+
"loss": 19.0493,
|
| 35796 |
+
"step": 51010
|
| 35797 |
+
},
|
| 35798 |
+
{
|
| 35799 |
+
"epoch": 0.9467938770160775,
|
| 35800 |
+
"grad_norm": 35.375,
|
| 35801 |
+
"learning_rate": 9.852063499611603e-06,
|
| 35802 |
+
"loss": 18.5143,
|
| 35803 |
+
"step": 51020
|
| 35804 |
+
},
|
| 35805 |
+
{
|
| 35806 |
+
"epoch": 0.9469794501005574,
|
| 35807 |
+
"grad_norm": 34.375,
|
| 35808 |
+
"learning_rate": 9.85203450382556e-06,
|
| 35809 |
+
"loss": 18.7262,
|
| 35810 |
+
"step": 51030
|
| 35811 |
+
},
|
| 35812 |
+
{
|
| 35813 |
+
"epoch": 0.9471650231850373,
|
| 35814 |
+
"grad_norm": 35.65625,
|
| 35815 |
+
"learning_rate": 9.852005508039517e-06,
|
| 35816 |
+
"loss": 18.5407,
|
| 35817 |
+
"step": 51040
|
| 35818 |
+
},
|
| 35819 |
+
{
|
| 35820 |
+
"epoch": 0.9473505962695171,
|
| 35821 |
+
"grad_norm": 35.84375,
|
| 35822 |
+
"learning_rate": 9.851976512253476e-06,
|
| 35823 |
+
"loss": 18.7274,
|
| 35824 |
+
"step": 51050
|
| 35825 |
+
},
|
| 35826 |
+
{
|
| 35827 |
+
"epoch": 0.9475361693539969,
|
| 35828 |
+
"grad_norm": 35.5,
|
| 35829 |
+
"learning_rate": 9.851947516467433e-06,
|
| 35830 |
+
"loss": 18.6067,
|
| 35831 |
+
"step": 51060
|
| 35832 |
+
},
|
| 35833 |
+
{
|
| 35834 |
+
"epoch": 0.9477217424384767,
|
| 35835 |
+
"grad_norm": 35.96875,
|
| 35836 |
+
"learning_rate": 9.85191852068139e-06,
|
| 35837 |
+
"loss": 18.9094,
|
| 35838 |
+
"step": 51070
|
| 35839 |
+
},
|
| 35840 |
+
{
|
| 35841 |
+
"epoch": 0.9479073155229566,
|
| 35842 |
+
"grad_norm": 34.96875,
|
| 35843 |
+
"learning_rate": 9.851889524895348e-06,
|
| 35844 |
+
"loss": 18.7977,
|
| 35845 |
+
"step": 51080
|
| 35846 |
+
},
|
| 35847 |
+
{
|
| 35848 |
+
"epoch": 0.9480928886074363,
|
| 35849 |
+
"grad_norm": 37.75,
|
| 35850 |
+
"learning_rate": 9.851860529109305e-06,
|
| 35851 |
+
"loss": 19.2816,
|
| 35852 |
+
"step": 51090
|
| 35853 |
+
},
|
| 35854 |
+
{
|
| 35855 |
+
"epoch": 0.9482784616919162,
|
| 35856 |
+
"grad_norm": 34.4375,
|
| 35857 |
+
"learning_rate": 9.851831533323263e-06,
|
| 35858 |
+
"loss": 19.179,
|
| 35859 |
+
"step": 51100
|
| 35860 |
+
},
|
| 35861 |
+
{
|
| 35862 |
+
"epoch": 0.9484640347763961,
|
| 35863 |
+
"grad_norm": 35.96875,
|
| 35864 |
+
"learning_rate": 9.85180253753722e-06,
|
| 35865 |
+
"loss": 18.4052,
|
| 35866 |
+
"step": 51110
|
| 35867 |
+
},
|
| 35868 |
+
{
|
| 35869 |
+
"epoch": 0.9486496078608758,
|
| 35870 |
+
"grad_norm": 36.0,
|
| 35871 |
+
"learning_rate": 9.85177354175118e-06,
|
| 35872 |
+
"loss": 18.9433,
|
| 35873 |
+
"step": 51120
|
| 35874 |
+
},
|
| 35875 |
+
{
|
| 35876 |
+
"epoch": 0.9488351809453557,
|
| 35877 |
+
"grad_norm": 37.4375,
|
| 35878 |
+
"learning_rate": 9.851744545965137e-06,
|
| 35879 |
+
"loss": 19.0316,
|
| 35880 |
+
"step": 51130
|
| 35881 |
+
},
|
| 35882 |
+
{
|
| 35883 |
+
"epoch": 0.9490207540298355,
|
| 35884 |
+
"grad_norm": 37.40625,
|
| 35885 |
+
"learning_rate": 9.851715550179092e-06,
|
| 35886 |
+
"loss": 18.5718,
|
| 35887 |
+
"step": 51140
|
| 35888 |
+
},
|
| 35889 |
+
{
|
| 35890 |
+
"epoch": 0.9492063271143153,
|
| 35891 |
+
"grad_norm": 36.5625,
|
| 35892 |
+
"learning_rate": 9.851686554393051e-06,
|
| 35893 |
+
"loss": 18.4393,
|
| 35894 |
+
"step": 51150
|
| 35895 |
+
},
|
| 35896 |
+
{
|
| 35897 |
+
"epoch": 0.9493919001987952,
|
| 35898 |
+
"grad_norm": 35.84375,
|
| 35899 |
+
"learning_rate": 9.851657558607009e-06,
|
| 35900 |
+
"loss": 18.1548,
|
| 35901 |
+
"step": 51160
|
| 35902 |
+
},
|
| 35903 |
+
{
|
| 35904 |
+
"epoch": 0.949577473283275,
|
| 35905 |
+
"grad_norm": 36.40625,
|
| 35906 |
+
"learning_rate": 9.851628562820966e-06,
|
| 35907 |
+
"loss": 18.8235,
|
| 35908 |
+
"step": 51170
|
| 35909 |
+
},
|
| 35910 |
+
{
|
| 35911 |
+
"epoch": 0.9497630463677548,
|
| 35912 |
+
"grad_norm": 34.90625,
|
| 35913 |
+
"learning_rate": 9.851599567034924e-06,
|
| 35914 |
+
"loss": 18.7454,
|
| 35915 |
+
"step": 51180
|
| 35916 |
+
},
|
| 35917 |
+
{
|
| 35918 |
+
"epoch": 0.9499486194522346,
|
| 35919 |
+
"grad_norm": 37.21875,
|
| 35920 |
+
"learning_rate": 9.851570571248881e-06,
|
| 35921 |
+
"loss": 18.0793,
|
| 35922 |
+
"step": 51190
|
| 35923 |
+
},
|
| 35924 |
+
{
|
| 35925 |
+
"epoch": 0.9501341925367145,
|
| 35926 |
+
"grad_norm": 36.125,
|
| 35927 |
+
"learning_rate": 9.851541575462838e-06,
|
| 35928 |
+
"loss": 18.5923,
|
| 35929 |
+
"step": 51200
|
| 35930 |
+
},
|
| 35931 |
+
{
|
| 35932 |
+
"epoch": 0.9503197656211944,
|
| 35933 |
+
"grad_norm": 37.3125,
|
| 35934 |
+
"learning_rate": 9.851512579676796e-06,
|
| 35935 |
+
"loss": 19.1447,
|
| 35936 |
+
"step": 51210
|
| 35937 |
+
},
|
| 35938 |
+
{
|
| 35939 |
+
"epoch": 0.9505053387056741,
|
| 35940 |
+
"grad_norm": 35.96875,
|
| 35941 |
+
"learning_rate": 9.851483583890753e-06,
|
| 35942 |
+
"loss": 18.39,
|
| 35943 |
+
"step": 51220
|
| 35944 |
+
},
|
| 35945 |
+
{
|
| 35946 |
+
"epoch": 0.950690911790154,
|
| 35947 |
+
"grad_norm": 37.96875,
|
| 35948 |
+
"learning_rate": 9.851454588104712e-06,
|
| 35949 |
+
"loss": 18.8331,
|
| 35950 |
+
"step": 51230
|
| 35951 |
+
},
|
| 35952 |
+
{
|
| 35953 |
+
"epoch": 0.9508764848746338,
|
| 35954 |
+
"grad_norm": 37.1875,
|
| 35955 |
+
"learning_rate": 9.85142559231867e-06,
|
| 35956 |
+
"loss": 18.3683,
|
| 35957 |
+
"step": 51240
|
| 35958 |
+
},
|
| 35959 |
+
{
|
| 35960 |
+
"epoch": 0.9510620579591136,
|
| 35961 |
+
"grad_norm": 36.4375,
|
| 35962 |
+
"learning_rate": 9.851396596532627e-06,
|
| 35963 |
+
"loss": 18.497,
|
| 35964 |
+
"step": 51250
|
| 35965 |
+
},
|
| 35966 |
+
{
|
| 35967 |
+
"epoch": 0.9512476310435934,
|
| 35968 |
+
"grad_norm": 37.3125,
|
| 35969 |
+
"learning_rate": 9.851367600746585e-06,
|
| 35970 |
+
"loss": 18.5485,
|
| 35971 |
+
"step": 51260
|
| 35972 |
+
},
|
| 35973 |
+
{
|
| 35974 |
+
"epoch": 0.9514332041280733,
|
| 35975 |
+
"grad_norm": 37.5625,
|
| 35976 |
+
"learning_rate": 9.851338604960542e-06,
|
| 35977 |
+
"loss": 18.4425,
|
| 35978 |
+
"step": 51270
|
| 35979 |
+
},
|
| 35980 |
+
{
|
| 35981 |
+
"epoch": 0.9516187772125531,
|
| 35982 |
+
"grad_norm": 37.96875,
|
| 35983 |
+
"learning_rate": 9.8513096091745e-06,
|
| 35984 |
+
"loss": 18.5681,
|
| 35985 |
+
"step": 51280
|
| 35986 |
+
},
|
| 35987 |
+
{
|
| 35988 |
+
"epoch": 0.9518043502970329,
|
| 35989 |
+
"grad_norm": 36.53125,
|
| 35990 |
+
"learning_rate": 9.851280613388457e-06,
|
| 35991 |
+
"loss": 18.5103,
|
| 35992 |
+
"step": 51290
|
| 35993 |
+
},
|
| 35994 |
+
{
|
| 35995 |
+
"epoch": 0.9519899233815128,
|
| 35996 |
+
"grad_norm": 35.46875,
|
| 35997 |
+
"learning_rate": 9.851251617602414e-06,
|
| 35998 |
+
"loss": 18.7339,
|
| 35999 |
+
"step": 51300
|
| 36000 |
+
},
|
| 36001 |
+
{
|
| 36002 |
+
"epoch": 0.9521754964659925,
|
| 36003 |
+
"grad_norm": 35.28125,
|
| 36004 |
+
"learning_rate": 9.851222621816372e-06,
|
| 36005 |
+
"loss": 18.6217,
|
| 36006 |
+
"step": 51310
|
| 36007 |
+
},
|
| 36008 |
+
{
|
| 36009 |
+
"epoch": 0.9523610695504724,
|
| 36010 |
+
"grad_norm": 37.125,
|
| 36011 |
+
"learning_rate": 9.851193626030329e-06,
|
| 36012 |
+
"loss": 18.5986,
|
| 36013 |
+
"step": 51320
|
| 36014 |
+
},
|
| 36015 |
+
{
|
| 36016 |
+
"epoch": 0.9525466426349523,
|
| 36017 |
+
"grad_norm": 39.3125,
|
| 36018 |
+
"learning_rate": 9.851164630244288e-06,
|
| 36019 |
+
"loss": 18.7082,
|
| 36020 |
+
"step": 51330
|
| 36021 |
+
},
|
| 36022 |
+
{
|
| 36023 |
+
"epoch": 0.952732215719432,
|
| 36024 |
+
"grad_norm": 35.78125,
|
| 36025 |
+
"learning_rate": 9.851135634458246e-06,
|
| 36026 |
+
"loss": 18.6332,
|
| 36027 |
+
"step": 51340
|
| 36028 |
+
},
|
| 36029 |
+
{
|
| 36030 |
+
"epoch": 0.9529177888039119,
|
| 36031 |
+
"grad_norm": 35.78125,
|
| 36032 |
+
"learning_rate": 9.851106638672201e-06,
|
| 36033 |
+
"loss": 18.7516,
|
| 36034 |
+
"step": 51350
|
| 36035 |
+
},
|
| 36036 |
+
{
|
| 36037 |
+
"epoch": 0.9531033618883917,
|
| 36038 |
+
"grad_norm": 35.75,
|
| 36039 |
+
"learning_rate": 9.85107764288616e-06,
|
| 36040 |
+
"loss": 18.8567,
|
| 36041 |
+
"step": 51360
|
| 36042 |
+
},
|
| 36043 |
+
{
|
| 36044 |
+
"epoch": 0.9532889349728715,
|
| 36045 |
+
"grad_norm": 35.15625,
|
| 36046 |
+
"learning_rate": 9.851048647100118e-06,
|
| 36047 |
+
"loss": 18.5148,
|
| 36048 |
+
"step": 51370
|
| 36049 |
+
},
|
| 36050 |
+
{
|
| 36051 |
+
"epoch": 0.9534745080573513,
|
| 36052 |
+
"grad_norm": 37.71875,
|
| 36053 |
+
"learning_rate": 9.851019651314075e-06,
|
| 36054 |
+
"loss": 18.6169,
|
| 36055 |
+
"step": 51380
|
| 36056 |
+
},
|
| 36057 |
+
{
|
| 36058 |
+
"epoch": 0.9536600811418312,
|
| 36059 |
+
"grad_norm": 36.375,
|
| 36060 |
+
"learning_rate": 9.850990655528033e-06,
|
| 36061 |
+
"loss": 18.8565,
|
| 36062 |
+
"step": 51390
|
| 36063 |
+
},
|
| 36064 |
+
{
|
| 36065 |
+
"epoch": 0.9538456542263111,
|
| 36066 |
+
"grad_norm": 36.28125,
|
| 36067 |
+
"learning_rate": 9.850961659741992e-06,
|
| 36068 |
+
"loss": 18.5506,
|
| 36069 |
+
"step": 51400
|
| 36070 |
+
},
|
| 36071 |
+
{
|
| 36072 |
+
"epoch": 0.9540312273107908,
|
| 36073 |
+
"grad_norm": 37.90625,
|
| 36074 |
+
"learning_rate": 9.850932663955947e-06,
|
| 36075 |
+
"loss": 18.6192,
|
| 36076 |
+
"step": 51410
|
| 36077 |
+
},
|
| 36078 |
+
{
|
| 36079 |
+
"epoch": 0.9542168003952707,
|
| 36080 |
+
"grad_norm": 38.71875,
|
| 36081 |
+
"learning_rate": 9.850903668169905e-06,
|
| 36082 |
+
"loss": 18.5314,
|
| 36083 |
+
"step": 51420
|
| 36084 |
+
},
|
| 36085 |
+
{
|
| 36086 |
+
"epoch": 0.9544023734797505,
|
| 36087 |
+
"grad_norm": 35.84375,
|
| 36088 |
+
"learning_rate": 9.850874672383864e-06,
|
| 36089 |
+
"loss": 18.5046,
|
| 36090 |
+
"step": 51430
|
| 36091 |
+
},
|
| 36092 |
+
{
|
| 36093 |
+
"epoch": 0.9545879465642303,
|
| 36094 |
+
"grad_norm": 34.71875,
|
| 36095 |
+
"learning_rate": 9.850845676597821e-06,
|
| 36096 |
+
"loss": 18.7414,
|
| 36097 |
+
"step": 51440
|
| 36098 |
+
},
|
| 36099 |
+
{
|
| 36100 |
+
"epoch": 0.9547735196487102,
|
| 36101 |
+
"grad_norm": 38.21875,
|
| 36102 |
+
"learning_rate": 9.850816680811779e-06,
|
| 36103 |
+
"loss": 18.5634,
|
| 36104 |
+
"step": 51450
|
| 36105 |
+
},
|
| 36106 |
+
{
|
| 36107 |
+
"epoch": 0.95495909273319,
|
| 36108 |
+
"grad_norm": 38.0,
|
| 36109 |
+
"learning_rate": 9.850787685025736e-06,
|
| 36110 |
+
"loss": 18.6599,
|
| 36111 |
+
"step": 51460
|
| 36112 |
+
},
|
| 36113 |
+
{
|
| 36114 |
+
"epoch": 0.9551446658176698,
|
| 36115 |
+
"grad_norm": 36.65625,
|
| 36116 |
+
"learning_rate": 9.850758689239694e-06,
|
| 36117 |
+
"loss": 18.7972,
|
| 36118 |
+
"step": 51470
|
| 36119 |
+
},
|
| 36120 |
+
{
|
| 36121 |
+
"epoch": 0.9553302389021496,
|
| 36122 |
+
"grad_norm": 38.3125,
|
| 36123 |
+
"learning_rate": 9.850729693453651e-06,
|
| 36124 |
+
"loss": 18.6985,
|
| 36125 |
+
"step": 51480
|
| 36126 |
+
},
|
| 36127 |
+
{
|
| 36128 |
+
"epoch": 0.9555158119866295,
|
| 36129 |
+
"grad_norm": 36.40625,
|
| 36130 |
+
"learning_rate": 9.850700697667608e-06,
|
| 36131 |
+
"loss": 18.596,
|
| 36132 |
+
"step": 51490
|
| 36133 |
+
},
|
| 36134 |
+
{
|
| 36135 |
+
"epoch": 0.9557013850711092,
|
| 36136 |
+
"grad_norm": 36.65625,
|
| 36137 |
+
"learning_rate": 9.850671701881567e-06,
|
| 36138 |
+
"loss": 18.604,
|
| 36139 |
+
"step": 51500
|
| 36140 |
+
},
|
| 36141 |
+
{
|
| 36142 |
+
"epoch": 0.9558869581555891,
|
| 36143 |
+
"grad_norm": 38.15625,
|
| 36144 |
+
"learning_rate": 9.850642706095523e-06,
|
| 36145 |
+
"loss": 18.7036,
|
| 36146 |
+
"step": 51510
|
| 36147 |
+
},
|
| 36148 |
+
{
|
| 36149 |
+
"epoch": 0.956072531240069,
|
| 36150 |
+
"grad_norm": 37.21875,
|
| 36151 |
+
"learning_rate": 9.85061371030948e-06,
|
| 36152 |
+
"loss": 19.1632,
|
| 36153 |
+
"step": 51520
|
| 36154 |
+
},
|
| 36155 |
+
{
|
| 36156 |
+
"epoch": 0.9562581043245487,
|
| 36157 |
+
"grad_norm": 34.875,
|
| 36158 |
+
"learning_rate": 9.85058471452344e-06,
|
| 36159 |
+
"loss": 18.6759,
|
| 36160 |
+
"step": 51530
|
| 36161 |
+
},
|
| 36162 |
+
{
|
| 36163 |
+
"epoch": 0.9564436774090286,
|
| 36164 |
+
"grad_norm": 37.4375,
|
| 36165 |
+
"learning_rate": 9.850555718737397e-06,
|
| 36166 |
+
"loss": 18.9442,
|
| 36167 |
+
"step": 51540
|
| 36168 |
+
},
|
| 36169 |
+
{
|
| 36170 |
+
"epoch": 0.9566292504935084,
|
| 36171 |
+
"grad_norm": 36.96875,
|
| 36172 |
+
"learning_rate": 9.850526722951354e-06,
|
| 36173 |
+
"loss": 18.8462,
|
| 36174 |
+
"step": 51550
|
| 36175 |
+
},
|
| 36176 |
+
{
|
| 36177 |
+
"epoch": 0.9568148235779882,
|
| 36178 |
+
"grad_norm": 37.40625,
|
| 36179 |
+
"learning_rate": 9.850497727165312e-06,
|
| 36180 |
+
"loss": 18.6035,
|
| 36181 |
+
"step": 51560
|
| 36182 |
+
},
|
| 36183 |
+
{
|
| 36184 |
+
"epoch": 0.9570003966624681,
|
| 36185 |
+
"grad_norm": 36.46875,
|
| 36186 |
+
"learning_rate": 9.85046873137927e-06,
|
| 36187 |
+
"loss": 18.6443,
|
| 36188 |
+
"step": 51570
|
| 36189 |
+
},
|
| 36190 |
+
{
|
| 36191 |
+
"epoch": 0.9571859697469479,
|
| 36192 |
+
"grad_norm": 39.125,
|
| 36193 |
+
"learning_rate": 9.850439735593227e-06,
|
| 36194 |
+
"loss": 18.9952,
|
| 36195 |
+
"step": 51580
|
| 36196 |
+
},
|
| 36197 |
+
{
|
| 36198 |
+
"epoch": 0.9573715428314278,
|
| 36199 |
+
"grad_norm": 38.1875,
|
| 36200 |
+
"learning_rate": 9.850410739807184e-06,
|
| 36201 |
+
"loss": 18.9026,
|
| 36202 |
+
"step": 51590
|
| 36203 |
+
},
|
| 36204 |
+
{
|
| 36205 |
+
"epoch": 0.9575571159159075,
|
| 36206 |
+
"grad_norm": 35.5625,
|
| 36207 |
+
"learning_rate": 9.850381744021143e-06,
|
| 36208 |
+
"loss": 18.7487,
|
| 36209 |
+
"step": 51600
|
| 36210 |
+
},
|
| 36211 |
+
{
|
| 36212 |
+
"epoch": 0.9577426890003874,
|
| 36213 |
+
"grad_norm": 38.0,
|
| 36214 |
+
"learning_rate": 9.8503527482351e-06,
|
| 36215 |
+
"loss": 18.9155,
|
| 36216 |
+
"step": 51610
|
| 36217 |
+
},
|
| 36218 |
+
{
|
| 36219 |
+
"epoch": 0.9579282620848673,
|
| 36220 |
+
"grad_norm": 34.65625,
|
| 36221 |
+
"learning_rate": 9.850323752449056e-06,
|
| 36222 |
+
"loss": 18.8469,
|
| 36223 |
+
"step": 51620
|
| 36224 |
+
},
|
| 36225 |
+
{
|
| 36226 |
+
"epoch": 0.958113835169347,
|
| 36227 |
+
"grad_norm": 37.0625,
|
| 36228 |
+
"learning_rate": 9.850294756663015e-06,
|
| 36229 |
+
"loss": 18.7591,
|
| 36230 |
+
"step": 51630
|
| 36231 |
+
},
|
| 36232 |
+
{
|
| 36233 |
+
"epoch": 0.9582994082538269,
|
| 36234 |
+
"grad_norm": 33.6875,
|
| 36235 |
+
"learning_rate": 9.850265760876973e-06,
|
| 36236 |
+
"loss": 18.4839,
|
| 36237 |
+
"step": 51640
|
| 36238 |
+
},
|
| 36239 |
+
{
|
| 36240 |
+
"epoch": 0.9584849813383067,
|
| 36241 |
+
"grad_norm": 34.71875,
|
| 36242 |
+
"learning_rate": 9.85023676509093e-06,
|
| 36243 |
+
"loss": 18.9344,
|
| 36244 |
+
"step": 51650
|
| 36245 |
+
},
|
| 36246 |
+
{
|
| 36247 |
+
"epoch": 0.9586705544227865,
|
| 36248 |
+
"grad_norm": 39.59375,
|
| 36249 |
+
"learning_rate": 9.850207769304888e-06,
|
| 36250 |
+
"loss": 18.9862,
|
| 36251 |
+
"step": 51660
|
| 36252 |
+
},
|
| 36253 |
+
{
|
| 36254 |
+
"epoch": 0.9588561275072663,
|
| 36255 |
+
"grad_norm": 37.875,
|
| 36256 |
+
"learning_rate": 9.850178773518845e-06,
|
| 36257 |
+
"loss": 18.5865,
|
| 36258 |
+
"step": 51670
|
| 36259 |
+
},
|
| 36260 |
+
{
|
| 36261 |
+
"epoch": 0.9590417005917462,
|
| 36262 |
+
"grad_norm": 36.8125,
|
| 36263 |
+
"learning_rate": 9.850149777732802e-06,
|
| 36264 |
+
"loss": 18.9963,
|
| 36265 |
+
"step": 51680
|
| 36266 |
+
},
|
| 36267 |
+
{
|
| 36268 |
+
"epoch": 0.959227273676226,
|
| 36269 |
+
"grad_norm": 37.21875,
|
| 36270 |
+
"learning_rate": 9.85012078194676e-06,
|
| 36271 |
+
"loss": 18.5694,
|
| 36272 |
+
"step": 51690
|
| 36273 |
+
},
|
| 36274 |
+
{
|
| 36275 |
+
"epoch": 0.9594128467607058,
|
| 36276 |
+
"grad_norm": 37.65625,
|
| 36277 |
+
"learning_rate": 9.850091786160719e-06,
|
| 36278 |
+
"loss": 18.5936,
|
| 36279 |
+
"step": 51700
|
| 36280 |
+
},
|
| 36281 |
+
{
|
| 36282 |
+
"epoch": 0.9595984198451857,
|
| 36283 |
+
"grad_norm": 38.09375,
|
| 36284 |
+
"learning_rate": 9.850062790374676e-06,
|
| 36285 |
+
"loss": 18.8217,
|
| 36286 |
+
"step": 51710
|
| 36287 |
+
},
|
| 36288 |
+
{
|
| 36289 |
+
"epoch": 0.9597839929296654,
|
| 36290 |
+
"grad_norm": 35.96875,
|
| 36291 |
+
"learning_rate": 9.850033794588634e-06,
|
| 36292 |
+
"loss": 18.8549,
|
| 36293 |
+
"step": 51720
|
| 36294 |
+
},
|
| 36295 |
+
{
|
| 36296 |
+
"epoch": 0.9599695660141453,
|
| 36297 |
+
"grad_norm": 36.46875,
|
| 36298 |
+
"learning_rate": 9.850004798802591e-06,
|
| 36299 |
+
"loss": 19.1592,
|
| 36300 |
+
"step": 51730
|
| 36301 |
+
},
|
| 36302 |
+
{
|
| 36303 |
+
"epoch": 0.9601551390986252,
|
| 36304 |
+
"grad_norm": 36.40625,
|
| 36305 |
+
"learning_rate": 9.849975803016549e-06,
|
| 36306 |
+
"loss": 18.723,
|
| 36307 |
+
"step": 51740
|
| 36308 |
+
},
|
| 36309 |
+
{
|
| 36310 |
+
"epoch": 0.960340712183105,
|
| 36311 |
+
"grad_norm": 36.5625,
|
| 36312 |
+
"learning_rate": 9.849946807230506e-06,
|
| 36313 |
+
"loss": 18.8267,
|
| 36314 |
+
"step": 51750
|
| 36315 |
+
},
|
| 36316 |
+
{
|
| 36317 |
+
"epoch": 0.9605262852675848,
|
| 36318 |
+
"grad_norm": 36.78125,
|
| 36319 |
+
"learning_rate": 9.849917811444463e-06,
|
| 36320 |
+
"loss": 18.6763,
|
| 36321 |
+
"step": 51760
|
| 36322 |
+
},
|
| 36323 |
+
{
|
| 36324 |
+
"epoch": 0.9607118583520646,
|
| 36325 |
+
"grad_norm": 33.90625,
|
| 36326 |
+
"learning_rate": 9.84988881565842e-06,
|
| 36327 |
+
"loss": 18.8519,
|
| 36328 |
+
"step": 51770
|
| 36329 |
+
},
|
| 36330 |
+
{
|
| 36331 |
+
"epoch": 0.9608974314365445,
|
| 36332 |
+
"grad_norm": 36.59375,
|
| 36333 |
+
"learning_rate": 9.849859819872378e-06,
|
| 36334 |
+
"loss": 18.4374,
|
| 36335 |
+
"step": 51780
|
| 36336 |
+
},
|
| 36337 |
+
{
|
| 36338 |
+
"epoch": 0.9610830045210242,
|
| 36339 |
+
"grad_norm": 36.4375,
|
| 36340 |
+
"learning_rate": 9.849830824086336e-06,
|
| 36341 |
+
"loss": 19.033,
|
| 36342 |
+
"step": 51790
|
| 36343 |
+
},
|
| 36344 |
+
{
|
| 36345 |
+
"epoch": 0.9612685776055041,
|
| 36346 |
+
"grad_norm": 36.3125,
|
| 36347 |
+
"learning_rate": 9.849801828300293e-06,
|
| 36348 |
+
"loss": 18.6082,
|
| 36349 |
+
"step": 51800
|
| 36350 |
+
},
|
| 36351 |
+
{
|
| 36352 |
+
"epoch": 0.961454150689984,
|
| 36353 |
+
"grad_norm": 37.625,
|
| 36354 |
+
"learning_rate": 9.849772832514252e-06,
|
| 36355 |
+
"loss": 18.4141,
|
| 36356 |
+
"step": 51810
|
| 36357 |
+
},
|
| 36358 |
+
{
|
| 36359 |
+
"epoch": 0.9616397237744637,
|
| 36360 |
+
"grad_norm": 35.34375,
|
| 36361 |
+
"learning_rate": 9.84974383672821e-06,
|
| 36362 |
+
"loss": 18.9108,
|
| 36363 |
+
"step": 51820
|
| 36364 |
+
},
|
| 36365 |
+
{
|
| 36366 |
+
"epoch": 0.9618252968589436,
|
| 36367 |
+
"grad_norm": 35.8125,
|
| 36368 |
+
"learning_rate": 9.849714840942167e-06,
|
| 36369 |
+
"loss": 18.8733,
|
| 36370 |
+
"step": 51830
|
| 36371 |
+
},
|
| 36372 |
+
{
|
| 36373 |
+
"epoch": 0.9620108699434234,
|
| 36374 |
+
"grad_norm": 37.21875,
|
| 36375 |
+
"learning_rate": 9.849685845156124e-06,
|
| 36376 |
+
"loss": 18.7665,
|
| 36377 |
+
"step": 51840
|
| 36378 |
+
},
|
| 36379 |
+
{
|
| 36380 |
+
"epoch": 0.9621964430279032,
|
| 36381 |
+
"grad_norm": 35.4375,
|
| 36382 |
+
"learning_rate": 9.849656849370082e-06,
|
| 36383 |
+
"loss": 18.7238,
|
| 36384 |
+
"step": 51850
|
| 36385 |
+
},
|
| 36386 |
+
{
|
| 36387 |
+
"epoch": 0.962382016112383,
|
| 36388 |
+
"grad_norm": 34.59375,
|
| 36389 |
+
"learning_rate": 9.84962785358404e-06,
|
| 36390 |
+
"loss": 18.8423,
|
| 36391 |
+
"step": 51860
|
| 36392 |
+
},
|
| 36393 |
+
{
|
| 36394 |
+
"epoch": 0.9625675891968629,
|
| 36395 |
+
"grad_norm": 36.65625,
|
| 36396 |
+
"learning_rate": 9.849598857797997e-06,
|
| 36397 |
+
"loss": 18.623,
|
| 36398 |
+
"step": 51870
|
| 36399 |
+
},
|
| 36400 |
+
{
|
| 36401 |
+
"epoch": 0.9627531622813427,
|
| 36402 |
+
"grad_norm": 36.84375,
|
| 36403 |
+
"learning_rate": 9.849569862011956e-06,
|
| 36404 |
+
"loss": 18.4343,
|
| 36405 |
+
"step": 51880
|
| 36406 |
+
},
|
| 36407 |
+
{
|
| 36408 |
+
"epoch": 0.9629387353658225,
|
| 36409 |
+
"grad_norm": 34.9375,
|
| 36410 |
+
"learning_rate": 9.849540866225911e-06,
|
| 36411 |
+
"loss": 18.7979,
|
| 36412 |
+
"step": 51890
|
| 36413 |
+
},
|
| 36414 |
+
{
|
| 36415 |
+
"epoch": 0.9631243084503024,
|
| 36416 |
+
"grad_norm": 36.875,
|
| 36417 |
+
"learning_rate": 9.849511870439869e-06,
|
| 36418 |
+
"loss": 18.9062,
|
| 36419 |
+
"step": 51900
|
| 36420 |
+
},
|
| 36421 |
+
{
|
| 36422 |
+
"epoch": 0.9633098815347821,
|
| 36423 |
+
"grad_norm": 38.59375,
|
| 36424 |
+
"learning_rate": 9.849482874653828e-06,
|
| 36425 |
+
"loss": 18.7673,
|
| 36426 |
+
"step": 51910
|
| 36427 |
+
},
|
| 36428 |
+
{
|
| 36429 |
+
"epoch": 0.963495454619262,
|
| 36430 |
+
"grad_norm": 36.71875,
|
| 36431 |
+
"learning_rate": 9.849453878867785e-06,
|
| 36432 |
+
"loss": 18.5212,
|
| 36433 |
+
"step": 51920
|
| 36434 |
+
},
|
| 36435 |
+
{
|
| 36436 |
+
"epoch": 0.9636810277037419,
|
| 36437 |
+
"grad_norm": 36.625,
|
| 36438 |
+
"learning_rate": 9.849424883081743e-06,
|
| 36439 |
+
"loss": 18.6261,
|
| 36440 |
+
"step": 51930
|
| 36441 |
+
},
|
| 36442 |
+
{
|
| 36443 |
+
"epoch": 0.9638666007882217,
|
| 36444 |
+
"grad_norm": 37.84375,
|
| 36445 |
+
"learning_rate": 9.8493958872957e-06,
|
| 36446 |
+
"loss": 19.0427,
|
| 36447 |
+
"step": 51940
|
| 36448 |
+
},
|
| 36449 |
+
{
|
| 36450 |
+
"epoch": 0.9640521738727015,
|
| 36451 |
+
"grad_norm": 36.9375,
|
| 36452 |
+
"learning_rate": 9.849366891509658e-06,
|
| 36453 |
+
"loss": 18.7989,
|
| 36454 |
+
"step": 51950
|
| 36455 |
+
},
|
| 36456 |
+
{
|
| 36457 |
+
"epoch": 0.9642377469571813,
|
| 36458 |
+
"grad_norm": 34.875,
|
| 36459 |
+
"learning_rate": 9.849337895723615e-06,
|
| 36460 |
+
"loss": 18.5892,
|
| 36461 |
+
"step": 51960
|
| 36462 |
+
},
|
| 36463 |
+
{
|
| 36464 |
+
"epoch": 0.9644233200416612,
|
| 36465 |
+
"grad_norm": 37.0,
|
| 36466 |
+
"learning_rate": 9.849308899937572e-06,
|
| 36467 |
+
"loss": 18.9571,
|
| 36468 |
+
"step": 51970
|
| 36469 |
+
},
|
| 36470 |
+
{
|
| 36471 |
+
"epoch": 0.964608893126141,
|
| 36472 |
+
"grad_norm": 34.90625,
|
| 36473 |
+
"learning_rate": 9.849279904151531e-06,
|
| 36474 |
+
"loss": 18.6608,
|
| 36475 |
+
"step": 51980
|
| 36476 |
+
},
|
| 36477 |
+
{
|
| 36478 |
+
"epoch": 0.9647944662106208,
|
| 36479 |
+
"grad_norm": 36.0,
|
| 36480 |
+
"learning_rate": 9.849250908365489e-06,
|
| 36481 |
+
"loss": 18.6838,
|
| 36482 |
+
"step": 51990
|
| 36483 |
+
},
|
| 36484 |
+
{
|
| 36485 |
+
"epoch": 0.9649800392951007,
|
| 36486 |
+
"grad_norm": 35.96875,
|
| 36487 |
+
"learning_rate": 9.849221912579445e-06,
|
| 36488 |
+
"loss": 18.5241,
|
| 36489 |
+
"step": 52000
|
| 36490 |
+
},
|
| 36491 |
+
{
|
| 36492 |
+
"epoch": 0.9651656123795804,
|
| 36493 |
+
"grad_norm": 34.5625,
|
| 36494 |
+
"learning_rate": 9.849192916793404e-06,
|
| 36495 |
+
"loss": 18.7984,
|
| 36496 |
+
"step": 52010
|
| 36497 |
+
},
|
| 36498 |
+
{
|
| 36499 |
+
"epoch": 0.9653511854640603,
|
| 36500 |
+
"grad_norm": 38.59375,
|
| 36501 |
+
"learning_rate": 9.849163921007361e-06,
|
| 36502 |
+
"loss": 18.8855,
|
| 36503 |
+
"step": 52020
|
| 36504 |
+
},
|
| 36505 |
+
{
|
| 36506 |
+
"epoch": 0.9655367585485402,
|
| 36507 |
+
"grad_norm": 38.34375,
|
| 36508 |
+
"learning_rate": 9.849134925221319e-06,
|
| 36509 |
+
"loss": 18.8712,
|
| 36510 |
+
"step": 52030
|
| 36511 |
+
},
|
| 36512 |
+
{
|
| 36513 |
+
"epoch": 0.9657223316330199,
|
| 36514 |
+
"grad_norm": 37.5,
|
| 36515 |
+
"learning_rate": 9.849105929435276e-06,
|
| 36516 |
+
"loss": 18.4082,
|
| 36517 |
+
"step": 52040
|
| 36518 |
+
},
|
| 36519 |
+
{
|
| 36520 |
+
"epoch": 0.9659079047174998,
|
| 36521 |
+
"grad_norm": 35.9375,
|
| 36522 |
+
"learning_rate": 9.849076933649233e-06,
|
| 36523 |
+
"loss": 18.8569,
|
| 36524 |
+
"step": 52050
|
| 36525 |
+
},
|
| 36526 |
+
{
|
| 36527 |
+
"epoch": 0.9660934778019796,
|
| 36528 |
+
"grad_norm": 35.5,
|
| 36529 |
+
"learning_rate": 9.84904793786319e-06,
|
| 36530 |
+
"loss": 19.0493,
|
| 36531 |
+
"step": 52060
|
| 36532 |
+
},
|
| 36533 |
+
{
|
| 36534 |
+
"epoch": 0.9662790508864594,
|
| 36535 |
+
"grad_norm": 35.0,
|
| 36536 |
+
"learning_rate": 9.849018942077148e-06,
|
| 36537 |
+
"loss": 18.684,
|
| 36538 |
+
"step": 52070
|
| 36539 |
+
},
|
| 36540 |
+
{
|
| 36541 |
+
"epoch": 0.9664646239709392,
|
| 36542 |
+
"grad_norm": 36.4375,
|
| 36543 |
+
"learning_rate": 9.848989946291107e-06,
|
| 36544 |
+
"loss": 18.4338,
|
| 36545 |
+
"step": 52080
|
| 36546 |
+
},
|
| 36547 |
+
{
|
| 36548 |
+
"epoch": 0.9666501970554191,
|
| 36549 |
+
"grad_norm": 37.75,
|
| 36550 |
+
"learning_rate": 9.848960950505065e-06,
|
| 36551 |
+
"loss": 19.0642,
|
| 36552 |
+
"step": 52090
|
| 36553 |
+
},
|
| 36554 |
+
{
|
| 36555 |
+
"epoch": 0.966835770139899,
|
| 36556 |
+
"grad_norm": 35.84375,
|
| 36557 |
+
"learning_rate": 9.84893195471902e-06,
|
| 36558 |
+
"loss": 18.8723,
|
| 36559 |
+
"step": 52100
|
| 36560 |
+
},
|
| 36561 |
+
{
|
| 36562 |
+
"epoch": 0.9670213432243787,
|
| 36563 |
+
"grad_norm": 37.125,
|
| 36564 |
+
"learning_rate": 9.84890295893298e-06,
|
| 36565 |
+
"loss": 18.7843,
|
| 36566 |
+
"step": 52110
|
| 36567 |
+
},
|
| 36568 |
+
{
|
| 36569 |
+
"epoch": 0.9672069163088586,
|
| 36570 |
+
"grad_norm": 36.1875,
|
| 36571 |
+
"learning_rate": 9.848873963146937e-06,
|
| 36572 |
+
"loss": 18.6314,
|
| 36573 |
+
"step": 52120
|
| 36574 |
+
},
|
| 36575 |
+
{
|
| 36576 |
+
"epoch": 0.9673924893933384,
|
| 36577 |
+
"grad_norm": 34.96875,
|
| 36578 |
+
"learning_rate": 9.848844967360894e-06,
|
| 36579 |
+
"loss": 18.5492,
|
| 36580 |
+
"step": 52130
|
| 36581 |
+
},
|
| 36582 |
+
{
|
| 36583 |
+
"epoch": 0.9675780624778182,
|
| 36584 |
+
"grad_norm": 35.8125,
|
| 36585 |
+
"learning_rate": 9.848815971574852e-06,
|
| 36586 |
+
"loss": 18.4559,
|
| 36587 |
+
"step": 52140
|
| 36588 |
+
},
|
| 36589 |
+
{
|
| 36590 |
+
"epoch": 0.967763635562298,
|
| 36591 |
+
"grad_norm": 36.40625,
|
| 36592 |
+
"learning_rate": 9.848786975788809e-06,
|
| 36593 |
+
"loss": 18.3422,
|
| 36594 |
+
"step": 52150
|
| 36595 |
+
},
|
| 36596 |
+
{
|
| 36597 |
+
"epoch": 0.9679492086467779,
|
| 36598 |
+
"grad_norm": 37.65625,
|
| 36599 |
+
"learning_rate": 9.848757980002766e-06,
|
| 36600 |
+
"loss": 18.8372,
|
| 36601 |
+
"step": 52160
|
| 36602 |
+
},
|
| 36603 |
+
{
|
| 36604 |
+
"epoch": 0.9681347817312577,
|
| 36605 |
+
"grad_norm": 37.59375,
|
| 36606 |
+
"learning_rate": 9.848728984216724e-06,
|
| 36607 |
+
"loss": 18.6309,
|
| 36608 |
+
"step": 52170
|
| 36609 |
+
},
|
| 36610 |
+
{
|
| 36611 |
+
"epoch": 0.9683203548157375,
|
| 36612 |
+
"grad_norm": 36.5625,
|
| 36613 |
+
"learning_rate": 9.848699988430683e-06,
|
| 36614 |
+
"loss": 18.9326,
|
| 36615 |
+
"step": 52180
|
| 36616 |
+
},
|
| 36617 |
+
{
|
| 36618 |
+
"epoch": 0.9685059279002174,
|
| 36619 |
+
"grad_norm": 36.625,
|
| 36620 |
+
"learning_rate": 9.84867099264464e-06,
|
| 36621 |
+
"loss": 18.8778,
|
| 36622 |
+
"step": 52190
|
| 36623 |
+
},
|
| 36624 |
+
{
|
| 36625 |
+
"epoch": 0.9686915009846971,
|
| 36626 |
+
"grad_norm": 37.6875,
|
| 36627 |
+
"learning_rate": 9.848641996858598e-06,
|
| 36628 |
+
"loss": 18.4787,
|
| 36629 |
+
"step": 52200
|
| 36630 |
+
},
|
| 36631 |
+
{
|
| 36632 |
+
"epoch": 0.968877074069177,
|
| 36633 |
+
"grad_norm": 35.84375,
|
| 36634 |
+
"learning_rate": 9.848613001072555e-06,
|
| 36635 |
+
"loss": 18.6075,
|
| 36636 |
+
"step": 52210
|
| 36637 |
+
},
|
| 36638 |
+
{
|
| 36639 |
+
"epoch": 0.9690626471536569,
|
| 36640 |
+
"grad_norm": 37.8125,
|
| 36641 |
+
"learning_rate": 9.848584005286513e-06,
|
| 36642 |
+
"loss": 18.5023,
|
| 36643 |
+
"step": 52220
|
| 36644 |
+
},
|
| 36645 |
+
{
|
| 36646 |
+
"epoch": 0.9692482202381366,
|
| 36647 |
+
"grad_norm": 35.75,
|
| 36648 |
+
"learning_rate": 9.84855500950047e-06,
|
| 36649 |
+
"loss": 18.3154,
|
| 36650 |
+
"step": 52230
|
| 36651 |
+
},
|
| 36652 |
+
{
|
| 36653 |
+
"epoch": 0.9694337933226165,
|
| 36654 |
+
"grad_norm": 37.28125,
|
| 36655 |
+
"learning_rate": 9.848526013714427e-06,
|
| 36656 |
+
"loss": 19.0297,
|
| 36657 |
+
"step": 52240
|
| 36658 |
+
},
|
| 36659 |
+
{
|
| 36660 |
+
"epoch": 0.9696193664070963,
|
| 36661 |
+
"grad_norm": 35.9375,
|
| 36662 |
+
"learning_rate": 9.848497017928385e-06,
|
| 36663 |
+
"loss": 18.438,
|
| 36664 |
+
"step": 52250
|
| 36665 |
+
},
|
| 36666 |
+
{
|
| 36667 |
+
"epoch": 0.9698049394915761,
|
| 36668 |
+
"grad_norm": 36.53125,
|
| 36669 |
+
"learning_rate": 9.848468022142342e-06,
|
| 36670 |
+
"loss": 18.473,
|
| 36671 |
+
"step": 52260
|
| 36672 |
+
},
|
| 36673 |
+
{
|
| 36674 |
+
"epoch": 0.969990512576056,
|
| 36675 |
+
"grad_norm": 37.34375,
|
| 36676 |
+
"learning_rate": 9.8484390263563e-06,
|
| 36677 |
+
"loss": 19.1365,
|
| 36678 |
+
"step": 52270
|
| 36679 |
+
},
|
| 36680 |
+
{
|
| 36681 |
+
"epoch": 0.9701760856605358,
|
| 36682 |
+
"grad_norm": 37.40625,
|
| 36683 |
+
"learning_rate": 9.848410030570257e-06,
|
| 36684 |
+
"loss": 18.9908,
|
| 36685 |
+
"step": 52280
|
| 36686 |
+
},
|
| 36687 |
+
{
|
| 36688 |
+
"epoch": 0.9703616587450157,
|
| 36689 |
+
"grad_norm": 37.96875,
|
| 36690 |
+
"learning_rate": 9.848381034784216e-06,
|
| 36691 |
+
"loss": 19.0627,
|
| 36692 |
+
"step": 52290
|
| 36693 |
+
},
|
| 36694 |
+
{
|
| 36695 |
+
"epoch": 0.9705472318294954,
|
| 36696 |
+
"grad_norm": 36.59375,
|
| 36697 |
+
"learning_rate": 9.848352038998174e-06,
|
| 36698 |
+
"loss": 18.6671,
|
| 36699 |
+
"step": 52300
|
| 36700 |
+
},
|
| 36701 |
+
{
|
| 36702 |
+
"epoch": 0.9707328049139753,
|
| 36703 |
+
"grad_norm": 37.0625,
|
| 36704 |
+
"learning_rate": 9.848323043212131e-06,
|
| 36705 |
+
"loss": 18.7139,
|
| 36706 |
+
"step": 52310
|
| 36707 |
+
},
|
| 36708 |
+
{
|
| 36709 |
+
"epoch": 0.9709183779984552,
|
| 36710 |
+
"grad_norm": 36.34375,
|
| 36711 |
+
"learning_rate": 9.848294047426088e-06,
|
| 36712 |
+
"loss": 18.8432,
|
| 36713 |
+
"step": 52320
|
| 36714 |
+
},
|
| 36715 |
+
{
|
| 36716 |
+
"epoch": 0.9711039510829349,
|
| 36717 |
+
"grad_norm": 35.34375,
|
| 36718 |
+
"learning_rate": 9.848265051640046e-06,
|
| 36719 |
+
"loss": 18.6022,
|
| 36720 |
+
"step": 52330
|
| 36721 |
+
},
|
| 36722 |
+
{
|
| 36723 |
+
"epoch": 0.9712895241674148,
|
| 36724 |
+
"grad_norm": 36.9375,
|
| 36725 |
+
"learning_rate": 9.848236055854003e-06,
|
| 36726 |
+
"loss": 18.6221,
|
| 36727 |
+
"step": 52340
|
| 36728 |
+
},
|
| 36729 |
+
{
|
| 36730 |
+
"epoch": 0.9714750972518946,
|
| 36731 |
+
"grad_norm": 37.59375,
|
| 36732 |
+
"learning_rate": 9.84820706006796e-06,
|
| 36733 |
+
"loss": 18.5104,
|
| 36734 |
+
"step": 52350
|
| 36735 |
+
},
|
| 36736 |
+
{
|
| 36737 |
+
"epoch": 0.9716606703363744,
|
| 36738 |
+
"grad_norm": 37.28125,
|
| 36739 |
+
"learning_rate": 9.84817806428192e-06,
|
| 36740 |
+
"loss": 18.7772,
|
| 36741 |
+
"step": 52360
|
| 36742 |
+
},
|
| 36743 |
+
{
|
| 36744 |
+
"epoch": 0.9718462434208542,
|
| 36745 |
+
"grad_norm": 38.84375,
|
| 36746 |
+
"learning_rate": 9.848149068495875e-06,
|
| 36747 |
+
"loss": 18.6431,
|
| 36748 |
+
"step": 52370
|
| 36749 |
+
},
|
| 36750 |
+
{
|
| 36751 |
+
"epoch": 0.9720318165053341,
|
| 36752 |
+
"grad_norm": 37.15625,
|
| 36753 |
+
"learning_rate": 9.848120072709833e-06,
|
| 36754 |
+
"loss": 18.5793,
|
| 36755 |
+
"step": 52380
|
| 36756 |
+
},
|
| 36757 |
+
{
|
| 36758 |
+
"epoch": 0.9722173895898139,
|
| 36759 |
+
"grad_norm": 36.125,
|
| 36760 |
+
"learning_rate": 9.848091076923792e-06,
|
| 36761 |
+
"loss": 18.8981,
|
| 36762 |
+
"step": 52390
|
| 36763 |
+
},
|
| 36764 |
+
{
|
| 36765 |
+
"epoch": 0.9724029626742937,
|
| 36766 |
+
"grad_norm": 37.28125,
|
| 36767 |
+
"learning_rate": 9.84806208113775e-06,
|
| 36768 |
+
"loss": 18.5569,
|
| 36769 |
+
"step": 52400
|
| 36770 |
+
},
|
| 36771 |
+
{
|
| 36772 |
+
"epoch": 0.9725885357587736,
|
| 36773 |
+
"grad_norm": 35.8125,
|
| 36774 |
+
"learning_rate": 9.848033085351707e-06,
|
| 36775 |
+
"loss": 18.7243,
|
| 36776 |
+
"step": 52410
|
| 36777 |
+
},
|
| 36778 |
+
{
|
| 36779 |
+
"epoch": 0.9727741088432533,
|
| 36780 |
+
"grad_norm": 37.4375,
|
| 36781 |
+
"learning_rate": 9.848004089565664e-06,
|
| 36782 |
+
"loss": 19.0858,
|
| 36783 |
+
"step": 52420
|
| 36784 |
+
},
|
| 36785 |
+
{
|
| 36786 |
+
"epoch": 0.9729596819277332,
|
| 36787 |
+
"grad_norm": 36.5,
|
| 36788 |
+
"learning_rate": 9.847975093779622e-06,
|
| 36789 |
+
"loss": 18.5945,
|
| 36790 |
+
"step": 52430
|
| 36791 |
+
},
|
| 36792 |
+
{
|
| 36793 |
+
"epoch": 0.973145255012213,
|
| 36794 |
+
"grad_norm": 38.4375,
|
| 36795 |
+
"learning_rate": 9.847946097993579e-06,
|
| 36796 |
+
"loss": 18.5578,
|
| 36797 |
+
"step": 52440
|
| 36798 |
+
},
|
| 36799 |
+
{
|
| 36800 |
+
"epoch": 0.9733308280966928,
|
| 36801 |
+
"grad_norm": 35.40625,
|
| 36802 |
+
"learning_rate": 9.847917102207536e-06,
|
| 36803 |
+
"loss": 18.5476,
|
| 36804 |
+
"step": 52450
|
| 36805 |
+
},
|
| 36806 |
+
{
|
| 36807 |
+
"epoch": 0.9735164011811727,
|
| 36808 |
+
"grad_norm": 37.34375,
|
| 36809 |
+
"learning_rate": 9.847888106421495e-06,
|
| 36810 |
+
"loss": 18.6942,
|
| 36811 |
+
"step": 52460
|
| 36812 |
+
},
|
| 36813 |
+
{
|
| 36814 |
+
"epoch": 0.9737019742656525,
|
| 36815 |
+
"grad_norm": 35.71875,
|
| 36816 |
+
"learning_rate": 9.847859110635453e-06,
|
| 36817 |
+
"loss": 18.4927,
|
| 36818 |
+
"step": 52470
|
| 36819 |
+
},
|
| 36820 |
+
{
|
| 36821 |
+
"epoch": 0.9738875473501324,
|
| 36822 |
+
"grad_norm": 34.90625,
|
| 36823 |
+
"learning_rate": 9.847830114849409e-06,
|
| 36824 |
+
"loss": 18.5665,
|
| 36825 |
+
"step": 52480
|
| 36826 |
+
},
|
| 36827 |
+
{
|
| 36828 |
+
"epoch": 0.9740731204346121,
|
| 36829 |
+
"grad_norm": 35.46875,
|
| 36830 |
+
"learning_rate": 9.847801119063368e-06,
|
| 36831 |
+
"loss": 18.9818,
|
| 36832 |
+
"step": 52490
|
| 36833 |
+
},
|
| 36834 |
+
{
|
| 36835 |
+
"epoch": 0.974258693519092,
|
| 36836 |
+
"grad_norm": 35.90625,
|
| 36837 |
+
"learning_rate": 9.847772123277325e-06,
|
| 36838 |
+
"loss": 18.3819,
|
| 36839 |
+
"step": 52500
|
| 36840 |
+
},
|
| 36841 |
+
{
|
| 36842 |
+
"epoch": 0.9744442666035719,
|
| 36843 |
+
"grad_norm": 37.40625,
|
| 36844 |
+
"learning_rate": 9.847743127491283e-06,
|
| 36845 |
+
"loss": 18.266,
|
| 36846 |
+
"step": 52510
|
| 36847 |
+
},
|
| 36848 |
+
{
|
| 36849 |
+
"epoch": 0.9746298396880516,
|
| 36850 |
+
"grad_norm": 36.40625,
|
| 36851 |
+
"learning_rate": 9.84771413170524e-06,
|
| 36852 |
+
"loss": 19.177,
|
| 36853 |
+
"step": 52520
|
| 36854 |
+
},
|
| 36855 |
+
{
|
| 36856 |
+
"epoch": 0.9748154127725315,
|
| 36857 |
+
"grad_norm": 35.25,
|
| 36858 |
+
"learning_rate": 9.847685135919197e-06,
|
| 36859 |
+
"loss": 18.7006,
|
| 36860 |
+
"step": 52530
|
| 36861 |
+
},
|
| 36862 |
+
{
|
| 36863 |
+
"epoch": 0.9750009858570113,
|
| 36864 |
+
"grad_norm": 36.75,
|
| 36865 |
+
"learning_rate": 9.847656140133155e-06,
|
| 36866 |
+
"loss": 18.8634,
|
| 36867 |
+
"step": 52540
|
| 36868 |
+
},
|
| 36869 |
+
{
|
| 36870 |
+
"epoch": 0.9751865589414911,
|
| 36871 |
+
"grad_norm": 36.25,
|
| 36872 |
+
"learning_rate": 9.847627144347112e-06,
|
| 36873 |
+
"loss": 18.3737,
|
| 36874 |
+
"step": 52550
|
| 36875 |
+
},
|
| 36876 |
+
{
|
| 36877 |
+
"epoch": 0.975372132025971,
|
| 36878 |
+
"grad_norm": 37.21875,
|
| 36879 |
+
"learning_rate": 9.847598148561071e-06,
|
| 36880 |
+
"loss": 18.6165,
|
| 36881 |
+
"step": 52560
|
| 36882 |
+
},
|
| 36883 |
+
{
|
| 36884 |
+
"epoch": 0.9755577051104508,
|
| 36885 |
+
"grad_norm": 37.03125,
|
| 36886 |
+
"learning_rate": 9.847569152775029e-06,
|
| 36887 |
+
"loss": 18.7161,
|
| 36888 |
+
"step": 52570
|
| 36889 |
+
},
|
| 36890 |
+
{
|
| 36891 |
+
"epoch": 0.9757432781949306,
|
| 36892 |
+
"grad_norm": 34.65625,
|
| 36893 |
+
"learning_rate": 9.847540156988986e-06,
|
| 36894 |
+
"loss": 19.034,
|
| 36895 |
+
"step": 52580
|
| 36896 |
+
},
|
| 36897 |
+
{
|
| 36898 |
+
"epoch": 0.9759288512794104,
|
| 36899 |
+
"grad_norm": 37.09375,
|
| 36900 |
+
"learning_rate": 9.847511161202943e-06,
|
| 36901 |
+
"loss": 18.5442,
|
| 36902 |
+
"step": 52590
|
| 36903 |
+
},
|
| 36904 |
+
{
|
| 36905 |
+
"epoch": 0.9761144243638903,
|
| 36906 |
+
"grad_norm": 36.09375,
|
| 36907 |
+
"learning_rate": 9.847482165416901e-06,
|
| 36908 |
+
"loss": 18.6233,
|
| 36909 |
+
"step": 52600
|
| 36910 |
+
},
|
| 36911 |
+
{
|
| 36912 |
+
"epoch": 0.97629999744837,
|
| 36913 |
+
"grad_norm": 36.5,
|
| 36914 |
+
"learning_rate": 9.847453169630858e-06,
|
| 36915 |
+
"loss": 18.7935,
|
| 36916 |
+
"step": 52610
|
| 36917 |
+
},
|
| 36918 |
+
{
|
| 36919 |
+
"epoch": 0.9764855705328499,
|
| 36920 |
+
"grad_norm": 38.0,
|
| 36921 |
+
"learning_rate": 9.847424173844816e-06,
|
| 36922 |
+
"loss": 19.0615,
|
| 36923 |
+
"step": 52620
|
| 36924 |
+
},
|
| 36925 |
+
{
|
| 36926 |
+
"epoch": 0.9766711436173298,
|
| 36927 |
+
"grad_norm": 36.78125,
|
| 36928 |
+
"learning_rate": 9.847395178058775e-06,
|
| 36929 |
+
"loss": 18.2889,
|
| 36930 |
+
"step": 52630
|
| 36931 |
+
},
|
| 36932 |
+
{
|
| 36933 |
+
"epoch": 0.9768567167018096,
|
| 36934 |
+
"grad_norm": 37.28125,
|
| 36935 |
+
"learning_rate": 9.84736618227273e-06,
|
| 36936 |
+
"loss": 18.338,
|
| 36937 |
+
"step": 52640
|
| 36938 |
+
},
|
| 36939 |
+
{
|
| 36940 |
+
"epoch": 0.9770422897862894,
|
| 36941 |
+
"grad_norm": 38.15625,
|
| 36942 |
+
"learning_rate": 9.847337186486688e-06,
|
| 36943 |
+
"loss": 18.6097,
|
| 36944 |
+
"step": 52650
|
| 36945 |
+
},
|
| 36946 |
+
{
|
| 36947 |
+
"epoch": 0.9772278628707692,
|
| 36948 |
+
"grad_norm": 37.5625,
|
| 36949 |
+
"learning_rate": 9.847308190700647e-06,
|
| 36950 |
+
"loss": 18.959,
|
| 36951 |
+
"step": 52660
|
| 36952 |
+
},
|
| 36953 |
+
{
|
| 36954 |
+
"epoch": 0.9774134359552491,
|
| 36955 |
+
"grad_norm": 35.9375,
|
| 36956 |
+
"learning_rate": 9.847279194914604e-06,
|
| 36957 |
+
"loss": 18.3508,
|
| 36958 |
+
"step": 52670
|
| 36959 |
+
},
|
| 36960 |
+
{
|
| 36961 |
+
"epoch": 0.9775990090397289,
|
| 36962 |
+
"grad_norm": 36.65625,
|
| 36963 |
+
"learning_rate": 9.847250199128562e-06,
|
| 36964 |
+
"loss": 18.6836,
|
| 36965 |
+
"step": 52680
|
| 36966 |
+
},
|
| 36967 |
+
{
|
| 36968 |
+
"epoch": 0.9777845821242087,
|
| 36969 |
+
"grad_norm": 35.6875,
|
| 36970 |
+
"learning_rate": 9.84722120334252e-06,
|
| 36971 |
+
"loss": 18.8362,
|
| 36972 |
+
"step": 52690
|
| 36973 |
+
},
|
| 36974 |
+
{
|
| 36975 |
+
"epoch": 0.9779701552086886,
|
| 36976 |
+
"grad_norm": 35.78125,
|
| 36977 |
+
"learning_rate": 9.847192207556477e-06,
|
| 36978 |
+
"loss": 18.6928,
|
| 36979 |
+
"step": 52700
|
| 36980 |
+
},
|
| 36981 |
+
{
|
| 36982 |
+
"epoch": 0.9781557282931683,
|
| 36983 |
+
"grad_norm": 34.125,
|
| 36984 |
+
"learning_rate": 9.847163211770434e-06,
|
| 36985 |
+
"loss": 18.8454,
|
| 36986 |
+
"step": 52710
|
| 36987 |
+
},
|
| 36988 |
+
{
|
| 36989 |
+
"epoch": 0.9783413013776482,
|
| 36990 |
+
"grad_norm": 35.78125,
|
| 36991 |
+
"learning_rate": 9.847134215984391e-06,
|
| 36992 |
+
"loss": 18.703,
|
| 36993 |
+
"step": 52720
|
| 36994 |
+
},
|
| 36995 |
+
{
|
| 36996 |
+
"epoch": 0.978526874462128,
|
| 36997 |
+
"grad_norm": 35.25,
|
| 36998 |
+
"learning_rate": 9.847105220198349e-06,
|
| 36999 |
+
"loss": 18.9489,
|
| 37000 |
+
"step": 52730
|
| 37001 |
+
},
|
| 37002 |
+
{
|
| 37003 |
+
"epoch": 0.9787124475466078,
|
| 37004 |
+
"grad_norm": 36.875,
|
| 37005 |
+
"learning_rate": 9.847076224412308e-06,
|
| 37006 |
+
"loss": 19.0491,
|
| 37007 |
+
"step": 52740
|
| 37008 |
+
},
|
| 37009 |
+
{
|
| 37010 |
+
"epoch": 0.9788980206310877,
|
| 37011 |
+
"grad_norm": 37.0,
|
| 37012 |
+
"learning_rate": 9.847047228626264e-06,
|
| 37013 |
+
"loss": 18.4266,
|
| 37014 |
+
"step": 52750
|
| 37015 |
+
},
|
| 37016 |
+
{
|
| 37017 |
+
"epoch": 0.9790835937155675,
|
| 37018 |
+
"grad_norm": 35.84375,
|
| 37019 |
+
"learning_rate": 9.847018232840223e-06,
|
| 37020 |
+
"loss": 18.529,
|
| 37021 |
+
"step": 52760
|
| 37022 |
+
},
|
| 37023 |
+
{
|
| 37024 |
+
"epoch": 0.9792691668000473,
|
| 37025 |
+
"grad_norm": 36.0625,
|
| 37026 |
+
"learning_rate": 9.84698923705418e-06,
|
| 37027 |
+
"loss": 18.3869,
|
| 37028 |
+
"step": 52770
|
| 37029 |
+
},
|
| 37030 |
+
{
|
| 37031 |
+
"epoch": 0.9794547398845271,
|
| 37032 |
+
"grad_norm": 38.0,
|
| 37033 |
+
"learning_rate": 9.846960241268138e-06,
|
| 37034 |
+
"loss": 18.7703,
|
| 37035 |
+
"step": 52780
|
| 37036 |
+
},
|
| 37037 |
+
{
|
| 37038 |
+
"epoch": 0.979640312969007,
|
| 37039 |
+
"grad_norm": 38.09375,
|
| 37040 |
+
"learning_rate": 9.846931245482095e-06,
|
| 37041 |
+
"loss": 18.6845,
|
| 37042 |
+
"step": 52790
|
| 37043 |
+
},
|
| 37044 |
+
{
|
| 37045 |
+
"epoch": 0.9798258860534867,
|
| 37046 |
+
"grad_norm": 35.96875,
|
| 37047 |
+
"learning_rate": 9.846902249696052e-06,
|
| 37048 |
+
"loss": 18.5895,
|
| 37049 |
+
"step": 52800
|
| 37050 |
+
},
|
| 37051 |
+
{
|
| 37052 |
+
"epoch": 0.9800114591379666,
|
| 37053 |
+
"grad_norm": 36.75,
|
| 37054 |
+
"learning_rate": 9.84687325391001e-06,
|
| 37055 |
+
"loss": 18.6341,
|
| 37056 |
+
"step": 52810
|
| 37057 |
+
},
|
| 37058 |
+
{
|
| 37059 |
+
"epoch": 0.9801970322224465,
|
| 37060 |
+
"grad_norm": 38.5625,
|
| 37061 |
+
"learning_rate": 9.846844258123967e-06,
|
| 37062 |
+
"loss": 18.6194,
|
| 37063 |
+
"step": 52820
|
| 37064 |
+
},
|
| 37065 |
+
{
|
| 37066 |
+
"epoch": 0.9803826053069263,
|
| 37067 |
+
"grad_norm": 36.28125,
|
| 37068 |
+
"learning_rate": 9.846815262337925e-06,
|
| 37069 |
+
"loss": 18.7706,
|
| 37070 |
+
"step": 52830
|
| 37071 |
+
},
|
| 37072 |
+
{
|
| 37073 |
+
"epoch": 0.9805681783914061,
|
| 37074 |
+
"grad_norm": 37.4375,
|
| 37075 |
+
"learning_rate": 9.846786266551884e-06,
|
| 37076 |
+
"loss": 18.7345,
|
| 37077 |
+
"step": 52840
|
| 37078 |
+
},
|
| 37079 |
+
{
|
| 37080 |
+
"epoch": 0.980753751475886,
|
| 37081 |
+
"grad_norm": 37.125,
|
| 37082 |
+
"learning_rate": 9.84675727076584e-06,
|
| 37083 |
+
"loss": 18.2325,
|
| 37084 |
+
"step": 52850
|
| 37085 |
+
},
|
| 37086 |
+
{
|
| 37087 |
+
"epoch": 0.9809393245603658,
|
| 37088 |
+
"grad_norm": 36.53125,
|
| 37089 |
+
"learning_rate": 9.846728274979797e-06,
|
| 37090 |
+
"loss": 18.2759,
|
| 37091 |
+
"step": 52860
|
| 37092 |
+
},
|
| 37093 |
+
{
|
| 37094 |
+
"epoch": 0.9811248976448456,
|
| 37095 |
+
"grad_norm": 35.5625,
|
| 37096 |
+
"learning_rate": 9.846699279193756e-06,
|
| 37097 |
+
"loss": 18.5422,
|
| 37098 |
+
"step": 52870
|
| 37099 |
+
},
|
| 37100 |
+
{
|
| 37101 |
+
"epoch": 0.9813104707293254,
|
| 37102 |
+
"grad_norm": 36.21875,
|
| 37103 |
+
"learning_rate": 9.846670283407713e-06,
|
| 37104 |
+
"loss": 18.8652,
|
| 37105 |
+
"step": 52880
|
| 37106 |
+
},
|
| 37107 |
+
{
|
| 37108 |
+
"epoch": 0.9814960438138053,
|
| 37109 |
+
"grad_norm": 36.78125,
|
| 37110 |
+
"learning_rate": 9.84664128762167e-06,
|
| 37111 |
+
"loss": 18.8132,
|
| 37112 |
+
"step": 52890
|
| 37113 |
+
},
|
| 37114 |
+
{
|
| 37115 |
+
"epoch": 0.981681616898285,
|
| 37116 |
+
"grad_norm": 34.1875,
|
| 37117 |
+
"learning_rate": 9.846612291835628e-06,
|
| 37118 |
+
"loss": 18.7067,
|
| 37119 |
+
"step": 52900
|
| 37120 |
+
},
|
| 37121 |
+
{
|
| 37122 |
+
"epoch": 0.9818671899827649,
|
| 37123 |
+
"grad_norm": 38.78125,
|
| 37124 |
+
"learning_rate": 9.846583296049586e-06,
|
| 37125 |
+
"loss": 18.8695,
|
| 37126 |
+
"step": 52910
|
| 37127 |
+
},
|
| 37128 |
+
{
|
| 37129 |
+
"epoch": 0.9820527630672448,
|
| 37130 |
+
"grad_norm": 36.84375,
|
| 37131 |
+
"learning_rate": 9.846554300263543e-06,
|
| 37132 |
+
"loss": 18.9407,
|
| 37133 |
+
"step": 52920
|
| 37134 |
+
},
|
| 37135 |
+
{
|
| 37136 |
+
"epoch": 0.9822383361517245,
|
| 37137 |
+
"grad_norm": 37.0,
|
| 37138 |
+
"learning_rate": 9.8465253044775e-06,
|
| 37139 |
+
"loss": 18.5586,
|
| 37140 |
+
"step": 52930
|
| 37141 |
+
},
|
| 37142 |
+
{
|
| 37143 |
+
"epoch": 0.9824239092362044,
|
| 37144 |
+
"grad_norm": 37.0,
|
| 37145 |
+
"learning_rate": 9.84649630869146e-06,
|
| 37146 |
+
"loss": 18.701,
|
| 37147 |
+
"step": 52940
|
| 37148 |
+
},
|
| 37149 |
+
{
|
| 37150 |
+
"epoch": 0.9826094823206842,
|
| 37151 |
+
"grad_norm": 33.875,
|
| 37152 |
+
"learning_rate": 9.846467312905417e-06,
|
| 37153 |
+
"loss": 18.6175,
|
| 37154 |
+
"step": 52950
|
| 37155 |
+
},
|
| 37156 |
+
{
|
| 37157 |
+
"epoch": 0.982795055405164,
|
| 37158 |
+
"grad_norm": 36.78125,
|
| 37159 |
+
"learning_rate": 9.846438317119373e-06,
|
| 37160 |
+
"loss": 18.4298,
|
| 37161 |
+
"step": 52960
|
| 37162 |
+
},
|
| 37163 |
+
{
|
| 37164 |
+
"epoch": 0.9829806284896438,
|
| 37165 |
+
"grad_norm": 37.75,
|
| 37166 |
+
"learning_rate": 9.846409321333332e-06,
|
| 37167 |
+
"loss": 18.3273,
|
| 37168 |
+
"step": 52970
|
| 37169 |
+
},
|
| 37170 |
+
{
|
| 37171 |
+
"epoch": 0.9831662015741237,
|
| 37172 |
+
"grad_norm": 36.9375,
|
| 37173 |
+
"learning_rate": 9.846380325547289e-06,
|
| 37174 |
+
"loss": 18.5626,
|
| 37175 |
+
"step": 52980
|
| 37176 |
+
},
|
| 37177 |
+
{
|
| 37178 |
+
"epoch": 0.9833517746586035,
|
| 37179 |
+
"grad_norm": 36.21875,
|
| 37180 |
+
"learning_rate": 9.846351329761247e-06,
|
| 37181 |
+
"loss": 18.6245,
|
| 37182 |
+
"step": 52990
|
| 37183 |
+
},
|
| 37184 |
+
{
|
| 37185 |
+
"epoch": 0.9835373477430833,
|
| 37186 |
+
"grad_norm": 35.09375,
|
| 37187 |
+
"learning_rate": 9.846322333975204e-06,
|
| 37188 |
+
"loss": 18.2085,
|
| 37189 |
+
"step": 53000
|
| 37190 |
+
},
|
| 37191 |
+
{
|
| 37192 |
+
"epoch": 0.9837229208275632,
|
| 37193 |
+
"grad_norm": 37.78125,
|
| 37194 |
+
"learning_rate": 9.846293338189161e-06,
|
| 37195 |
+
"loss": 19.0781,
|
| 37196 |
+
"step": 53010
|
| 37197 |
+
},
|
| 37198 |
+
{
|
| 37199 |
+
"epoch": 0.983908493912043,
|
| 37200 |
+
"grad_norm": 37.15625,
|
| 37201 |
+
"learning_rate": 9.846264342403119e-06,
|
| 37202 |
+
"loss": 18.8288,
|
| 37203 |
+
"step": 53020
|
| 37204 |
+
},
|
| 37205 |
+
{
|
| 37206 |
+
"epoch": 0.9840940669965228,
|
| 37207 |
+
"grad_norm": 36.25,
|
| 37208 |
+
"learning_rate": 9.846235346617076e-06,
|
| 37209 |
+
"loss": 18.6047,
|
| 37210 |
+
"step": 53030
|
| 37211 |
+
},
|
| 37212 |
+
{
|
| 37213 |
+
"epoch": 0.9842796400810027,
|
| 37214 |
+
"grad_norm": 36.3125,
|
| 37215 |
+
"learning_rate": 9.846206350831035e-06,
|
| 37216 |
+
"loss": 18.9381,
|
| 37217 |
+
"step": 53040
|
| 37218 |
+
},
|
| 37219 |
+
{
|
| 37220 |
+
"epoch": 0.9844652131654825,
|
| 37221 |
+
"grad_norm": 36.4375,
|
| 37222 |
+
"learning_rate": 9.846177355044993e-06,
|
| 37223 |
+
"loss": 18.6755,
|
| 37224 |
+
"step": 53050
|
| 37225 |
+
},
|
| 37226 |
+
{
|
| 37227 |
+
"epoch": 0.9846507862499623,
|
| 37228 |
+
"grad_norm": 36.59375,
|
| 37229 |
+
"learning_rate": 9.84614835925895e-06,
|
| 37230 |
+
"loss": 18.5902,
|
| 37231 |
+
"step": 53060
|
| 37232 |
+
},
|
| 37233 |
+
{
|
| 37234 |
+
"epoch": 0.9848363593344421,
|
| 37235 |
+
"grad_norm": 35.90625,
|
| 37236 |
+
"learning_rate": 9.846119363472907e-06,
|
| 37237 |
+
"loss": 18.8222,
|
| 37238 |
+
"step": 53070
|
| 37239 |
+
},
|
| 37240 |
+
{
|
| 37241 |
+
"epoch": 0.985021932418922,
|
| 37242 |
+
"grad_norm": 36.75,
|
| 37243 |
+
"learning_rate": 9.846090367686865e-06,
|
| 37244 |
+
"loss": 18.9657,
|
| 37245 |
+
"step": 53080
|
| 37246 |
+
},
|
| 37247 |
+
{
|
| 37248 |
+
"epoch": 0.9852075055034017,
|
| 37249 |
+
"grad_norm": 36.875,
|
| 37250 |
+
"learning_rate": 9.846061371900822e-06,
|
| 37251 |
+
"loss": 18.6081,
|
| 37252 |
+
"step": 53090
|
| 37253 |
+
},
|
| 37254 |
+
{
|
| 37255 |
+
"epoch": 0.9853930785878816,
|
| 37256 |
+
"grad_norm": 38.75,
|
| 37257 |
+
"learning_rate": 9.84603237611478e-06,
|
| 37258 |
+
"loss": 19.0012,
|
| 37259 |
+
"step": 53100
|
| 37260 |
+
},
|
| 37261 |
+
{
|
| 37262 |
+
"epoch": 0.9855786516723615,
|
| 37263 |
+
"grad_norm": 35.40625,
|
| 37264 |
+
"learning_rate": 9.846003380328739e-06,
|
| 37265 |
+
"loss": 18.3228,
|
| 37266 |
+
"step": 53110
|
| 37267 |
+
},
|
| 37268 |
+
{
|
| 37269 |
+
"epoch": 0.9857642247568412,
|
| 37270 |
+
"grad_norm": 36.65625,
|
| 37271 |
+
"learning_rate": 9.845974384542695e-06,
|
| 37272 |
+
"loss": 18.4046,
|
| 37273 |
+
"step": 53120
|
| 37274 |
+
},
|
| 37275 |
+
{
|
| 37276 |
+
"epoch": 0.9859497978413211,
|
| 37277 |
+
"grad_norm": 36.21875,
|
| 37278 |
+
"learning_rate": 9.845945388756652e-06,
|
| 37279 |
+
"loss": 18.3394,
|
| 37280 |
+
"step": 53130
|
| 37281 |
+
},
|
| 37282 |
+
{
|
| 37283 |
+
"epoch": 0.986135370925801,
|
| 37284 |
+
"grad_norm": 36.96875,
|
| 37285 |
+
"learning_rate": 9.845916392970611e-06,
|
| 37286 |
+
"loss": 18.5615,
|
| 37287 |
+
"step": 53140
|
| 37288 |
+
},
|
| 37289 |
+
{
|
| 37290 |
+
"epoch": 0.9863209440102807,
|
| 37291 |
+
"grad_norm": 35.1875,
|
| 37292 |
+
"learning_rate": 9.845887397184568e-06,
|
| 37293 |
+
"loss": 18.6555,
|
| 37294 |
+
"step": 53150
|
| 37295 |
+
},
|
| 37296 |
+
{
|
| 37297 |
+
"epoch": 0.9865065170947606,
|
| 37298 |
+
"grad_norm": 38.15625,
|
| 37299 |
+
"learning_rate": 9.845858401398526e-06,
|
| 37300 |
+
"loss": 19.2572,
|
| 37301 |
+
"step": 53160
|
| 37302 |
+
},
|
| 37303 |
+
{
|
| 37304 |
+
"epoch": 0.9866920901792404,
|
| 37305 |
+
"grad_norm": 37.21875,
|
| 37306 |
+
"learning_rate": 9.845829405612483e-06,
|
| 37307 |
+
"loss": 18.6147,
|
| 37308 |
+
"step": 53170
|
| 37309 |
+
},
|
| 37310 |
+
{
|
| 37311 |
+
"epoch": 0.9868776632637203,
|
| 37312 |
+
"grad_norm": 35.09375,
|
| 37313 |
+
"learning_rate": 9.84580040982644e-06,
|
| 37314 |
+
"loss": 18.649,
|
| 37315 |
+
"step": 53180
|
| 37316 |
+
},
|
| 37317 |
+
{
|
| 37318 |
+
"epoch": 0.9870632363482,
|
| 37319 |
+
"grad_norm": 34.28125,
|
| 37320 |
+
"learning_rate": 9.845771414040398e-06,
|
| 37321 |
+
"loss": 18.5839,
|
| 37322 |
+
"step": 53190
|
| 37323 |
+
},
|
| 37324 |
+
{
|
| 37325 |
+
"epoch": 0.9872488094326799,
|
| 37326 |
+
"grad_norm": 37.71875,
|
| 37327 |
+
"learning_rate": 9.845742418254355e-06,
|
| 37328 |
+
"loss": 18.9422,
|
| 37329 |
+
"step": 53200
|
| 37330 |
+
},
|
| 37331 |
+
{
|
| 37332 |
+
"epoch": 0.9874343825171598,
|
| 37333 |
+
"grad_norm": 38.6875,
|
| 37334 |
+
"learning_rate": 9.845713422468315e-06,
|
| 37335 |
+
"loss": 19.0014,
|
| 37336 |
+
"step": 53210
|
| 37337 |
+
},
|
| 37338 |
+
{
|
| 37339 |
+
"epoch": 0.9876199556016395,
|
| 37340 |
+
"grad_norm": 38.03125,
|
| 37341 |
+
"learning_rate": 9.845684426682272e-06,
|
| 37342 |
+
"loss": 18.5322,
|
| 37343 |
+
"step": 53220
|
| 37344 |
+
},
|
| 37345 |
+
{
|
| 37346 |
+
"epoch": 0.9878055286861194,
|
| 37347 |
+
"grad_norm": 37.0,
|
| 37348 |
+
"learning_rate": 9.845655430896228e-06,
|
| 37349 |
+
"loss": 18.8219,
|
| 37350 |
+
"step": 53230
|
| 37351 |
+
},
|
| 37352 |
+
{
|
| 37353 |
+
"epoch": 0.9879911017705992,
|
| 37354 |
+
"grad_norm": 36.375,
|
| 37355 |
+
"learning_rate": 9.845626435110187e-06,
|
| 37356 |
+
"loss": 18.5651,
|
| 37357 |
+
"step": 53240
|
| 37358 |
+
},
|
| 37359 |
+
{
|
| 37360 |
+
"epoch": 0.988176674855079,
|
| 37361 |
+
"grad_norm": 35.09375,
|
| 37362 |
+
"learning_rate": 9.845597439324144e-06,
|
| 37363 |
+
"loss": 18.3392,
|
| 37364 |
+
"step": 53250
|
| 37365 |
+
},
|
| 37366 |
+
{
|
| 37367 |
+
"epoch": 0.9883622479395588,
|
| 37368 |
+
"grad_norm": 37.65625,
|
| 37369 |
+
"learning_rate": 9.845568443538102e-06,
|
| 37370 |
+
"loss": 19.2046,
|
| 37371 |
+
"step": 53260
|
| 37372 |
+
},
|
| 37373 |
+
{
|
| 37374 |
+
"epoch": 0.9885478210240387,
|
| 37375 |
+
"grad_norm": 36.3125,
|
| 37376 |
+
"learning_rate": 9.845539447752059e-06,
|
| 37377 |
+
"loss": 18.752,
|
| 37378 |
+
"step": 53270
|
| 37379 |
+
},
|
| 37380 |
+
{
|
| 37381 |
+
"epoch": 0.9887333941085185,
|
| 37382 |
+
"grad_norm": 37.09375,
|
| 37383 |
+
"learning_rate": 9.845510451966016e-06,
|
| 37384 |
+
"loss": 18.4617,
|
| 37385 |
+
"step": 53280
|
| 37386 |
+
},
|
| 37387 |
+
{
|
| 37388 |
+
"epoch": 0.9889189671929983,
|
| 37389 |
+
"grad_norm": 37.125,
|
| 37390 |
+
"learning_rate": 9.845481456179974e-06,
|
| 37391 |
+
"loss": 18.4353,
|
| 37392 |
+
"step": 53290
|
| 37393 |
+
},
|
| 37394 |
+
{
|
| 37395 |
+
"epoch": 0.9891045402774782,
|
| 37396 |
+
"grad_norm": 37.375,
|
| 37397 |
+
"learning_rate": 9.845452460393931e-06,
|
| 37398 |
+
"loss": 18.8832,
|
| 37399 |
+
"step": 53300
|
| 37400 |
+
},
|
| 37401 |
+
{
|
| 37402 |
+
"epoch": 0.9892901133619579,
|
| 37403 |
+
"grad_norm": 35.3125,
|
| 37404 |
+
"learning_rate": 9.845423464607889e-06,
|
| 37405 |
+
"loss": 19.0743,
|
| 37406 |
+
"step": 53310
|
| 37407 |
+
},
|
| 37408 |
+
{
|
| 37409 |
+
"epoch": 0.9894756864464378,
|
| 37410 |
+
"grad_norm": 35.78125,
|
| 37411 |
+
"learning_rate": 9.845394468821848e-06,
|
| 37412 |
+
"loss": 18.8587,
|
| 37413 |
+
"step": 53320
|
| 37414 |
+
},
|
| 37415 |
+
{
|
| 37416 |
+
"epoch": 0.9896612595309177,
|
| 37417 |
+
"grad_norm": 35.21875,
|
| 37418 |
+
"learning_rate": 9.845365473035805e-06,
|
| 37419 |
+
"loss": 18.5995,
|
| 37420 |
+
"step": 53330
|
| 37421 |
+
},
|
| 37422 |
+
{
|
| 37423 |
+
"epoch": 0.9898468326153974,
|
| 37424 |
+
"grad_norm": 37.40625,
|
| 37425 |
+
"learning_rate": 9.845336477249763e-06,
|
| 37426 |
+
"loss": 18.2105,
|
| 37427 |
+
"step": 53340
|
| 37428 |
+
},
|
| 37429 |
+
{
|
| 37430 |
+
"epoch": 0.9900324056998773,
|
| 37431 |
+
"grad_norm": 35.1875,
|
| 37432 |
+
"learning_rate": 9.84530748146372e-06,
|
| 37433 |
+
"loss": 18.6503,
|
| 37434 |
+
"step": 53350
|
| 37435 |
+
},
|
| 37436 |
+
{
|
| 37437 |
+
"epoch": 0.9902179787843571,
|
| 37438 |
+
"grad_norm": 37.09375,
|
| 37439 |
+
"learning_rate": 9.845278485677677e-06,
|
| 37440 |
+
"loss": 18.6598,
|
| 37441 |
+
"step": 53360
|
| 37442 |
+
},
|
| 37443 |
+
{
|
| 37444 |
+
"epoch": 0.990403551868837,
|
| 37445 |
+
"grad_norm": 36.21875,
|
| 37446 |
+
"learning_rate": 9.845249489891635e-06,
|
| 37447 |
+
"loss": 18.2827,
|
| 37448 |
+
"step": 53370
|
| 37449 |
+
},
|
| 37450 |
+
{
|
| 37451 |
+
"epoch": 0.9905891249533167,
|
| 37452 |
+
"grad_norm": 36.03125,
|
| 37453 |
+
"learning_rate": 9.845220494105592e-06,
|
| 37454 |
+
"loss": 18.5182,
|
| 37455 |
+
"step": 53380
|
| 37456 |
+
},
|
| 37457 |
+
{
|
| 37458 |
+
"epoch": 0.9907746980377966,
|
| 37459 |
+
"grad_norm": 37.5,
|
| 37460 |
+
"learning_rate": 9.84519149831955e-06,
|
| 37461 |
+
"loss": 18.4391,
|
| 37462 |
+
"step": 53390
|
| 37463 |
+
},
|
| 37464 |
+
{
|
| 37465 |
+
"epoch": 0.9909602711222765,
|
| 37466 |
+
"grad_norm": 37.21875,
|
| 37467 |
+
"learning_rate": 9.845162502533507e-06,
|
| 37468 |
+
"loss": 19.1688,
|
| 37469 |
+
"step": 53400
|
| 37470 |
+
},
|
| 37471 |
+
{
|
| 37472 |
+
"epoch": 0.9911458442067562,
|
| 37473 |
+
"grad_norm": 37.375,
|
| 37474 |
+
"learning_rate": 9.845133506747464e-06,
|
| 37475 |
+
"loss": 18.4897,
|
| 37476 |
+
"step": 53410
|
| 37477 |
+
},
|
| 37478 |
+
{
|
| 37479 |
+
"epoch": 0.9913314172912361,
|
| 37480 |
+
"grad_norm": 37.90625,
|
| 37481 |
+
"learning_rate": 9.845104510961424e-06,
|
| 37482 |
+
"loss": 19.3897,
|
| 37483 |
+
"step": 53420
|
| 37484 |
+
},
|
| 37485 |
+
{
|
| 37486 |
+
"epoch": 0.991516990375716,
|
| 37487 |
+
"grad_norm": 36.90625,
|
| 37488 |
+
"learning_rate": 9.845075515175381e-06,
|
| 37489 |
+
"loss": 18.5943,
|
| 37490 |
+
"step": 53430
|
| 37491 |
+
},
|
| 37492 |
+
{
|
| 37493 |
+
"epoch": 0.9917025634601957,
|
| 37494 |
+
"grad_norm": 36.65625,
|
| 37495 |
+
"learning_rate": 9.845046519389337e-06,
|
| 37496 |
+
"loss": 19.0475,
|
| 37497 |
+
"step": 53440
|
| 37498 |
+
},
|
| 37499 |
+
{
|
| 37500 |
+
"epoch": 0.9918881365446756,
|
| 37501 |
+
"grad_norm": 38.96875,
|
| 37502 |
+
"learning_rate": 9.845017523603296e-06,
|
| 37503 |
+
"loss": 18.6567,
|
| 37504 |
+
"step": 53450
|
| 37505 |
+
},
|
| 37506 |
+
{
|
| 37507 |
+
"epoch": 0.9920737096291554,
|
| 37508 |
+
"grad_norm": 37.5625,
|
| 37509 |
+
"learning_rate": 9.844988527817253e-06,
|
| 37510 |
+
"loss": 18.2615,
|
| 37511 |
+
"step": 53460
|
| 37512 |
+
},
|
| 37513 |
+
{
|
| 37514 |
+
"epoch": 0.9922592827136352,
|
| 37515 |
+
"grad_norm": 36.125,
|
| 37516 |
+
"learning_rate": 9.84495953203121e-06,
|
| 37517 |
+
"loss": 18.7222,
|
| 37518 |
+
"step": 53470
|
| 37519 |
+
},
|
| 37520 |
+
{
|
| 37521 |
+
"epoch": 0.992444855798115,
|
| 37522 |
+
"grad_norm": 37.46875,
|
| 37523 |
+
"learning_rate": 9.844930536245168e-06,
|
| 37524 |
+
"loss": 18.6064,
|
| 37525 |
+
"step": 53480
|
| 37526 |
+
},
|
| 37527 |
+
{
|
| 37528 |
+
"epoch": 0.9926304288825949,
|
| 37529 |
+
"grad_norm": 35.5625,
|
| 37530 |
+
"learning_rate": 9.844901540459127e-06,
|
| 37531 |
+
"loss": 18.9255,
|
| 37532 |
+
"step": 53490
|
| 37533 |
+
},
|
| 37534 |
+
{
|
| 37535 |
+
"epoch": 0.9928160019670746,
|
| 37536 |
+
"grad_norm": 37.0625,
|
| 37537 |
+
"learning_rate": 9.844872544673083e-06,
|
| 37538 |
+
"loss": 18.2984,
|
| 37539 |
+
"step": 53500
|
| 37540 |
+
},
|
| 37541 |
+
{
|
| 37542 |
+
"epoch": 0.9930015750515545,
|
| 37543 |
+
"grad_norm": 35.96875,
|
| 37544 |
+
"learning_rate": 9.84484354888704e-06,
|
| 37545 |
+
"loss": 18.5695,
|
| 37546 |
+
"step": 53510
|
| 37547 |
+
},
|
| 37548 |
+
{
|
| 37549 |
+
"epoch": 0.9931871481360344,
|
| 37550 |
+
"grad_norm": 37.21875,
|
| 37551 |
+
"learning_rate": 9.844814553101e-06,
|
| 37552 |
+
"loss": 18.8148,
|
| 37553 |
+
"step": 53520
|
| 37554 |
+
},
|
| 37555 |
+
{
|
| 37556 |
+
"epoch": 0.9933727212205142,
|
| 37557 |
+
"grad_norm": 35.28125,
|
| 37558 |
+
"learning_rate": 9.844785557314957e-06,
|
| 37559 |
+
"loss": 18.9416,
|
| 37560 |
+
"step": 53530
|
| 37561 |
+
},
|
| 37562 |
+
{
|
| 37563 |
+
"epoch": 0.993558294304994,
|
| 37564 |
+
"grad_norm": 35.96875,
|
| 37565 |
+
"learning_rate": 9.844756561528914e-06,
|
| 37566 |
+
"loss": 18.6266,
|
| 37567 |
+
"step": 53540
|
| 37568 |
+
},
|
| 37569 |
+
{
|
| 37570 |
+
"epoch": 0.9937438673894738,
|
| 37571 |
+
"grad_norm": 39.75,
|
| 37572 |
+
"learning_rate": 9.844727565742871e-06,
|
| 37573 |
+
"loss": 19.1788,
|
| 37574 |
+
"step": 53550
|
| 37575 |
+
},
|
| 37576 |
+
{
|
| 37577 |
+
"epoch": 0.9939294404739537,
|
| 37578 |
+
"grad_norm": 36.03125,
|
| 37579 |
+
"learning_rate": 9.844698569956829e-06,
|
| 37580 |
+
"loss": 18.8975,
|
| 37581 |
+
"step": 53560
|
| 37582 |
+
},
|
| 37583 |
+
{
|
| 37584 |
+
"epoch": 0.9941150135584335,
|
| 37585 |
+
"grad_norm": 36.0625,
|
| 37586 |
+
"learning_rate": 9.844669574170786e-06,
|
| 37587 |
+
"loss": 18.9393,
|
| 37588 |
+
"step": 53570
|
| 37589 |
+
},
|
| 37590 |
+
{
|
| 37591 |
+
"epoch": 0.9943005866429133,
|
| 37592 |
+
"grad_norm": 39.28125,
|
| 37593 |
+
"learning_rate": 9.844640578384744e-06,
|
| 37594 |
+
"loss": 18.5879,
|
| 37595 |
+
"step": 53580
|
| 37596 |
+
},
|
| 37597 |
+
{
|
| 37598 |
+
"epoch": 0.9944861597273932,
|
| 37599 |
+
"grad_norm": 36.46875,
|
| 37600 |
+
"learning_rate": 9.844611582598703e-06,
|
| 37601 |
+
"loss": 18.9418,
|
| 37602 |
+
"step": 53590
|
| 37603 |
+
},
|
| 37604 |
+
{
|
| 37605 |
+
"epoch": 0.9946717328118729,
|
| 37606 |
+
"grad_norm": 36.28125,
|
| 37607 |
+
"learning_rate": 9.844582586812659e-06,
|
| 37608 |
+
"loss": 18.719,
|
| 37609 |
+
"step": 53600
|
| 37610 |
+
},
|
| 37611 |
+
{
|
| 37612 |
+
"epoch": 0.9948573058963528,
|
| 37613 |
+
"grad_norm": 38.40625,
|
| 37614 |
+
"learning_rate": 9.844553591026616e-06,
|
| 37615 |
+
"loss": 18.7769,
|
| 37616 |
+
"step": 53610
|
| 37617 |
+
},
|
| 37618 |
+
{
|
| 37619 |
+
"epoch": 0.9950428789808327,
|
| 37620 |
+
"grad_norm": 35.125,
|
| 37621 |
+
"learning_rate": 9.844524595240575e-06,
|
| 37622 |
+
"loss": 18.7225,
|
| 37623 |
+
"step": 53620
|
| 37624 |
+
},
|
| 37625 |
+
{
|
| 37626 |
+
"epoch": 0.9952284520653124,
|
| 37627 |
+
"grad_norm": 36.21875,
|
| 37628 |
+
"learning_rate": 9.844495599454532e-06,
|
| 37629 |
+
"loss": 18.6759,
|
| 37630 |
+
"step": 53630
|
| 37631 |
+
},
|
| 37632 |
+
{
|
| 37633 |
+
"epoch": 0.9954140251497923,
|
| 37634 |
+
"grad_norm": 37.34375,
|
| 37635 |
+
"learning_rate": 9.84446660366849e-06,
|
| 37636 |
+
"loss": 18.6277,
|
| 37637 |
+
"step": 53640
|
| 37638 |
+
},
|
| 37639 |
+
{
|
| 37640 |
+
"epoch": 0.9955995982342721,
|
| 37641 |
+
"grad_norm": 38.28125,
|
| 37642 |
+
"learning_rate": 9.844437607882447e-06,
|
| 37643 |
+
"loss": 18.8499,
|
| 37644 |
+
"step": 53650
|
| 37645 |
+
},
|
| 37646 |
+
{
|
| 37647 |
+
"epoch": 0.9957851713187519,
|
| 37648 |
+
"grad_norm": 37.21875,
|
| 37649 |
+
"learning_rate": 9.844408612096405e-06,
|
| 37650 |
+
"loss": 18.6598,
|
| 37651 |
+
"step": 53660
|
| 37652 |
+
},
|
| 37653 |
+
{
|
| 37654 |
+
"epoch": 0.9959707444032317,
|
| 37655 |
+
"grad_norm": 36.4375,
|
| 37656 |
+
"learning_rate": 9.844379616310362e-06,
|
| 37657 |
+
"loss": 19.1049,
|
| 37658 |
+
"step": 53670
|
| 37659 |
+
},
|
| 37660 |
+
{
|
| 37661 |
+
"epoch": 0.9961563174877116,
|
| 37662 |
+
"grad_norm": 38.28125,
|
| 37663 |
+
"learning_rate": 9.84435062052432e-06,
|
| 37664 |
+
"loss": 18.3496,
|
| 37665 |
+
"step": 53680
|
| 37666 |
+
},
|
| 37667 |
+
{
|
| 37668 |
+
"epoch": 0.9963418905721914,
|
| 37669 |
+
"grad_norm": 37.5,
|
| 37670 |
+
"learning_rate": 9.844321624738279e-06,
|
| 37671 |
+
"loss": 18.6458,
|
| 37672 |
+
"step": 53690
|
| 37673 |
+
},
|
| 37674 |
+
{
|
| 37675 |
+
"epoch": 0.9965274636566712,
|
| 37676 |
+
"grad_norm": 35.90625,
|
| 37677 |
+
"learning_rate": 9.844292628952236e-06,
|
| 37678 |
+
"loss": 18.5307,
|
| 37679 |
+
"step": 53700
|
| 37680 |
+
},
|
| 37681 |
+
{
|
| 37682 |
+
"epoch": 0.9967130367411511,
|
| 37683 |
+
"grad_norm": 36.53125,
|
| 37684 |
+
"learning_rate": 9.844263633166192e-06,
|
| 37685 |
+
"loss": 19.2581,
|
| 37686 |
+
"step": 53710
|
| 37687 |
+
},
|
| 37688 |
+
{
|
| 37689 |
+
"epoch": 0.9968986098256309,
|
| 37690 |
+
"grad_norm": 36.25,
|
| 37691 |
+
"learning_rate": 9.84423463738015e-06,
|
| 37692 |
+
"loss": 18.803,
|
| 37693 |
+
"step": 53720
|
| 37694 |
+
},
|
| 37695 |
+
{
|
| 37696 |
+
"epoch": 0.9970841829101107,
|
| 37697 |
+
"grad_norm": 37.1875,
|
| 37698 |
+
"learning_rate": 9.844205641594108e-06,
|
| 37699 |
+
"loss": 18.2188,
|
| 37700 |
+
"step": 53730
|
| 37701 |
+
},
|
| 37702 |
+
{
|
| 37703 |
+
"epoch": 0.9972697559945906,
|
| 37704 |
+
"grad_norm": 36.8125,
|
| 37705 |
+
"learning_rate": 9.844176645808066e-06,
|
| 37706 |
+
"loss": 18.7958,
|
| 37707 |
+
"step": 53740
|
| 37708 |
+
},
|
| 37709 |
+
{
|
| 37710 |
+
"epoch": 0.9974553290790704,
|
| 37711 |
+
"grad_norm": 33.96875,
|
| 37712 |
+
"learning_rate": 9.844147650022023e-06,
|
| 37713 |
+
"loss": 18.6348,
|
| 37714 |
+
"step": 53750
|
| 37715 |
+
},
|
| 37716 |
+
{
|
| 37717 |
+
"epoch": 0.9976409021635502,
|
| 37718 |
+
"grad_norm": 35.96875,
|
| 37719 |
+
"learning_rate": 9.84411865423598e-06,
|
| 37720 |
+
"loss": 18.5909,
|
| 37721 |
+
"step": 53760
|
| 37722 |
+
},
|
| 37723 |
+
{
|
| 37724 |
+
"epoch": 0.99782647524803,
|
| 37725 |
+
"grad_norm": 36.0625,
|
| 37726 |
+
"learning_rate": 9.844089658449938e-06,
|
| 37727 |
+
"loss": 19.1494,
|
| 37728 |
+
"step": 53770
|
| 37729 |
+
},
|
| 37730 |
+
{
|
| 37731 |
+
"epoch": 0.9980120483325099,
|
| 37732 |
+
"grad_norm": 35.6875,
|
| 37733 |
+
"learning_rate": 9.844060662663895e-06,
|
| 37734 |
+
"loss": 18.9692,
|
| 37735 |
+
"step": 53780
|
| 37736 |
+
},
|
| 37737 |
+
{
|
| 37738 |
+
"epoch": 0.9981976214169896,
|
| 37739 |
+
"grad_norm": 36.0,
|
| 37740 |
+
"learning_rate": 9.844031666877853e-06,
|
| 37741 |
+
"loss": 18.5912,
|
| 37742 |
+
"step": 53790
|
| 37743 |
+
},
|
| 37744 |
+
{
|
| 37745 |
+
"epoch": 0.9983831945014695,
|
| 37746 |
+
"grad_norm": 33.34375,
|
| 37747 |
+
"learning_rate": 9.844002671091812e-06,
|
| 37748 |
+
"loss": 18.6809,
|
| 37749 |
+
"step": 53800
|
| 37750 |
+
},
|
| 37751 |
+
{
|
| 37752 |
+
"epoch": 0.9985687675859494,
|
| 37753 |
+
"grad_norm": 36.0625,
|
| 37754 |
+
"learning_rate": 9.84397367530577e-06,
|
| 37755 |
+
"loss": 18.3271,
|
| 37756 |
+
"step": 53810
|
| 37757 |
+
},
|
| 37758 |
+
{
|
| 37759 |
+
"epoch": 0.9987543406704291,
|
| 37760 |
+
"grad_norm": 37.28125,
|
| 37761 |
+
"learning_rate": 9.843944679519727e-06,
|
| 37762 |
+
"loss": 18.652,
|
| 37763 |
+
"step": 53820
|
| 37764 |
+
},
|
| 37765 |
+
{
|
| 37766 |
+
"epoch": 0.998939913754909,
|
| 37767 |
+
"grad_norm": 38.375,
|
| 37768 |
+
"learning_rate": 9.843915683733684e-06,
|
| 37769 |
+
"loss": 18.4857,
|
| 37770 |
+
"step": 53830
|
| 37771 |
+
},
|
| 37772 |
+
{
|
| 37773 |
+
"epoch": 0.9991254868393888,
|
| 37774 |
+
"grad_norm": 37.53125,
|
| 37775 |
+
"learning_rate": 9.843886687947641e-06,
|
| 37776 |
+
"loss": 18.413,
|
| 37777 |
+
"step": 53840
|
| 37778 |
+
},
|
| 37779 |
+
{
|
| 37780 |
+
"epoch": 0.9993110599238686,
|
| 37781 |
+
"grad_norm": 35.71875,
|
| 37782 |
+
"learning_rate": 9.843857692161599e-06,
|
| 37783 |
+
"loss": 18.8629,
|
| 37784 |
+
"step": 53850
|
| 37785 |
+
},
|
| 37786 |
+
{
|
| 37787 |
+
"epoch": 0.9994966330083485,
|
| 37788 |
+
"grad_norm": 35.84375,
|
| 37789 |
+
"learning_rate": 9.843828696375556e-06,
|
| 37790 |
+
"loss": 18.0011,
|
| 37791 |
+
"step": 53860
|
| 37792 |
+
},
|
| 37793 |
+
{
|
| 37794 |
+
"epoch": 0.9996822060928283,
|
| 37795 |
+
"grad_norm": 35.1875,
|
| 37796 |
+
"learning_rate": 9.843799700589514e-06,
|
| 37797 |
+
"loss": 18.2267,
|
| 37798 |
+
"step": 53870
|
| 37799 |
+
},
|
| 37800 |
+
{
|
| 37801 |
+
"epoch": 0.9998677791773081,
|
| 37802 |
+
"grad_norm": 36.34375,
|
| 37803 |
+
"learning_rate": 9.843770704803471e-06,
|
| 37804 |
+
"loss": 18.813,
|
| 37805 |
+
"step": 53880
|
| 37806 |
}
|
| 37807 |
],
|
| 37808 |
"logging_steps": 10,
|
|
|
|
| 37817 |
"should_evaluate": false,
|
| 37818 |
"should_log": false,
|
| 37819 |
"should_save": true,
|
| 37820 |
+
"should_training_stop": true
|
| 37821 |
},
|
| 37822 |
"attributes": {}
|
| 37823 |
}
|
| 37824 |
},
|
| 37825 |
+
"total_flos": 9.406098491698053e+18,
|
| 37826 |
"train_batch_size": 8,
|
| 37827 |
"trial_name": null,
|
| 37828 |
"trial_params": null
|