Training in progress, step 10000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 271572960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bab8482c0a85eee10e169c5edff7ead037bfccebc906b0fdd71c92fcc497f21f
|
| 3 |
size 271572960
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 289714571
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4da6c39cb5d26672908acd5d1c38779da6e51f6ea49787451c6e09d6ce56ba4
|
| 3 |
size 289714571
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f70a4e15a8a60216d5ce5f4c3bfc3e59ac501bdfa626e9a279427bfc701270f
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e6fd97f105f8edb4cfe7d5753c1c5716920ac3feb07414e58ddd6a461f2d28f
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ece3666449643d44062c553ef77ee8b9bdd30ae5df49c2e417f3bbd4309fedb0
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -66641,6 +66641,3513 @@
|
|
| 66641 |
"eval_samples_per_second": 60.197,
|
| 66642 |
"eval_steps_per_second": 3.792,
|
| 66643 |
"step": 9500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66644 |
}
|
| 66645 |
],
|
| 66646 |
"logging_steps": 1,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.1049781755898116,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 10000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 66641 |
"eval_samples_per_second": 60.197,
|
| 66642 |
"eval_steps_per_second": 3.792,
|
| 66643 |
"step": 9500
|
| 66644 |
+
},
|
| 66645 |
+
{
|
| 66646 |
+
"epoch": 1.049837007569479,
|
| 66647 |
+
"grad_norm": 0.6770299673080444,
|
| 66648 |
+
"learning_rate": 0.00016606733805779662,
|
| 66649 |
+
"loss": 4.6605,
|
| 66650 |
+
"step": 9501
|
| 66651 |
+
},
|
| 66652 |
+
{
|
| 66653 |
+
"epoch": 1.0499475109122052,
|
| 66654 |
+
"grad_norm": 0.7012100219726562,
|
| 66655 |
+
"learning_rate": 0.0001660382366551886,
|
| 66656 |
+
"loss": 4.656,
|
| 66657 |
+
"step": 9502
|
| 66658 |
+
},
|
| 66659 |
+
{
|
| 66660 |
+
"epoch": 1.0500580142549312,
|
| 66661 |
+
"grad_norm": 0.7015648484230042,
|
| 66662 |
+
"learning_rate": 0.0001660091346419134,
|
| 66663 |
+
"loss": 4.7029,
|
| 66664 |
+
"step": 9503
|
| 66665 |
+
},
|
| 66666 |
+
{
|
| 66667 |
+
"epoch": 1.0501685175976574,
|
| 66668 |
+
"grad_norm": 0.685258686542511,
|
| 66669 |
+
"learning_rate": 0.00016598003201907907,
|
| 66670 |
+
"loss": 4.5659,
|
| 66671 |
+
"step": 9504
|
| 66672 |
+
},
|
| 66673 |
+
{
|
| 66674 |
+
"epoch": 1.0502790209403834,
|
| 66675 |
+
"grad_norm": 0.7035828828811646,
|
| 66676 |
+
"learning_rate": 0.00016595092878779366,
|
| 66677 |
+
"loss": 4.6319,
|
| 66678 |
+
"step": 9505
|
| 66679 |
+
},
|
| 66680 |
+
{
|
| 66681 |
+
"epoch": 1.0503895242831096,
|
| 66682 |
+
"grad_norm": 0.7030965685844421,
|
| 66683 |
+
"learning_rate": 0.00016592182494916535,
|
| 66684 |
+
"loss": 4.6144,
|
| 66685 |
+
"step": 9506
|
| 66686 |
+
},
|
| 66687 |
+
{
|
| 66688 |
+
"epoch": 1.0505000276258356,
|
| 66689 |
+
"grad_norm": 0.6900784969329834,
|
| 66690 |
+
"learning_rate": 0.0001658927205043023,
|
| 66691 |
+
"loss": 4.6347,
|
| 66692 |
+
"step": 9507
|
| 66693 |
+
},
|
| 66694 |
+
{
|
| 66695 |
+
"epoch": 1.0506105309685618,
|
| 66696 |
+
"grad_norm": 0.7021872997283936,
|
| 66697 |
+
"learning_rate": 0.00016586361545431263,
|
| 66698 |
+
"loss": 4.6358,
|
| 66699 |
+
"step": 9508
|
| 66700 |
+
},
|
| 66701 |
+
{
|
| 66702 |
+
"epoch": 1.0507210343112878,
|
| 66703 |
+
"grad_norm": 0.8038136959075928,
|
| 66704 |
+
"learning_rate": 0.00016583450980030458,
|
| 66705 |
+
"loss": 4.6523,
|
| 66706 |
+
"step": 9509
|
| 66707 |
+
},
|
| 66708 |
+
{
|
| 66709 |
+
"epoch": 1.050831537654014,
|
| 66710 |
+
"grad_norm": 0.7020832896232605,
|
| 66711 |
+
"learning_rate": 0.0001658054035433863,
|
| 66712 |
+
"loss": 4.5992,
|
| 66713 |
+
"step": 9510
|
| 66714 |
+
},
|
| 66715 |
+
{
|
| 66716 |
+
"epoch": 1.05094204099674,
|
| 66717 |
+
"grad_norm": 0.7443466186523438,
|
| 66718 |
+
"learning_rate": 0.00016577629668466617,
|
| 66719 |
+
"loss": 4.737,
|
| 66720 |
+
"step": 9511
|
| 66721 |
+
},
|
| 66722 |
+
{
|
| 66723 |
+
"epoch": 1.0510525443394663,
|
| 66724 |
+
"grad_norm": 0.7208287119865417,
|
| 66725 |
+
"learning_rate": 0.00016574718922525233,
|
| 66726 |
+
"loss": 4.6382,
|
| 66727 |
+
"step": 9512
|
| 66728 |
+
},
|
| 66729 |
+
{
|
| 66730 |
+
"epoch": 1.0511630476821925,
|
| 66731 |
+
"grad_norm": 0.6894711256027222,
|
| 66732 |
+
"learning_rate": 0.00016571808116625308,
|
| 66733 |
+
"loss": 4.628,
|
| 66734 |
+
"step": 9513
|
| 66735 |
+
},
|
| 66736 |
+
{
|
| 66737 |
+
"epoch": 1.0512735510249185,
|
| 66738 |
+
"grad_norm": 0.7114086747169495,
|
| 66739 |
+
"learning_rate": 0.00016568897250877679,
|
| 66740 |
+
"loss": 4.6736,
|
| 66741 |
+
"step": 9514
|
| 66742 |
+
},
|
| 66743 |
+
{
|
| 66744 |
+
"epoch": 1.0513840543676447,
|
| 66745 |
+
"grad_norm": 0.7034818530082703,
|
| 66746 |
+
"learning_rate": 0.00016565986325393176,
|
| 66747 |
+
"loss": 4.5731,
|
| 66748 |
+
"step": 9515
|
| 66749 |
+
},
|
| 66750 |
+
{
|
| 66751 |
+
"epoch": 1.0514945577103707,
|
| 66752 |
+
"grad_norm": 0.7546555995941162,
|
| 66753 |
+
"learning_rate": 0.00016563075340282632,
|
| 66754 |
+
"loss": 4.5597,
|
| 66755 |
+
"step": 9516
|
| 66756 |
+
},
|
| 66757 |
+
{
|
| 66758 |
+
"epoch": 1.051605061053097,
|
| 66759 |
+
"grad_norm": 0.7272023558616638,
|
| 66760 |
+
"learning_rate": 0.00016560164295656885,
|
| 66761 |
+
"loss": 4.58,
|
| 66762 |
+
"step": 9517
|
| 66763 |
+
},
|
| 66764 |
+
{
|
| 66765 |
+
"epoch": 1.051715564395823,
|
| 66766 |
+
"grad_norm": 0.6607677340507507,
|
| 66767 |
+
"learning_rate": 0.00016557253191626782,
|
| 66768 |
+
"loss": 4.6613,
|
| 66769 |
+
"step": 9518
|
| 66770 |
+
},
|
| 66771 |
+
{
|
| 66772 |
+
"epoch": 1.0518260677385491,
|
| 66773 |
+
"grad_norm": 0.6704973578453064,
|
| 66774 |
+
"learning_rate": 0.0001655434202830316,
|
| 66775 |
+
"loss": 4.5724,
|
| 66776 |
+
"step": 9519
|
| 66777 |
+
},
|
| 66778 |
+
{
|
| 66779 |
+
"epoch": 1.0519365710812751,
|
| 66780 |
+
"grad_norm": 0.7242583632469177,
|
| 66781 |
+
"learning_rate": 0.00016551430805796863,
|
| 66782 |
+
"loss": 4.6503,
|
| 66783 |
+
"step": 9520
|
| 66784 |
+
},
|
| 66785 |
+
{
|
| 66786 |
+
"epoch": 1.0520470744240014,
|
| 66787 |
+
"grad_norm": 0.6895632147789001,
|
| 66788 |
+
"learning_rate": 0.0001654851952421874,
|
| 66789 |
+
"loss": 4.6891,
|
| 66790 |
+
"step": 9521
|
| 66791 |
+
},
|
| 66792 |
+
{
|
| 66793 |
+
"epoch": 1.0521575777667274,
|
| 66794 |
+
"grad_norm": 0.6929240226745605,
|
| 66795 |
+
"learning_rate": 0.00016545608183679637,
|
| 66796 |
+
"loss": 4.662,
|
| 66797 |
+
"step": 9522
|
| 66798 |
+
},
|
| 66799 |
+
{
|
| 66800 |
+
"epoch": 1.0522680811094536,
|
| 66801 |
+
"grad_norm": 0.6846395134925842,
|
| 66802 |
+
"learning_rate": 0.0001654269678429041,
|
| 66803 |
+
"loss": 4.615,
|
| 66804 |
+
"step": 9523
|
| 66805 |
+
},
|
| 66806 |
+
{
|
| 66807 |
+
"epoch": 1.0523785844521796,
|
| 66808 |
+
"grad_norm": 0.6910046339035034,
|
| 66809 |
+
"learning_rate": 0.0001653978532616191,
|
| 66810 |
+
"loss": 4.6804,
|
| 66811 |
+
"step": 9524
|
| 66812 |
+
},
|
| 66813 |
+
{
|
| 66814 |
+
"epoch": 1.0524890877949058,
|
| 66815 |
+
"grad_norm": 0.6975002288818359,
|
| 66816 |
+
"learning_rate": 0.00016536873809404993,
|
| 66817 |
+
"loss": 4.6308,
|
| 66818 |
+
"step": 9525
|
| 66819 |
+
},
|
| 66820 |
+
{
|
| 66821 |
+
"epoch": 1.052599591137632,
|
| 66822 |
+
"grad_norm": 0.7155266404151917,
|
| 66823 |
+
"learning_rate": 0.00016533962234130511,
|
| 66824 |
+
"loss": 4.6433,
|
| 66825 |
+
"step": 9526
|
| 66826 |
+
},
|
| 66827 |
+
{
|
| 66828 |
+
"epoch": 1.052710094480358,
|
| 66829 |
+
"grad_norm": 1.0078407526016235,
|
| 66830 |
+
"learning_rate": 0.00016531050600449338,
|
| 66831 |
+
"loss": 4.5474,
|
| 66832 |
+
"step": 9527
|
| 66833 |
+
},
|
| 66834 |
+
{
|
| 66835 |
+
"epoch": 1.0528205978230842,
|
| 66836 |
+
"grad_norm": 0.7395635843276978,
|
| 66837 |
+
"learning_rate": 0.0001652813890847233,
|
| 66838 |
+
"loss": 4.6405,
|
| 66839 |
+
"step": 9528
|
| 66840 |
+
},
|
| 66841 |
+
{
|
| 66842 |
+
"epoch": 1.0529311011658102,
|
| 66843 |
+
"grad_norm": 0.7200167775154114,
|
| 66844 |
+
"learning_rate": 0.00016525227158310343,
|
| 66845 |
+
"loss": 4.6081,
|
| 66846 |
+
"step": 9529
|
| 66847 |
+
},
|
| 66848 |
+
{
|
| 66849 |
+
"epoch": 1.0530416045085365,
|
| 66850 |
+
"grad_norm": 0.7000077366828918,
|
| 66851 |
+
"learning_rate": 0.00016522315350074254,
|
| 66852 |
+
"loss": 4.5594,
|
| 66853 |
+
"step": 9530
|
| 66854 |
+
},
|
| 66855 |
+
{
|
| 66856 |
+
"epoch": 1.0531521078512625,
|
| 66857 |
+
"grad_norm": 0.8375529646873474,
|
| 66858 |
+
"learning_rate": 0.00016519403483874933,
|
| 66859 |
+
"loss": 4.5898,
|
| 66860 |
+
"step": 9531
|
| 66861 |
+
},
|
| 66862 |
+
{
|
| 66863 |
+
"epoch": 1.0532626111939887,
|
| 66864 |
+
"grad_norm": 0.6786326169967651,
|
| 66865 |
+
"learning_rate": 0.00016516491559823244,
|
| 66866 |
+
"loss": 4.5432,
|
| 66867 |
+
"step": 9532
|
| 66868 |
+
},
|
| 66869 |
+
{
|
| 66870 |
+
"epoch": 1.0533731145367147,
|
| 66871 |
+
"grad_norm": 0.7240576148033142,
|
| 66872 |
+
"learning_rate": 0.00016513579578030066,
|
| 66873 |
+
"loss": 4.5356,
|
| 66874 |
+
"step": 9533
|
| 66875 |
+
},
|
| 66876 |
+
{
|
| 66877 |
+
"epoch": 1.053483617879441,
|
| 66878 |
+
"grad_norm": 0.7084378004074097,
|
| 66879 |
+
"learning_rate": 0.0001651066753860627,
|
| 66880 |
+
"loss": 4.6639,
|
| 66881 |
+
"step": 9534
|
| 66882 |
+
},
|
| 66883 |
+
{
|
| 66884 |
+
"epoch": 1.053594121222167,
|
| 66885 |
+
"grad_norm": 0.7006887197494507,
|
| 66886 |
+
"learning_rate": 0.00016507755441662745,
|
| 66887 |
+
"loss": 4.5491,
|
| 66888 |
+
"step": 9535
|
| 66889 |
+
},
|
| 66890 |
+
{
|
| 66891 |
+
"epoch": 1.053704624564893,
|
| 66892 |
+
"grad_norm": 0.6930823922157288,
|
| 66893 |
+
"learning_rate": 0.0001650484328731036,
|
| 66894 |
+
"loss": 4.5281,
|
| 66895 |
+
"step": 9536
|
| 66896 |
+
},
|
| 66897 |
+
{
|
| 66898 |
+
"epoch": 1.053815127907619,
|
| 66899 |
+
"grad_norm": 0.6869480609893799,
|
| 66900 |
+
"learning_rate": 0.0001650193107566,
|
| 66901 |
+
"loss": 4.5491,
|
| 66902 |
+
"step": 9537
|
| 66903 |
+
},
|
| 66904 |
+
{
|
| 66905 |
+
"epoch": 1.0539256312503453,
|
| 66906 |
+
"grad_norm": 0.7025832533836365,
|
| 66907 |
+
"learning_rate": 0.0001649901880682255,
|
| 66908 |
+
"loss": 4.5586,
|
| 66909 |
+
"step": 9538
|
| 66910 |
+
},
|
| 66911 |
+
{
|
| 66912 |
+
"epoch": 1.0540361345930713,
|
| 66913 |
+
"grad_norm": 0.6760215759277344,
|
| 66914 |
+
"learning_rate": 0.00016496106480908898,
|
| 66915 |
+
"loss": 4.6273,
|
| 66916 |
+
"step": 9539
|
| 66917 |
+
},
|
| 66918 |
+
{
|
| 66919 |
+
"epoch": 1.0541466379357975,
|
| 66920 |
+
"grad_norm": 0.6814777851104736,
|
| 66921 |
+
"learning_rate": 0.00016493194098029932,
|
| 66922 |
+
"loss": 4.6333,
|
| 66923 |
+
"step": 9540
|
| 66924 |
+
},
|
| 66925 |
+
{
|
| 66926 |
+
"epoch": 1.0542571412785238,
|
| 66927 |
+
"grad_norm": 0.6664444208145142,
|
| 66928 |
+
"learning_rate": 0.00016490281658296547,
|
| 66929 |
+
"loss": 4.5725,
|
| 66930 |
+
"step": 9541
|
| 66931 |
+
},
|
| 66932 |
+
{
|
| 66933 |
+
"epoch": 1.0543676446212498,
|
| 66934 |
+
"grad_norm": 0.7044290900230408,
|
| 66935 |
+
"learning_rate": 0.0001648736916181962,
|
| 66936 |
+
"loss": 4.611,
|
| 66937 |
+
"step": 9542
|
| 66938 |
+
},
|
| 66939 |
+
{
|
| 66940 |
+
"epoch": 1.054478147963976,
|
| 66941 |
+
"grad_norm": 0.6874406337738037,
|
| 66942 |
+
"learning_rate": 0.0001648445660871007,
|
| 66943 |
+
"loss": 4.7256,
|
| 66944 |
+
"step": 9543
|
| 66945 |
+
},
|
| 66946 |
+
{
|
| 66947 |
+
"epoch": 1.054588651306702,
|
| 66948 |
+
"grad_norm": 0.7229169011116028,
|
| 66949 |
+
"learning_rate": 0.00016481543999078782,
|
| 66950 |
+
"loss": 4.6352,
|
| 66951 |
+
"step": 9544
|
| 66952 |
+
},
|
| 66953 |
+
{
|
| 66954 |
+
"epoch": 1.0546991546494282,
|
| 66955 |
+
"grad_norm": 0.6656609773635864,
|
| 66956 |
+
"learning_rate": 0.00016478631333036653,
|
| 66957 |
+
"loss": 4.4955,
|
| 66958 |
+
"step": 9545
|
| 66959 |
+
},
|
| 66960 |
+
{
|
| 66961 |
+
"epoch": 1.0548096579921542,
|
| 66962 |
+
"grad_norm": 0.6818541884422302,
|
| 66963 |
+
"learning_rate": 0.0001647571861069459,
|
| 66964 |
+
"loss": 4.6623,
|
| 66965 |
+
"step": 9546
|
| 66966 |
+
},
|
| 66967 |
+
{
|
| 66968 |
+
"epoch": 1.0549201613348804,
|
| 66969 |
+
"grad_norm": 0.73002690076828,
|
| 66970 |
+
"learning_rate": 0.00016472805832163493,
|
| 66971 |
+
"loss": 4.5435,
|
| 66972 |
+
"step": 9547
|
| 66973 |
+
},
|
| 66974 |
+
{
|
| 66975 |
+
"epoch": 1.0550306646776064,
|
| 66976 |
+
"grad_norm": 0.6924058794975281,
|
| 66977 |
+
"learning_rate": 0.00016469892997554277,
|
| 66978 |
+
"loss": 4.6104,
|
| 66979 |
+
"step": 9548
|
| 66980 |
+
},
|
| 66981 |
+
{
|
| 66982 |
+
"epoch": 1.0551411680203326,
|
| 66983 |
+
"grad_norm": 0.6893782019615173,
|
| 66984 |
+
"learning_rate": 0.0001646698010697784,
|
| 66985 |
+
"loss": 4.6554,
|
| 66986 |
+
"step": 9549
|
| 66987 |
+
},
|
| 66988 |
+
{
|
| 66989 |
+
"epoch": 1.0552516713630586,
|
| 66990 |
+
"grad_norm": 0.7029363512992859,
|
| 66991 |
+
"learning_rate": 0.00016464067160545094,
|
| 66992 |
+
"loss": 4.6552,
|
| 66993 |
+
"step": 9550
|
| 66994 |
+
},
|
| 66995 |
+
{
|
| 66996 |
+
"epoch": 1.0553621747057849,
|
| 66997 |
+
"grad_norm": 0.6971676349639893,
|
| 66998 |
+
"learning_rate": 0.00016461154158366957,
|
| 66999 |
+
"loss": 4.535,
|
| 67000 |
+
"step": 9551
|
| 67001 |
+
},
|
| 67002 |
+
{
|
| 67003 |
+
"epoch": 1.055472678048511,
|
| 67004 |
+
"grad_norm": 0.7106316685676575,
|
| 67005 |
+
"learning_rate": 0.00016458241100554338,
|
| 67006 |
+
"loss": 4.7096,
|
| 67007 |
+
"step": 9552
|
| 67008 |
+
},
|
| 67009 |
+
{
|
| 67010 |
+
"epoch": 1.055583181391237,
|
| 67011 |
+
"grad_norm": 0.7418672442436218,
|
| 67012 |
+
"learning_rate": 0.00016455327987218156,
|
| 67013 |
+
"loss": 4.6307,
|
| 67014 |
+
"step": 9553
|
| 67015 |
+
},
|
| 67016 |
+
{
|
| 67017 |
+
"epoch": 1.0556936847339633,
|
| 67018 |
+
"grad_norm": 0.7186400890350342,
|
| 67019 |
+
"learning_rate": 0.00016452414818469332,
|
| 67020 |
+
"loss": 4.6438,
|
| 67021 |
+
"step": 9554
|
| 67022 |
+
},
|
| 67023 |
+
{
|
| 67024 |
+
"epoch": 1.0558041880766893,
|
| 67025 |
+
"grad_norm": 0.6828378438949585,
|
| 67026 |
+
"learning_rate": 0.00016449501594418783,
|
| 67027 |
+
"loss": 4.7081,
|
| 67028 |
+
"step": 9555
|
| 67029 |
+
},
|
| 67030 |
+
{
|
| 67031 |
+
"epoch": 1.0559146914194155,
|
| 67032 |
+
"grad_norm": 0.7150010466575623,
|
| 67033 |
+
"learning_rate": 0.00016446588315177437,
|
| 67034 |
+
"loss": 4.6943,
|
| 67035 |
+
"step": 9556
|
| 67036 |
+
},
|
| 67037 |
+
{
|
| 67038 |
+
"epoch": 1.0560251947621415,
|
| 67039 |
+
"grad_norm": 0.7047460675239563,
|
| 67040 |
+
"learning_rate": 0.00016443674980856215,
|
| 67041 |
+
"loss": 4.5978,
|
| 67042 |
+
"step": 9557
|
| 67043 |
+
},
|
| 67044 |
+
{
|
| 67045 |
+
"epoch": 1.0561356981048677,
|
| 67046 |
+
"grad_norm": 0.7155852317810059,
|
| 67047 |
+
"learning_rate": 0.00016440761591566042,
|
| 67048 |
+
"loss": 4.5466,
|
| 67049 |
+
"step": 9558
|
| 67050 |
+
},
|
| 67051 |
+
{
|
| 67052 |
+
"epoch": 1.0562462014475937,
|
| 67053 |
+
"grad_norm": 0.7119560241699219,
|
| 67054 |
+
"learning_rate": 0.00016437848147417855,
|
| 67055 |
+
"loss": 4.5943,
|
| 67056 |
+
"step": 9559
|
| 67057 |
+
},
|
| 67058 |
+
{
|
| 67059 |
+
"epoch": 1.05635670479032,
|
| 67060 |
+
"grad_norm": 0.710186779499054,
|
| 67061 |
+
"learning_rate": 0.0001643493464852258,
|
| 67062 |
+
"loss": 4.653,
|
| 67063 |
+
"step": 9560
|
| 67064 |
+
},
|
| 67065 |
+
{
|
| 67066 |
+
"epoch": 1.056467208133046,
|
| 67067 |
+
"grad_norm": 0.7136805653572083,
|
| 67068 |
+
"learning_rate": 0.0001643202109499115,
|
| 67069 |
+
"loss": 4.5659,
|
| 67070 |
+
"step": 9561
|
| 67071 |
+
},
|
| 67072 |
+
{
|
| 67073 |
+
"epoch": 1.0565777114757722,
|
| 67074 |
+
"grad_norm": 0.738735556602478,
|
| 67075 |
+
"learning_rate": 0.00016429107486934505,
|
| 67076 |
+
"loss": 4.5649,
|
| 67077 |
+
"step": 9562
|
| 67078 |
+
},
|
| 67079 |
+
{
|
| 67080 |
+
"epoch": 1.0566882148184982,
|
| 67081 |
+
"grad_norm": 0.6875008940696716,
|
| 67082 |
+
"learning_rate": 0.00016426193824463578,
|
| 67083 |
+
"loss": 4.6022,
|
| 67084 |
+
"step": 9563
|
| 67085 |
+
},
|
| 67086 |
+
{
|
| 67087 |
+
"epoch": 1.0567987181612244,
|
| 67088 |
+
"grad_norm": 0.7216543555259705,
|
| 67089 |
+
"learning_rate": 0.00016423280107689313,
|
| 67090 |
+
"loss": 4.5472,
|
| 67091 |
+
"step": 9564
|
| 67092 |
+
},
|
| 67093 |
+
{
|
| 67094 |
+
"epoch": 1.0569092215039504,
|
| 67095 |
+
"grad_norm": 0.709260880947113,
|
| 67096 |
+
"learning_rate": 0.00016420366336722652,
|
| 67097 |
+
"loss": 4.5745,
|
| 67098 |
+
"step": 9565
|
| 67099 |
+
},
|
| 67100 |
+
{
|
| 67101 |
+
"epoch": 1.0570197248466766,
|
| 67102 |
+
"grad_norm": 0.6689321994781494,
|
| 67103 |
+
"learning_rate": 0.00016417452511674526,
|
| 67104 |
+
"loss": 4.6357,
|
| 67105 |
+
"step": 9566
|
| 67106 |
+
},
|
| 67107 |
+
{
|
| 67108 |
+
"epoch": 1.0571302281894028,
|
| 67109 |
+
"grad_norm": 0.7391270995140076,
|
| 67110 |
+
"learning_rate": 0.000164145386326559,
|
| 67111 |
+
"loss": 4.5715,
|
| 67112 |
+
"step": 9567
|
| 67113 |
+
},
|
| 67114 |
+
{
|
| 67115 |
+
"epoch": 1.0572407315321288,
|
| 67116 |
+
"grad_norm": 0.7267792224884033,
|
| 67117 |
+
"learning_rate": 0.00016411624699777717,
|
| 67118 |
+
"loss": 4.5933,
|
| 67119 |
+
"step": 9568
|
| 67120 |
+
},
|
| 67121 |
+
{
|
| 67122 |
+
"epoch": 1.057351234874855,
|
| 67123 |
+
"grad_norm": 0.7358699440956116,
|
| 67124 |
+
"learning_rate": 0.00016408710713150917,
|
| 67125 |
+
"loss": 4.652,
|
| 67126 |
+
"step": 9569
|
| 67127 |
+
},
|
| 67128 |
+
{
|
| 67129 |
+
"epoch": 1.057461738217581,
|
| 67130 |
+
"grad_norm": 0.7131432294845581,
|
| 67131 |
+
"learning_rate": 0.00016405796672886458,
|
| 67132 |
+
"loss": 4.6151,
|
| 67133 |
+
"step": 9570
|
| 67134 |
+
},
|
| 67135 |
+
{
|
| 67136 |
+
"epoch": 1.0575722415603073,
|
| 67137 |
+
"grad_norm": 0.6817795038223267,
|
| 67138 |
+
"learning_rate": 0.000164028825790953,
|
| 67139 |
+
"loss": 4.5136,
|
| 67140 |
+
"step": 9571
|
| 67141 |
+
},
|
| 67142 |
+
{
|
| 67143 |
+
"epoch": 1.0576827449030333,
|
| 67144 |
+
"grad_norm": 0.6878102421760559,
|
| 67145 |
+
"learning_rate": 0.0001639996843188839,
|
| 67146 |
+
"loss": 4.5511,
|
| 67147 |
+
"step": 9572
|
| 67148 |
+
},
|
| 67149 |
+
{
|
| 67150 |
+
"epoch": 1.0577932482457595,
|
| 67151 |
+
"grad_norm": 0.6872119307518005,
|
| 67152 |
+
"learning_rate": 0.00016397054231376694,
|
| 67153 |
+
"loss": 4.5604,
|
| 67154 |
+
"step": 9573
|
| 67155 |
+
},
|
| 67156 |
+
{
|
| 67157 |
+
"epoch": 1.0579037515884855,
|
| 67158 |
+
"grad_norm": 0.6655972003936768,
|
| 67159 |
+
"learning_rate": 0.00016394139977671162,
|
| 67160 |
+
"loss": 4.6084,
|
| 67161 |
+
"step": 9574
|
| 67162 |
+
},
|
| 67163 |
+
{
|
| 67164 |
+
"epoch": 1.0580142549312117,
|
| 67165 |
+
"grad_norm": 0.6809372305870056,
|
| 67166 |
+
"learning_rate": 0.0001639122567088277,
|
| 67167 |
+
"loss": 4.6181,
|
| 67168 |
+
"step": 9575
|
| 67169 |
+
},
|
| 67170 |
+
{
|
| 67171 |
+
"epoch": 1.0581247582739377,
|
| 67172 |
+
"grad_norm": 0.6768514513969421,
|
| 67173 |
+
"learning_rate": 0.0001638831131112247,
|
| 67174 |
+
"loss": 4.6602,
|
| 67175 |
+
"step": 9576
|
| 67176 |
+
},
|
| 67177 |
+
{
|
| 67178 |
+
"epoch": 1.058235261616664,
|
| 67179 |
+
"grad_norm": 0.668408989906311,
|
| 67180 |
+
"learning_rate": 0.00016385396898501236,
|
| 67181 |
+
"loss": 4.7139,
|
| 67182 |
+
"step": 9577
|
| 67183 |
+
},
|
| 67184 |
+
{
|
| 67185 |
+
"epoch": 1.05834576495939,
|
| 67186 |
+
"grad_norm": 0.7192389965057373,
|
| 67187 |
+
"learning_rate": 0.00016382482433130032,
|
| 67188 |
+
"loss": 4.6043,
|
| 67189 |
+
"step": 9578
|
| 67190 |
+
},
|
| 67191 |
+
{
|
| 67192 |
+
"epoch": 1.0584562683021161,
|
| 67193 |
+
"grad_norm": 0.7047439217567444,
|
| 67194 |
+
"learning_rate": 0.0001637956791511983,
|
| 67195 |
+
"loss": 4.6121,
|
| 67196 |
+
"step": 9579
|
| 67197 |
+
},
|
| 67198 |
+
{
|
| 67199 |
+
"epoch": 1.0585667716448424,
|
| 67200 |
+
"grad_norm": 0.6781920790672302,
|
| 67201 |
+
"learning_rate": 0.00016376653344581606,
|
| 67202 |
+
"loss": 4.6298,
|
| 67203 |
+
"step": 9580
|
| 67204 |
+
},
|
| 67205 |
+
{
|
| 67206 |
+
"epoch": 1.0586772749875684,
|
| 67207 |
+
"grad_norm": 0.740864098072052,
|
| 67208 |
+
"learning_rate": 0.0001637373872162633,
|
| 67209 |
+
"loss": 4.6414,
|
| 67210 |
+
"step": 9581
|
| 67211 |
+
},
|
| 67212 |
+
{
|
| 67213 |
+
"epoch": 1.0587877783302946,
|
| 67214 |
+
"grad_norm": 0.6698533296585083,
|
| 67215 |
+
"learning_rate": 0.0001637082404636497,
|
| 67216 |
+
"loss": 4.6181,
|
| 67217 |
+
"step": 9582
|
| 67218 |
+
},
|
| 67219 |
+
{
|
| 67220 |
+
"epoch": 1.0588982816730206,
|
| 67221 |
+
"grad_norm": 0.7175453305244446,
|
| 67222 |
+
"learning_rate": 0.00016367909318908526,
|
| 67223 |
+
"loss": 4.5408,
|
| 67224 |
+
"step": 9583
|
| 67225 |
+
},
|
| 67226 |
+
{
|
| 67227 |
+
"epoch": 1.0590087850157468,
|
| 67228 |
+
"grad_norm": 0.6953810453414917,
|
| 67229 |
+
"learning_rate": 0.00016364994539367958,
|
| 67230 |
+
"loss": 4.4778,
|
| 67231 |
+
"step": 9584
|
| 67232 |
+
},
|
| 67233 |
+
{
|
| 67234 |
+
"epoch": 1.0591192883584728,
|
| 67235 |
+
"grad_norm": 0.6498059630393982,
|
| 67236 |
+
"learning_rate": 0.00016362079707854258,
|
| 67237 |
+
"loss": 4.565,
|
| 67238 |
+
"step": 9585
|
| 67239 |
+
},
|
| 67240 |
+
{
|
| 67241 |
+
"epoch": 1.059229791701199,
|
| 67242 |
+
"grad_norm": 0.6916860938072205,
|
| 67243 |
+
"learning_rate": 0.0001635916482447841,
|
| 67244 |
+
"loss": 4.471,
|
| 67245 |
+
"step": 9586
|
| 67246 |
+
},
|
| 67247 |
+
{
|
| 67248 |
+
"epoch": 1.059340295043925,
|
| 67249 |
+
"grad_norm": 0.7072685360908508,
|
| 67250 |
+
"learning_rate": 0.00016356249889351395,
|
| 67251 |
+
"loss": 4.6541,
|
| 67252 |
+
"step": 9587
|
| 67253 |
+
},
|
| 67254 |
+
{
|
| 67255 |
+
"epoch": 1.0594507983866512,
|
| 67256 |
+
"grad_norm": 0.7356172800064087,
|
| 67257 |
+
"learning_rate": 0.00016353334902584208,
|
| 67258 |
+
"loss": 4.6198,
|
| 67259 |
+
"step": 9588
|
| 67260 |
+
},
|
| 67261 |
+
{
|
| 67262 |
+
"epoch": 1.0595613017293772,
|
| 67263 |
+
"grad_norm": 0.6796469688415527,
|
| 67264 |
+
"learning_rate": 0.00016350419864287838,
|
| 67265 |
+
"loss": 4.6743,
|
| 67266 |
+
"step": 9589
|
| 67267 |
+
},
|
| 67268 |
+
{
|
| 67269 |
+
"epoch": 1.0596718050721035,
|
| 67270 |
+
"grad_norm": 0.6633342504501343,
|
| 67271 |
+
"learning_rate": 0.00016347504774573264,
|
| 67272 |
+
"loss": 4.6093,
|
| 67273 |
+
"step": 9590
|
| 67274 |
+
},
|
| 67275 |
+
{
|
| 67276 |
+
"epoch": 1.0597823084148295,
|
| 67277 |
+
"grad_norm": 0.7130044102668762,
|
| 67278 |
+
"learning_rate": 0.00016344589633551502,
|
| 67279 |
+
"loss": 4.599,
|
| 67280 |
+
"step": 9591
|
| 67281 |
+
},
|
| 67282 |
+
{
|
| 67283 |
+
"epoch": 1.0598928117575557,
|
| 67284 |
+
"grad_norm": 0.6969127655029297,
|
| 67285 |
+
"learning_rate": 0.00016341674441333533,
|
| 67286 |
+
"loss": 4.6894,
|
| 67287 |
+
"step": 9592
|
| 67288 |
+
},
|
| 67289 |
+
{
|
| 67290 |
+
"epoch": 1.060003315100282,
|
| 67291 |
+
"grad_norm": 0.6777133941650391,
|
| 67292 |
+
"learning_rate": 0.00016338759198030364,
|
| 67293 |
+
"loss": 4.5822,
|
| 67294 |
+
"step": 9593
|
| 67295 |
+
},
|
| 67296 |
+
{
|
| 67297 |
+
"epoch": 1.060113818443008,
|
| 67298 |
+
"grad_norm": 0.697536289691925,
|
| 67299 |
+
"learning_rate": 0.00016335843903752983,
|
| 67300 |
+
"loss": 4.6161,
|
| 67301 |
+
"step": 9594
|
| 67302 |
+
},
|
| 67303 |
+
{
|
| 67304 |
+
"epoch": 1.0602243217857341,
|
| 67305 |
+
"grad_norm": 0.6940416693687439,
|
| 67306 |
+
"learning_rate": 0.00016332928558612404,
|
| 67307 |
+
"loss": 4.5651,
|
| 67308 |
+
"step": 9595
|
| 67309 |
+
},
|
| 67310 |
+
{
|
| 67311 |
+
"epoch": 1.0603348251284601,
|
| 67312 |
+
"grad_norm": 0.6763487458229065,
|
| 67313 |
+
"learning_rate": 0.00016330013162719624,
|
| 67314 |
+
"loss": 4.5665,
|
| 67315 |
+
"step": 9596
|
| 67316 |
+
},
|
| 67317 |
+
{
|
| 67318 |
+
"epoch": 1.0604453284711863,
|
| 67319 |
+
"grad_norm": 0.6880263090133667,
|
| 67320 |
+
"learning_rate": 0.00016327097716185653,
|
| 67321 |
+
"loss": 4.6926,
|
| 67322 |
+
"step": 9597
|
| 67323 |
+
},
|
| 67324 |
+
{
|
| 67325 |
+
"epoch": 1.0605558318139123,
|
| 67326 |
+
"grad_norm": 0.6698249578475952,
|
| 67327 |
+
"learning_rate": 0.0001632418221912149,
|
| 67328 |
+
"loss": 4.5941,
|
| 67329 |
+
"step": 9598
|
| 67330 |
+
},
|
| 67331 |
+
{
|
| 67332 |
+
"epoch": 1.0606663351566386,
|
| 67333 |
+
"grad_norm": 0.6980186104774475,
|
| 67334 |
+
"learning_rate": 0.00016321266671638157,
|
| 67335 |
+
"loss": 4.6592,
|
| 67336 |
+
"step": 9599
|
| 67337 |
+
},
|
| 67338 |
+
{
|
| 67339 |
+
"epoch": 1.0607768384993645,
|
| 67340 |
+
"grad_norm": 0.6785933375358582,
|
| 67341 |
+
"learning_rate": 0.00016318351073846656,
|
| 67342 |
+
"loss": 4.7119,
|
| 67343 |
+
"step": 9600
|
| 67344 |
+
},
|
| 67345 |
+
{
|
| 67346 |
+
"epoch": 1.0608873418420908,
|
| 67347 |
+
"grad_norm": 0.6808221340179443,
|
| 67348 |
+
"learning_rate": 0.00016315435425858006,
|
| 67349 |
+
"loss": 4.5971,
|
| 67350 |
+
"step": 9601
|
| 67351 |
+
},
|
| 67352 |
+
{
|
| 67353 |
+
"epoch": 1.0609978451848168,
|
| 67354 |
+
"grad_norm": 0.6913832426071167,
|
| 67355 |
+
"learning_rate": 0.00016312519727783222,
|
| 67356 |
+
"loss": 4.6199,
|
| 67357 |
+
"step": 9602
|
| 67358 |
+
},
|
| 67359 |
+
{
|
| 67360 |
+
"epoch": 1.061108348527543,
|
| 67361 |
+
"grad_norm": 0.7219663262367249,
|
| 67362 |
+
"learning_rate": 0.00016309603979733315,
|
| 67363 |
+
"loss": 4.6882,
|
| 67364 |
+
"step": 9603
|
| 67365 |
+
},
|
| 67366 |
+
{
|
| 67367 |
+
"epoch": 1.061218851870269,
|
| 67368 |
+
"grad_norm": 0.6851358413696289,
|
| 67369 |
+
"learning_rate": 0.00016306688181819316,
|
| 67370 |
+
"loss": 4.5895,
|
| 67371 |
+
"step": 9604
|
| 67372 |
+
},
|
| 67373 |
+
{
|
| 67374 |
+
"epoch": 1.0613293552129952,
|
| 67375 |
+
"grad_norm": 0.7397948503494263,
|
| 67376 |
+
"learning_rate": 0.00016303772334152233,
|
| 67377 |
+
"loss": 4.5343,
|
| 67378 |
+
"step": 9605
|
| 67379 |
+
},
|
| 67380 |
+
{
|
| 67381 |
+
"epoch": 1.0614398585557212,
|
| 67382 |
+
"grad_norm": 0.6957864761352539,
|
| 67383 |
+
"learning_rate": 0.00016300856436843092,
|
| 67384 |
+
"loss": 4.7281,
|
| 67385 |
+
"step": 9606
|
| 67386 |
+
},
|
| 67387 |
+
{
|
| 67388 |
+
"epoch": 1.0615503618984474,
|
| 67389 |
+
"grad_norm": 0.7005098462104797,
|
| 67390 |
+
"learning_rate": 0.00016297940490002928,
|
| 67391 |
+
"loss": 4.6451,
|
| 67392 |
+
"step": 9607
|
| 67393 |
+
},
|
| 67394 |
+
{
|
| 67395 |
+
"epoch": 1.0616608652411736,
|
| 67396 |
+
"grad_norm": 0.7604179978370667,
|
| 67397 |
+
"learning_rate": 0.00016295024493742758,
|
| 67398 |
+
"loss": 4.6104,
|
| 67399 |
+
"step": 9608
|
| 67400 |
+
},
|
| 67401 |
+
{
|
| 67402 |
+
"epoch": 1.0617713685838996,
|
| 67403 |
+
"grad_norm": 0.6816504597663879,
|
| 67404 |
+
"learning_rate": 0.00016292108448173612,
|
| 67405 |
+
"loss": 4.6101,
|
| 67406 |
+
"step": 9609
|
| 67407 |
+
},
|
| 67408 |
+
{
|
| 67409 |
+
"epoch": 1.0618818719266259,
|
| 67410 |
+
"grad_norm": 0.6791014075279236,
|
| 67411 |
+
"learning_rate": 0.0001628919235340652,
|
| 67412 |
+
"loss": 4.4602,
|
| 67413 |
+
"step": 9610
|
| 67414 |
+
},
|
| 67415 |
+
{
|
| 67416 |
+
"epoch": 1.0619923752693519,
|
| 67417 |
+
"grad_norm": 0.7398301959037781,
|
| 67418 |
+
"learning_rate": 0.00016286276209552517,
|
| 67419 |
+
"loss": 4.5566,
|
| 67420 |
+
"step": 9611
|
| 67421 |
+
},
|
| 67422 |
+
{
|
| 67423 |
+
"epoch": 1.062102878612078,
|
| 67424 |
+
"grad_norm": 0.7264760732650757,
|
| 67425 |
+
"learning_rate": 0.00016283360016722641,
|
| 67426 |
+
"loss": 4.616,
|
| 67427 |
+
"step": 9612
|
| 67428 |
+
},
|
| 67429 |
+
{
|
| 67430 |
+
"epoch": 1.062213381954804,
|
| 67431 |
+
"grad_norm": 0.675262987613678,
|
| 67432 |
+
"learning_rate": 0.0001628044377502792,
|
| 67433 |
+
"loss": 4.6071,
|
| 67434 |
+
"step": 9613
|
| 67435 |
+
},
|
| 67436 |
+
{
|
| 67437 |
+
"epoch": 1.0623238852975303,
|
| 67438 |
+
"grad_norm": 0.7211140990257263,
|
| 67439 |
+
"learning_rate": 0.0001627752748457939,
|
| 67440 |
+
"loss": 4.654,
|
| 67441 |
+
"step": 9614
|
| 67442 |
+
},
|
| 67443 |
+
{
|
| 67444 |
+
"epoch": 1.0624343886402563,
|
| 67445 |
+
"grad_norm": 0.6667706966400146,
|
| 67446 |
+
"learning_rate": 0.000162746111454881,
|
| 67447 |
+
"loss": 4.6208,
|
| 67448 |
+
"step": 9615
|
| 67449 |
+
},
|
| 67450 |
+
{
|
| 67451 |
+
"epoch": 1.0625448919829825,
|
| 67452 |
+
"grad_norm": 0.6784243583679199,
|
| 67453 |
+
"learning_rate": 0.00016271694757865086,
|
| 67454 |
+
"loss": 4.62,
|
| 67455 |
+
"step": 9616
|
| 67456 |
+
},
|
| 67457 |
+
{
|
| 67458 |
+
"epoch": 1.0626553953257085,
|
| 67459 |
+
"grad_norm": 0.7123017907142639,
|
| 67460 |
+
"learning_rate": 0.00016268778321821398,
|
| 67461 |
+
"loss": 4.6318,
|
| 67462 |
+
"step": 9617
|
| 67463 |
+
},
|
| 67464 |
+
{
|
| 67465 |
+
"epoch": 1.0627658986684347,
|
| 67466 |
+
"grad_norm": 0.6763114929199219,
|
| 67467 |
+
"learning_rate": 0.00016265861837468074,
|
| 67468 |
+
"loss": 4.5183,
|
| 67469 |
+
"step": 9618
|
| 67470 |
+
},
|
| 67471 |
+
{
|
| 67472 |
+
"epoch": 1.0628764020111607,
|
| 67473 |
+
"grad_norm": 0.7034937739372253,
|
| 67474 |
+
"learning_rate": 0.00016262945304916163,
|
| 67475 |
+
"loss": 4.5944,
|
| 67476 |
+
"step": 9619
|
| 67477 |
+
},
|
| 67478 |
+
{
|
| 67479 |
+
"epoch": 1.062986905353887,
|
| 67480 |
+
"grad_norm": 0.7120142579078674,
|
| 67481 |
+
"learning_rate": 0.00016260028724276716,
|
| 67482 |
+
"loss": 4.5908,
|
| 67483 |
+
"step": 9620
|
| 67484 |
+
},
|
| 67485 |
+
{
|
| 67486 |
+
"epoch": 1.063097408696613,
|
| 67487 |
+
"grad_norm": 0.7416344285011292,
|
| 67488 |
+
"learning_rate": 0.00016257112095660785,
|
| 67489 |
+
"loss": 4.5365,
|
| 67490 |
+
"step": 9621
|
| 67491 |
+
},
|
| 67492 |
+
{
|
| 67493 |
+
"epoch": 1.0632079120393392,
|
| 67494 |
+
"grad_norm": 0.7130916714668274,
|
| 67495 |
+
"learning_rate": 0.0001625419541917942,
|
| 67496 |
+
"loss": 4.5953,
|
| 67497 |
+
"step": 9622
|
| 67498 |
+
},
|
| 67499 |
+
{
|
| 67500 |
+
"epoch": 1.0633184153820654,
|
| 67501 |
+
"grad_norm": 0.7556804418563843,
|
| 67502 |
+
"learning_rate": 0.0001625127869494367,
|
| 67503 |
+
"loss": 4.4956,
|
| 67504 |
+
"step": 9623
|
| 67505 |
+
},
|
| 67506 |
+
{
|
| 67507 |
+
"epoch": 1.0634289187247914,
|
| 67508 |
+
"grad_norm": 0.7081581950187683,
|
| 67509 |
+
"learning_rate": 0.00016248361923064598,
|
| 67510 |
+
"loss": 4.5466,
|
| 67511 |
+
"step": 9624
|
| 67512 |
+
},
|
| 67513 |
+
{
|
| 67514 |
+
"epoch": 1.0635394220675176,
|
| 67515 |
+
"grad_norm": 0.6770861744880676,
|
| 67516 |
+
"learning_rate": 0.0001624544510365327,
|
| 67517 |
+
"loss": 4.5837,
|
| 67518 |
+
"step": 9625
|
| 67519 |
+
},
|
| 67520 |
+
{
|
| 67521 |
+
"epoch": 1.0636499254102436,
|
| 67522 |
+
"grad_norm": 0.7670760750770569,
|
| 67523 |
+
"learning_rate": 0.0001624252823682073,
|
| 67524 |
+
"loss": 4.5534,
|
| 67525 |
+
"step": 9626
|
| 67526 |
+
},
|
| 67527 |
+
{
|
| 67528 |
+
"epoch": 1.0637604287529698,
|
| 67529 |
+
"grad_norm": 0.7570769190788269,
|
| 67530 |
+
"learning_rate": 0.00016239611322678047,
|
| 67531 |
+
"loss": 4.5181,
|
| 67532 |
+
"step": 9627
|
| 67533 |
+
},
|
| 67534 |
+
{
|
| 67535 |
+
"epoch": 1.0638709320956958,
|
| 67536 |
+
"grad_norm": 0.7017266154289246,
|
| 67537 |
+
"learning_rate": 0.0001623669436133629,
|
| 67538 |
+
"loss": 4.7004,
|
| 67539 |
+
"step": 9628
|
| 67540 |
+
},
|
| 67541 |
+
{
|
| 67542 |
+
"epoch": 1.063981435438422,
|
| 67543 |
+
"grad_norm": 0.7252664566040039,
|
| 67544 |
+
"learning_rate": 0.00016233777352906513,
|
| 67545 |
+
"loss": 4.6577,
|
| 67546 |
+
"step": 9629
|
| 67547 |
+
},
|
| 67548 |
+
{
|
| 67549 |
+
"epoch": 1.064091938781148,
|
| 67550 |
+
"grad_norm": 0.6942800283432007,
|
| 67551 |
+
"learning_rate": 0.00016230860297499795,
|
| 67552 |
+
"loss": 4.6535,
|
| 67553 |
+
"step": 9630
|
| 67554 |
+
},
|
| 67555 |
+
{
|
| 67556 |
+
"epoch": 1.0642024421238743,
|
| 67557 |
+
"grad_norm": 0.6856210827827454,
|
| 67558 |
+
"learning_rate": 0.00016227943195227197,
|
| 67559 |
+
"loss": 4.6741,
|
| 67560 |
+
"step": 9631
|
| 67561 |
+
},
|
| 67562 |
+
{
|
| 67563 |
+
"epoch": 1.0643129454666003,
|
| 67564 |
+
"grad_norm": 0.6820597648620605,
|
| 67565 |
+
"learning_rate": 0.0001622502604619979,
|
| 67566 |
+
"loss": 4.6315,
|
| 67567 |
+
"step": 9632
|
| 67568 |
+
},
|
| 67569 |
+
{
|
| 67570 |
+
"epoch": 1.0644234488093265,
|
| 67571 |
+
"grad_norm": 0.7034296989440918,
|
| 67572 |
+
"learning_rate": 0.0001622210885052865,
|
| 67573 |
+
"loss": 4.4425,
|
| 67574 |
+
"step": 9633
|
| 67575 |
+
},
|
| 67576 |
+
{
|
| 67577 |
+
"epoch": 1.0645339521520527,
|
| 67578 |
+
"grad_norm": 0.6954776644706726,
|
| 67579 |
+
"learning_rate": 0.00016219191608324852,
|
| 67580 |
+
"loss": 4.5701,
|
| 67581 |
+
"step": 9634
|
| 67582 |
+
},
|
| 67583 |
+
{
|
| 67584 |
+
"epoch": 1.0646444554947787,
|
| 67585 |
+
"grad_norm": 0.6776039600372314,
|
| 67586 |
+
"learning_rate": 0.00016216274319699462,
|
| 67587 |
+
"loss": 4.5633,
|
| 67588 |
+
"step": 9635
|
| 67589 |
+
},
|
| 67590 |
+
{
|
| 67591 |
+
"epoch": 1.064754958837505,
|
| 67592 |
+
"grad_norm": 0.7073492407798767,
|
| 67593 |
+
"learning_rate": 0.00016213356984763578,
|
| 67594 |
+
"loss": 4.665,
|
| 67595 |
+
"step": 9636
|
| 67596 |
+
},
|
| 67597 |
+
{
|
| 67598 |
+
"epoch": 1.064865462180231,
|
| 67599 |
+
"grad_norm": 0.6944239139556885,
|
| 67600 |
+
"learning_rate": 0.0001621043960362826,
|
| 67601 |
+
"loss": 4.4573,
|
| 67602 |
+
"step": 9637
|
| 67603 |
+
},
|
| 67604 |
+
{
|
| 67605 |
+
"epoch": 1.0649759655229571,
|
| 67606 |
+
"grad_norm": 0.7082187533378601,
|
| 67607 |
+
"learning_rate": 0.00016207522176404598,
|
| 67608 |
+
"loss": 4.624,
|
| 67609 |
+
"step": 9638
|
| 67610 |
+
},
|
| 67611 |
+
{
|
| 67612 |
+
"epoch": 1.0650864688656831,
|
| 67613 |
+
"grad_norm": 0.6629602313041687,
|
| 67614 |
+
"learning_rate": 0.00016204604703203674,
|
| 67615 |
+
"loss": 4.5168,
|
| 67616 |
+
"step": 9639
|
| 67617 |
+
},
|
| 67618 |
+
{
|
| 67619 |
+
"epoch": 1.0651969722084094,
|
| 67620 |
+
"grad_norm": 0.7364920973777771,
|
| 67621 |
+
"learning_rate": 0.00016201687184136566,
|
| 67622 |
+
"loss": 4.6381,
|
| 67623 |
+
"step": 9640
|
| 67624 |
+
},
|
| 67625 |
+
{
|
| 67626 |
+
"epoch": 1.0653074755511354,
|
| 67627 |
+
"grad_norm": 0.735372006893158,
|
| 67628 |
+
"learning_rate": 0.0001619876961931438,
|
| 67629 |
+
"loss": 4.581,
|
| 67630 |
+
"step": 9641
|
| 67631 |
+
},
|
| 67632 |
+
{
|
| 67633 |
+
"epoch": 1.0654179788938616,
|
| 67634 |
+
"grad_norm": 0.7016334533691406,
|
| 67635 |
+
"learning_rate": 0.0001619585200884818,
|
| 67636 |
+
"loss": 4.5768,
|
| 67637 |
+
"step": 9642
|
| 67638 |
+
},
|
| 67639 |
+
{
|
| 67640 |
+
"epoch": 1.0655284822365876,
|
| 67641 |
+
"grad_norm": 0.6812182664871216,
|
| 67642 |
+
"learning_rate": 0.00016192934352849073,
|
| 67643 |
+
"loss": 4.6287,
|
| 67644 |
+
"step": 9643
|
| 67645 |
+
},
|
| 67646 |
+
{
|
| 67647 |
+
"epoch": 1.0656389855793138,
|
| 67648 |
+
"grad_norm": 0.7093214392662048,
|
| 67649 |
+
"learning_rate": 0.00016190016651428143,
|
| 67650 |
+
"loss": 4.5321,
|
| 67651 |
+
"step": 9644
|
| 67652 |
+
},
|
| 67653 |
+
{
|
| 67654 |
+
"epoch": 1.0657494889220398,
|
| 67655 |
+
"grad_norm": 0.7052456140518188,
|
| 67656 |
+
"learning_rate": 0.00016187098904696487,
|
| 67657 |
+
"loss": 4.614,
|
| 67658 |
+
"step": 9645
|
| 67659 |
+
},
|
| 67660 |
+
{
|
| 67661 |
+
"epoch": 1.065859992264766,
|
| 67662 |
+
"grad_norm": 0.6540815234184265,
|
| 67663 |
+
"learning_rate": 0.00016184181112765202,
|
| 67664 |
+
"loss": 4.6226,
|
| 67665 |
+
"step": 9646
|
| 67666 |
+
},
|
| 67667 |
+
{
|
| 67668 |
+
"epoch": 1.065970495607492,
|
| 67669 |
+
"grad_norm": 0.706958532333374,
|
| 67670 |
+
"learning_rate": 0.0001618126327574538,
|
| 67671 |
+
"loss": 4.6379,
|
| 67672 |
+
"step": 9647
|
| 67673 |
+
},
|
| 67674 |
+
{
|
| 67675 |
+
"epoch": 1.0660809989502182,
|
| 67676 |
+
"grad_norm": 0.745863139629364,
|
| 67677 |
+
"learning_rate": 0.00016178345393748122,
|
| 67678 |
+
"loss": 4.6056,
|
| 67679 |
+
"step": 9648
|
| 67680 |
+
},
|
| 67681 |
+
{
|
| 67682 |
+
"epoch": 1.0661915022929445,
|
| 67683 |
+
"grad_norm": 0.7013378739356995,
|
| 67684 |
+
"learning_rate": 0.0001617542746688453,
|
| 67685 |
+
"loss": 4.5612,
|
| 67686 |
+
"step": 9649
|
| 67687 |
+
},
|
| 67688 |
+
{
|
| 67689 |
+
"epoch": 1.0663020056356705,
|
| 67690 |
+
"grad_norm": 0.6978210210800171,
|
| 67691 |
+
"learning_rate": 0.00016172509495265703,
|
| 67692 |
+
"loss": 4.5763,
|
| 67693 |
+
"step": 9650
|
| 67694 |
+
},
|
| 67695 |
+
{
|
| 67696 |
+
"epoch": 1.0664125089783967,
|
| 67697 |
+
"grad_norm": 0.7045858502388,
|
| 67698 |
+
"learning_rate": 0.00016169591479002742,
|
| 67699 |
+
"loss": 4.5697,
|
| 67700 |
+
"step": 9651
|
| 67701 |
+
},
|
| 67702 |
+
{
|
| 67703 |
+
"epoch": 1.0665230123211227,
|
| 67704 |
+
"grad_norm": 0.7374753355979919,
|
| 67705 |
+
"learning_rate": 0.00016166673418206766,
|
| 67706 |
+
"loss": 4.5922,
|
| 67707 |
+
"step": 9652
|
| 67708 |
+
},
|
| 67709 |
+
{
|
| 67710 |
+
"epoch": 1.066633515663849,
|
| 67711 |
+
"grad_norm": 0.7129539847373962,
|
| 67712 |
+
"learning_rate": 0.00016163755312988866,
|
| 67713 |
+
"loss": 4.6264,
|
| 67714 |
+
"step": 9653
|
| 67715 |
+
},
|
| 67716 |
+
{
|
| 67717 |
+
"epoch": 1.066744019006575,
|
| 67718 |
+
"grad_norm": 0.6969203948974609,
|
| 67719 |
+
"learning_rate": 0.00016160837163460166,
|
| 67720 |
+
"loss": 4.7043,
|
| 67721 |
+
"step": 9654
|
| 67722 |
+
},
|
| 67723 |
+
{
|
| 67724 |
+
"epoch": 1.0668545223493011,
|
| 67725 |
+
"grad_norm": 0.7094765901565552,
|
| 67726 |
+
"learning_rate": 0.00016157918969731758,
|
| 67727 |
+
"loss": 4.6211,
|
| 67728 |
+
"step": 9655
|
| 67729 |
+
},
|
| 67730 |
+
{
|
| 67731 |
+
"epoch": 1.0669650256920271,
|
| 67732 |
+
"grad_norm": 0.6695538759231567,
|
| 67733 |
+
"learning_rate": 0.0001615500073191477,
|
| 67734 |
+
"loss": 4.6393,
|
| 67735 |
+
"step": 9656
|
| 67736 |
+
},
|
| 67737 |
+
{
|
| 67738 |
+
"epoch": 1.0670755290347533,
|
| 67739 |
+
"grad_norm": 0.6737130284309387,
|
| 67740 |
+
"learning_rate": 0.00016152082450120314,
|
| 67741 |
+
"loss": 4.664,
|
| 67742 |
+
"step": 9657
|
| 67743 |
+
},
|
| 67744 |
+
{
|
| 67745 |
+
"epoch": 1.0671860323774793,
|
| 67746 |
+
"grad_norm": 0.6646016836166382,
|
| 67747 |
+
"learning_rate": 0.00016149164124459498,
|
| 67748 |
+
"loss": 4.5705,
|
| 67749 |
+
"step": 9658
|
| 67750 |
+
},
|
| 67751 |
+
{
|
| 67752 |
+
"epoch": 1.0672965357202056,
|
| 67753 |
+
"grad_norm": 0.678851306438446,
|
| 67754 |
+
"learning_rate": 0.00016146245755043444,
|
| 67755 |
+
"loss": 4.62,
|
| 67756 |
+
"step": 9659
|
| 67757 |
+
},
|
| 67758 |
+
{
|
| 67759 |
+
"epoch": 1.0674070390629316,
|
| 67760 |
+
"grad_norm": 0.6565046310424805,
|
| 67761 |
+
"learning_rate": 0.00016143327341983275,
|
| 67762 |
+
"loss": 4.5726,
|
| 67763 |
+
"step": 9660
|
| 67764 |
+
},
|
| 67765 |
+
{
|
| 67766 |
+
"epoch": 1.0675175424056578,
|
| 67767 |
+
"grad_norm": 0.7007474899291992,
|
| 67768 |
+
"learning_rate": 0.00016140408885390107,
|
| 67769 |
+
"loss": 4.5521,
|
| 67770 |
+
"step": 9661
|
| 67771 |
+
},
|
| 67772 |
+
{
|
| 67773 |
+
"epoch": 1.067628045748384,
|
| 67774 |
+
"grad_norm": 0.6972928047180176,
|
| 67775 |
+
"learning_rate": 0.0001613749038537506,
|
| 67776 |
+
"loss": 4.6064,
|
| 67777 |
+
"step": 9662
|
| 67778 |
+
},
|
| 67779 |
+
{
|
| 67780 |
+
"epoch": 1.06773854909111,
|
| 67781 |
+
"grad_norm": 0.694083034992218,
|
| 67782 |
+
"learning_rate": 0.00016134571842049263,
|
| 67783 |
+
"loss": 4.6059,
|
| 67784 |
+
"step": 9663
|
| 67785 |
+
},
|
| 67786 |
+
{
|
| 67787 |
+
"epoch": 1.0678490524338362,
|
| 67788 |
+
"grad_norm": 0.687408983707428,
|
| 67789 |
+
"learning_rate": 0.00016131653255523835,
|
| 67790 |
+
"loss": 4.6464,
|
| 67791 |
+
"step": 9664
|
| 67792 |
+
},
|
| 67793 |
+
{
|
| 67794 |
+
"epoch": 1.0679595557765622,
|
| 67795 |
+
"grad_norm": 0.6626368165016174,
|
| 67796 |
+
"learning_rate": 0.00016128734625909914,
|
| 67797 |
+
"loss": 4.5693,
|
| 67798 |
+
"step": 9665
|
| 67799 |
+
},
|
| 67800 |
+
{
|
| 67801 |
+
"epoch": 1.0680700591192884,
|
| 67802 |
+
"grad_norm": 0.6817365884780884,
|
| 67803 |
+
"learning_rate": 0.0001612581595331862,
|
| 67804 |
+
"loss": 4.5068,
|
| 67805 |
+
"step": 9666
|
| 67806 |
+
},
|
| 67807 |
+
{
|
| 67808 |
+
"epoch": 1.0681805624620144,
|
| 67809 |
+
"grad_norm": 0.6916432976722717,
|
| 67810 |
+
"learning_rate": 0.00016122897237861084,
|
| 67811 |
+
"loss": 4.6575,
|
| 67812 |
+
"step": 9667
|
| 67813 |
+
},
|
| 67814 |
+
{
|
| 67815 |
+
"epoch": 1.0682910658047406,
|
| 67816 |
+
"grad_norm": 0.7027813196182251,
|
| 67817 |
+
"learning_rate": 0.00016119978479648442,
|
| 67818 |
+
"loss": 4.6359,
|
| 67819 |
+
"step": 9668
|
| 67820 |
+
},
|
| 67821 |
+
{
|
| 67822 |
+
"epoch": 1.0684015691474666,
|
| 67823 |
+
"grad_norm": 0.7251218557357788,
|
| 67824 |
+
"learning_rate": 0.00016117059678791824,
|
| 67825 |
+
"loss": 4.7071,
|
| 67826 |
+
"step": 9669
|
| 67827 |
+
},
|
| 67828 |
+
{
|
| 67829 |
+
"epoch": 1.0685120724901929,
|
| 67830 |
+
"grad_norm": 0.6979719400405884,
|
| 67831 |
+
"learning_rate": 0.00016114140835402373,
|
| 67832 |
+
"loss": 4.4844,
|
| 67833 |
+
"step": 9670
|
| 67834 |
+
},
|
| 67835 |
+
{
|
| 67836 |
+
"epoch": 1.0686225758329189,
|
| 67837 |
+
"grad_norm": 0.7082657217979431,
|
| 67838 |
+
"learning_rate": 0.00016111221949591215,
|
| 67839 |
+
"loss": 4.6712,
|
| 67840 |
+
"step": 9671
|
| 67841 |
+
},
|
| 67842 |
+
{
|
| 67843 |
+
"epoch": 1.068733079175645,
|
| 67844 |
+
"grad_norm": 0.6779708862304688,
|
| 67845 |
+
"learning_rate": 0.00016108303021469494,
|
| 67846 |
+
"loss": 4.6237,
|
| 67847 |
+
"step": 9672
|
| 67848 |
+
},
|
| 67849 |
+
{
|
| 67850 |
+
"epoch": 1.068843582518371,
|
| 67851 |
+
"grad_norm": 0.6832176446914673,
|
| 67852 |
+
"learning_rate": 0.00016105384051148352,
|
| 67853 |
+
"loss": 4.6447,
|
| 67854 |
+
"step": 9673
|
| 67855 |
+
},
|
| 67856 |
+
{
|
| 67857 |
+
"epoch": 1.0689540858610973,
|
| 67858 |
+
"grad_norm": 0.7164468169212341,
|
| 67859 |
+
"learning_rate": 0.0001610246503873893,
|
| 67860 |
+
"loss": 4.6184,
|
| 67861 |
+
"step": 9674
|
| 67862 |
+
},
|
| 67863 |
+
{
|
| 67864 |
+
"epoch": 1.0690645892038235,
|
| 67865 |
+
"grad_norm": 0.6802970767021179,
|
| 67866 |
+
"learning_rate": 0.0001609954598435237,
|
| 67867 |
+
"loss": 4.5749,
|
| 67868 |
+
"step": 9675
|
| 67869 |
+
},
|
| 67870 |
+
{
|
| 67871 |
+
"epoch": 1.0691750925465495,
|
| 67872 |
+
"grad_norm": 0.7362309694290161,
|
| 67873 |
+
"learning_rate": 0.00016096626888099819,
|
| 67874 |
+
"loss": 4.4906,
|
| 67875 |
+
"step": 9676
|
| 67876 |
+
},
|
| 67877 |
+
{
|
| 67878 |
+
"epoch": 1.0692855958892757,
|
| 67879 |
+
"grad_norm": 0.6726140975952148,
|
| 67880 |
+
"learning_rate": 0.0001609370775009242,
|
| 67881 |
+
"loss": 4.5886,
|
| 67882 |
+
"step": 9677
|
| 67883 |
+
},
|
| 67884 |
+
{
|
| 67885 |
+
"epoch": 1.0693960992320017,
|
| 67886 |
+
"grad_norm": 0.6991824507713318,
|
| 67887 |
+
"learning_rate": 0.00016090788570441328,
|
| 67888 |
+
"loss": 4.6803,
|
| 67889 |
+
"step": 9678
|
| 67890 |
+
},
|
| 67891 |
+
{
|
| 67892 |
+
"epoch": 1.069506602574728,
|
| 67893 |
+
"grad_norm": 0.6654067635536194,
|
| 67894 |
+
"learning_rate": 0.00016087869349257683,
|
| 67895 |
+
"loss": 4.6364,
|
| 67896 |
+
"step": 9679
|
| 67897 |
+
},
|
| 67898 |
+
{
|
| 67899 |
+
"epoch": 1.069617105917454,
|
| 67900 |
+
"grad_norm": 0.6740732789039612,
|
| 67901 |
+
"learning_rate": 0.00016084950086652644,
|
| 67902 |
+
"loss": 4.6043,
|
| 67903 |
+
"step": 9680
|
| 67904 |
+
},
|
| 67905 |
+
{
|
| 67906 |
+
"epoch": 1.0697276092601802,
|
| 67907 |
+
"grad_norm": 0.7066823840141296,
|
| 67908 |
+
"learning_rate": 0.0001608203078273736,
|
| 67909 |
+
"loss": 4.5826,
|
| 67910 |
+
"step": 9681
|
| 67911 |
+
},
|
| 67912 |
+
{
|
| 67913 |
+
"epoch": 1.0698381126029062,
|
| 67914 |
+
"grad_norm": 0.6747539639472961,
|
| 67915 |
+
"learning_rate": 0.00016079111437622992,
|
| 67916 |
+
"loss": 4.7107,
|
| 67917 |
+
"step": 9682
|
| 67918 |
+
},
|
| 67919 |
+
{
|
| 67920 |
+
"epoch": 1.0699486159456324,
|
| 67921 |
+
"grad_norm": 0.6674532294273376,
|
| 67922 |
+
"learning_rate": 0.00016076192051420687,
|
| 67923 |
+
"loss": 4.5954,
|
| 67924 |
+
"step": 9683
|
| 67925 |
+
},
|
| 67926 |
+
{
|
| 67927 |
+
"epoch": 1.0700591192883584,
|
| 67928 |
+
"grad_norm": 0.6761782169342041,
|
| 67929 |
+
"learning_rate": 0.00016073272624241613,
|
| 67930 |
+
"loss": 4.6132,
|
| 67931 |
+
"step": 9684
|
| 67932 |
+
},
|
| 67933 |
+
{
|
| 67934 |
+
"epoch": 1.0701696226310846,
|
| 67935 |
+
"grad_norm": 0.7254160642623901,
|
| 67936 |
+
"learning_rate": 0.00016070353156196916,
|
| 67937 |
+
"loss": 4.6342,
|
| 67938 |
+
"step": 9685
|
| 67939 |
+
},
|
| 67940 |
+
{
|
| 67941 |
+
"epoch": 1.0702801259738106,
|
| 67942 |
+
"grad_norm": 0.6975858211517334,
|
| 67943 |
+
"learning_rate": 0.00016067433647397772,
|
| 67944 |
+
"loss": 4.5476,
|
| 67945 |
+
"step": 9686
|
| 67946 |
+
},
|
| 67947 |
+
{
|
| 67948 |
+
"epoch": 1.0703906293165368,
|
| 67949 |
+
"grad_norm": 0.6576226949691772,
|
| 67950 |
+
"learning_rate": 0.0001606451409795533,
|
| 67951 |
+
"loss": 4.5888,
|
| 67952 |
+
"step": 9687
|
| 67953 |
+
},
|
| 67954 |
+
{
|
| 67955 |
+
"epoch": 1.0705011326592628,
|
| 67956 |
+
"grad_norm": 0.7064476609230042,
|
| 67957 |
+
"learning_rate": 0.00016061594507980762,
|
| 67958 |
+
"loss": 4.5645,
|
| 67959 |
+
"step": 9688
|
| 67960 |
+
},
|
| 67961 |
+
{
|
| 67962 |
+
"epoch": 1.070611636001989,
|
| 67963 |
+
"grad_norm": 0.6707605719566345,
|
| 67964 |
+
"learning_rate": 0.00016058674877585233,
|
| 67965 |
+
"loss": 4.487,
|
| 67966 |
+
"step": 9689
|
| 67967 |
+
},
|
| 67968 |
+
{
|
| 67969 |
+
"epoch": 1.0707221393447153,
|
| 67970 |
+
"grad_norm": 0.655327558517456,
|
| 67971 |
+
"learning_rate": 0.00016055755206879904,
|
| 67972 |
+
"loss": 4.5719,
|
| 67973 |
+
"step": 9690
|
| 67974 |
+
},
|
| 67975 |
+
{
|
| 67976 |
+
"epoch": 1.0708326426874413,
|
| 67977 |
+
"grad_norm": 0.6988897323608398,
|
| 67978 |
+
"learning_rate": 0.00016052835495975945,
|
| 67979 |
+
"loss": 4.6659,
|
| 67980 |
+
"step": 9691
|
| 67981 |
+
},
|
| 67982 |
+
{
|
| 67983 |
+
"epoch": 1.0709431460301675,
|
| 67984 |
+
"grad_norm": 0.6926079988479614,
|
| 67985 |
+
"learning_rate": 0.00016049915744984538,
|
| 67986 |
+
"loss": 4.6401,
|
| 67987 |
+
"step": 9692
|
| 67988 |
+
},
|
| 67989 |
+
{
|
| 67990 |
+
"epoch": 1.0710536493728935,
|
| 67991 |
+
"grad_norm": 0.6881336569786072,
|
| 67992 |
+
"learning_rate": 0.0001604699595401684,
|
| 67993 |
+
"loss": 4.5766,
|
| 67994 |
+
"step": 9693
|
| 67995 |
+
},
|
| 67996 |
+
{
|
| 67997 |
+
"epoch": 1.0711641527156197,
|
| 67998 |
+
"grad_norm": 0.668104887008667,
|
| 67999 |
+
"learning_rate": 0.00016044076123184034,
|
| 68000 |
+
"loss": 4.6888,
|
| 68001 |
+
"step": 9694
|
| 68002 |
+
},
|
| 68003 |
+
{
|
| 68004 |
+
"epoch": 1.0712746560583457,
|
| 68005 |
+
"grad_norm": 0.6902987360954285,
|
| 68006 |
+
"learning_rate": 0.0001604115625259728,
|
| 68007 |
+
"loss": 4.6269,
|
| 68008 |
+
"step": 9695
|
| 68009 |
+
},
|
| 68010 |
+
{
|
| 68011 |
+
"epoch": 1.071385159401072,
|
| 68012 |
+
"grad_norm": 0.6768040657043457,
|
| 68013 |
+
"learning_rate": 0.00016038236342367772,
|
| 68014 |
+
"loss": 4.66,
|
| 68015 |
+
"step": 9696
|
| 68016 |
+
},
|
| 68017 |
+
{
|
| 68018 |
+
"epoch": 1.071495662743798,
|
| 68019 |
+
"grad_norm": 0.7098315358161926,
|
| 68020 |
+
"learning_rate": 0.00016035316392606677,
|
| 68021 |
+
"loss": 4.6388,
|
| 68022 |
+
"step": 9697
|
| 68023 |
+
},
|
| 68024 |
+
{
|
| 68025 |
+
"epoch": 1.0716061660865241,
|
| 68026 |
+
"grad_norm": 0.7080962657928467,
|
| 68027 |
+
"learning_rate": 0.0001603239640342518,
|
| 68028 |
+
"loss": 4.6419,
|
| 68029 |
+
"step": 9698
|
| 68030 |
+
},
|
| 68031 |
+
{
|
| 68032 |
+
"epoch": 1.0717166694292501,
|
| 68033 |
+
"grad_norm": 0.6858710050582886,
|
| 68034 |
+
"learning_rate": 0.00016029476374934454,
|
| 68035 |
+
"loss": 4.5198,
|
| 68036 |
+
"step": 9699
|
| 68037 |
+
},
|
| 68038 |
+
{
|
| 68039 |
+
"epoch": 1.0718271727719764,
|
| 68040 |
+
"grad_norm": 0.6873418688774109,
|
| 68041 |
+
"learning_rate": 0.0001602655630724569,
|
| 68042 |
+
"loss": 4.6573,
|
| 68043 |
+
"step": 9700
|
| 68044 |
+
},
|
| 68045 |
+
{
|
| 68046 |
+
"epoch": 1.0719376761147026,
|
| 68047 |
+
"grad_norm": 0.706405520439148,
|
| 68048 |
+
"learning_rate": 0.00016023636200470065,
|
| 68049 |
+
"loss": 4.6154,
|
| 68050 |
+
"step": 9701
|
| 68051 |
+
},
|
| 68052 |
+
{
|
| 68053 |
+
"epoch": 1.0720481794574286,
|
| 68054 |
+
"grad_norm": 0.6948126554489136,
|
| 68055 |
+
"learning_rate": 0.00016020716054718768,
|
| 68056 |
+
"loss": 4.6304,
|
| 68057 |
+
"step": 9702
|
| 68058 |
+
},
|
| 68059 |
+
{
|
| 68060 |
+
"epoch": 1.0721586828001548,
|
| 68061 |
+
"grad_norm": 0.6804258823394775,
|
| 68062 |
+
"learning_rate": 0.0001601779587010298,
|
| 68063 |
+
"loss": 4.5518,
|
| 68064 |
+
"step": 9703
|
| 68065 |
+
},
|
| 68066 |
+
{
|
| 68067 |
+
"epoch": 1.0722691861428808,
|
| 68068 |
+
"grad_norm": 0.7091872096061707,
|
| 68069 |
+
"learning_rate": 0.00016014875646733895,
|
| 68070 |
+
"loss": 4.6979,
|
| 68071 |
+
"step": 9704
|
| 68072 |
+
},
|
| 68073 |
+
{
|
| 68074 |
+
"epoch": 1.072379689485607,
|
| 68075 |
+
"grad_norm": 0.6986128687858582,
|
| 68076 |
+
"learning_rate": 0.00016011955384722708,
|
| 68077 |
+
"loss": 4.6433,
|
| 68078 |
+
"step": 9705
|
| 68079 |
+
},
|
| 68080 |
+
{
|
| 68081 |
+
"epoch": 1.072490192828333,
|
| 68082 |
+
"grad_norm": 0.6891746520996094,
|
| 68083 |
+
"learning_rate": 0.00016009035084180596,
|
| 68084 |
+
"loss": 4.5502,
|
| 68085 |
+
"step": 9706
|
| 68086 |
+
},
|
| 68087 |
+
{
|
| 68088 |
+
"epoch": 1.0726006961710592,
|
| 68089 |
+
"grad_norm": 0.7349664568901062,
|
| 68090 |
+
"learning_rate": 0.00016006114745218758,
|
| 68091 |
+
"loss": 4.5381,
|
| 68092 |
+
"step": 9707
|
| 68093 |
+
},
|
| 68094 |
+
{
|
| 68095 |
+
"epoch": 1.0727111995137852,
|
| 68096 |
+
"grad_norm": 0.6864305734634399,
|
| 68097 |
+
"learning_rate": 0.0001600319436794839,
|
| 68098 |
+
"loss": 4.6319,
|
| 68099 |
+
"step": 9708
|
| 68100 |
+
},
|
| 68101 |
+
{
|
| 68102 |
+
"epoch": 1.0728217028565115,
|
| 68103 |
+
"grad_norm": 0.6986581683158875,
|
| 68104 |
+
"learning_rate": 0.00016000273952480686,
|
| 68105 |
+
"loss": 4.5295,
|
| 68106 |
+
"step": 9709
|
| 68107 |
+
},
|
| 68108 |
+
{
|
| 68109 |
+
"epoch": 1.0729322061992375,
|
| 68110 |
+
"grad_norm": 0.7088263630867004,
|
| 68111 |
+
"learning_rate": 0.00015997353498926843,
|
| 68112 |
+
"loss": 4.5656,
|
| 68113 |
+
"step": 9710
|
| 68114 |
+
},
|
| 68115 |
+
{
|
| 68116 |
+
"epoch": 1.0730427095419637,
|
| 68117 |
+
"grad_norm": 0.694791316986084,
|
| 68118 |
+
"learning_rate": 0.00015994433007398054,
|
| 68119 |
+
"loss": 4.5374,
|
| 68120 |
+
"step": 9711
|
| 68121 |
+
},
|
| 68122 |
+
{
|
| 68123 |
+
"epoch": 1.0731532128846897,
|
| 68124 |
+
"grad_norm": 0.6935116648674011,
|
| 68125 |
+
"learning_rate": 0.00015991512478005532,
|
| 68126 |
+
"loss": 4.5704,
|
| 68127 |
+
"step": 9712
|
| 68128 |
+
},
|
| 68129 |
+
{
|
| 68130 |
+
"epoch": 1.073263716227416,
|
| 68131 |
+
"grad_norm": 0.7357485890388489,
|
| 68132 |
+
"learning_rate": 0.0001598859191086047,
|
| 68133 |
+
"loss": 4.5556,
|
| 68134 |
+
"step": 9713
|
| 68135 |
+
},
|
| 68136 |
+
{
|
| 68137 |
+
"epoch": 1.073374219570142,
|
| 68138 |
+
"grad_norm": 0.7013515830039978,
|
| 68139 |
+
"learning_rate": 0.00015985671306074063,
|
| 68140 |
+
"loss": 4.6461,
|
| 68141 |
+
"step": 9714
|
| 68142 |
+
},
|
| 68143 |
+
{
|
| 68144 |
+
"epoch": 1.0734847229128681,
|
| 68145 |
+
"grad_norm": 0.7423861622810364,
|
| 68146 |
+
"learning_rate": 0.00015982750663757524,
|
| 68147 |
+
"loss": 4.5883,
|
| 68148 |
+
"step": 9715
|
| 68149 |
+
},
|
| 68150 |
+
{
|
| 68151 |
+
"epoch": 1.0735952262555943,
|
| 68152 |
+
"grad_norm": 0.7220031023025513,
|
| 68153 |
+
"learning_rate": 0.00015979829984022064,
|
| 68154 |
+
"loss": 4.6771,
|
| 68155 |
+
"step": 9716
|
| 68156 |
+
},
|
| 68157 |
+
{
|
| 68158 |
+
"epoch": 1.0737057295983203,
|
| 68159 |
+
"grad_norm": 0.7191951274871826,
|
| 68160 |
+
"learning_rate": 0.0001597690926697888,
|
| 68161 |
+
"loss": 4.6458,
|
| 68162 |
+
"step": 9717
|
| 68163 |
+
},
|
| 68164 |
+
{
|
| 68165 |
+
"epoch": 1.0738162329410466,
|
| 68166 |
+
"grad_norm": 0.7174352407455444,
|
| 68167 |
+
"learning_rate": 0.00015973988512739181,
|
| 68168 |
+
"loss": 4.6612,
|
| 68169 |
+
"step": 9718
|
| 68170 |
+
},
|
| 68171 |
+
{
|
| 68172 |
+
"epoch": 1.0739267362837726,
|
| 68173 |
+
"grad_norm": 0.6914805769920349,
|
| 68174 |
+
"learning_rate": 0.0001597106772141418,
|
| 68175 |
+
"loss": 4.5736,
|
| 68176 |
+
"step": 9719
|
| 68177 |
+
},
|
| 68178 |
+
{
|
| 68179 |
+
"epoch": 1.0740372396264988,
|
| 68180 |
+
"grad_norm": 0.7072942852973938,
|
| 68181 |
+
"learning_rate": 0.0001596814689311509,
|
| 68182 |
+
"loss": 4.5433,
|
| 68183 |
+
"step": 9720
|
| 68184 |
+
},
|
| 68185 |
+
{
|
| 68186 |
+
"epoch": 1.0741477429692248,
|
| 68187 |
+
"grad_norm": 0.7238595485687256,
|
| 68188 |
+
"learning_rate": 0.00015965226027953123,
|
| 68189 |
+
"loss": 4.6453,
|
| 68190 |
+
"step": 9721
|
| 68191 |
+
},
|
| 68192 |
+
{
|
| 68193 |
+
"epoch": 1.074258246311951,
|
| 68194 |
+
"grad_norm": 0.7292171120643616,
|
| 68195 |
+
"learning_rate": 0.00015962305126039487,
|
| 68196 |
+
"loss": 4.6249,
|
| 68197 |
+
"step": 9722
|
| 68198 |
+
},
|
| 68199 |
+
{
|
| 68200 |
+
"epoch": 1.074368749654677,
|
| 68201 |
+
"grad_norm": 0.6857756972312927,
|
| 68202 |
+
"learning_rate": 0.000159593841874854,
|
| 68203 |
+
"loss": 4.5737,
|
| 68204 |
+
"step": 9723
|
| 68205 |
+
},
|
| 68206 |
+
{
|
| 68207 |
+
"epoch": 1.0744792529974032,
|
| 68208 |
+
"grad_norm": 0.7355866432189941,
|
| 68209 |
+
"learning_rate": 0.00015956463212402086,
|
| 68210 |
+
"loss": 4.5813,
|
| 68211 |
+
"step": 9724
|
| 68212 |
+
},
|
| 68213 |
+
{
|
| 68214 |
+
"epoch": 1.0745897563401292,
|
| 68215 |
+
"grad_norm": 0.7348610758781433,
|
| 68216 |
+
"learning_rate": 0.00015953542200900753,
|
| 68217 |
+
"loss": 4.5247,
|
| 68218 |
+
"step": 9725
|
| 68219 |
+
},
|
| 68220 |
+
{
|
| 68221 |
+
"epoch": 1.0747002596828554,
|
| 68222 |
+
"grad_norm": 0.6830874681472778,
|
| 68223 |
+
"learning_rate": 0.0001595062115309263,
|
| 68224 |
+
"loss": 4.5316,
|
| 68225 |
+
"step": 9726
|
| 68226 |
+
},
|
| 68227 |
+
{
|
| 68228 |
+
"epoch": 1.0748107630255814,
|
| 68229 |
+
"grad_norm": 0.7119134068489075,
|
| 68230 |
+
"learning_rate": 0.0001594770006908893,
|
| 68231 |
+
"loss": 4.6068,
|
| 68232 |
+
"step": 9727
|
| 68233 |
+
},
|
| 68234 |
+
{
|
| 68235 |
+
"epoch": 1.0749212663683076,
|
| 68236 |
+
"grad_norm": 0.7670882344245911,
|
| 68237 |
+
"learning_rate": 0.0001594477894900088,
|
| 68238 |
+
"loss": 4.6409,
|
| 68239 |
+
"step": 9728
|
| 68240 |
+
},
|
| 68241 |
+
{
|
| 68242 |
+
"epoch": 1.0750317697110336,
|
| 68243 |
+
"grad_norm": 0.7309719324111938,
|
| 68244 |
+
"learning_rate": 0.000159418577929397,
|
| 68245 |
+
"loss": 4.5117,
|
| 68246 |
+
"step": 9729
|
| 68247 |
+
},
|
| 68248 |
+
{
|
| 68249 |
+
"epoch": 1.0751422730537599,
|
| 68250 |
+
"grad_norm": 0.728286623954773,
|
| 68251 |
+
"learning_rate": 0.00015938936601016617,
|
| 68252 |
+
"loss": 4.6708,
|
| 68253 |
+
"step": 9730
|
| 68254 |
+
},
|
| 68255 |
+
{
|
| 68256 |
+
"epoch": 1.075252776396486,
|
| 68257 |
+
"grad_norm": 0.7389054298400879,
|
| 68258 |
+
"learning_rate": 0.00015936015373342862,
|
| 68259 |
+
"loss": 4.4987,
|
| 68260 |
+
"step": 9731
|
| 68261 |
+
},
|
| 68262 |
+
{
|
| 68263 |
+
"epoch": 1.075363279739212,
|
| 68264 |
+
"grad_norm": 0.7210308313369751,
|
| 68265 |
+
"learning_rate": 0.00015933094110029655,
|
| 68266 |
+
"loss": 4.6227,
|
| 68267 |
+
"step": 9732
|
| 68268 |
+
},
|
| 68269 |
+
{
|
| 68270 |
+
"epoch": 1.0754737830819383,
|
| 68271 |
+
"grad_norm": 0.6961907148361206,
|
| 68272 |
+
"learning_rate": 0.0001593017281118823,
|
| 68273 |
+
"loss": 4.6548,
|
| 68274 |
+
"step": 9733
|
| 68275 |
+
},
|
| 68276 |
+
{
|
| 68277 |
+
"epoch": 1.0755842864246643,
|
| 68278 |
+
"grad_norm": 0.7422161102294922,
|
| 68279 |
+
"learning_rate": 0.0001592725147692982,
|
| 68280 |
+
"loss": 4.5428,
|
| 68281 |
+
"step": 9734
|
| 68282 |
+
},
|
| 68283 |
+
{
|
| 68284 |
+
"epoch": 1.0756947897673905,
|
| 68285 |
+
"grad_norm": 0.6886887550354004,
|
| 68286 |
+
"learning_rate": 0.00015924330107365645,
|
| 68287 |
+
"loss": 4.5715,
|
| 68288 |
+
"step": 9735
|
| 68289 |
+
},
|
| 68290 |
+
{
|
| 68291 |
+
"epoch": 1.0758052931101165,
|
| 68292 |
+
"grad_norm": 0.6801064610481262,
|
| 68293 |
+
"learning_rate": 0.0001592140870260695,
|
| 68294 |
+
"loss": 4.4857,
|
| 68295 |
+
"step": 9736
|
| 68296 |
+
},
|
| 68297 |
+
{
|
| 68298 |
+
"epoch": 1.0759157964528427,
|
| 68299 |
+
"grad_norm": 0.7079518437385559,
|
| 68300 |
+
"learning_rate": 0.0001591848726276497,
|
| 68301 |
+
"loss": 4.6101,
|
| 68302 |
+
"step": 9737
|
| 68303 |
+
},
|
| 68304 |
+
{
|
| 68305 |
+
"epoch": 1.0760262997955687,
|
| 68306 |
+
"grad_norm": 0.7202207446098328,
|
| 68307 |
+
"learning_rate": 0.00015915565787950932,
|
| 68308 |
+
"loss": 4.5434,
|
| 68309 |
+
"step": 9738
|
| 68310 |
+
},
|
| 68311 |
+
{
|
| 68312 |
+
"epoch": 1.076136803138295,
|
| 68313 |
+
"grad_norm": 0.689629316329956,
|
| 68314 |
+
"learning_rate": 0.00015912644278276078,
|
| 68315 |
+
"loss": 4.6636,
|
| 68316 |
+
"step": 9739
|
| 68317 |
+
},
|
| 68318 |
+
{
|
| 68319 |
+
"epoch": 1.076247306481021,
|
| 68320 |
+
"grad_norm": 0.6948322653770447,
|
| 68321 |
+
"learning_rate": 0.0001590972273385165,
|
| 68322 |
+
"loss": 4.6096,
|
| 68323 |
+
"step": 9740
|
| 68324 |
+
},
|
| 68325 |
+
{
|
| 68326 |
+
"epoch": 1.0763578098237472,
|
| 68327 |
+
"grad_norm": 0.694325864315033,
|
| 68328 |
+
"learning_rate": 0.00015906801154788881,
|
| 68329 |
+
"loss": 4.5866,
|
| 68330 |
+
"step": 9741
|
| 68331 |
+
},
|
| 68332 |
+
{
|
| 68333 |
+
"epoch": 1.0764683131664734,
|
| 68334 |
+
"grad_norm": 0.7426466941833496,
|
| 68335 |
+
"learning_rate": 0.00015903879541199022,
|
| 68336 |
+
"loss": 4.5965,
|
| 68337 |
+
"step": 9742
|
| 68338 |
+
},
|
| 68339 |
+
{
|
| 68340 |
+
"epoch": 1.0765788165091994,
|
| 68341 |
+
"grad_norm": 0.7042632699012756,
|
| 68342 |
+
"learning_rate": 0.00015900957893193301,
|
| 68343 |
+
"loss": 4.5687,
|
| 68344 |
+
"step": 9743
|
| 68345 |
+
},
|
| 68346 |
+
{
|
| 68347 |
+
"epoch": 1.0766893198519256,
|
| 68348 |
+
"grad_norm": 0.7953672409057617,
|
| 68349 |
+
"learning_rate": 0.0001589803621088297,
|
| 68350 |
+
"loss": 4.5289,
|
| 68351 |
+
"step": 9744
|
| 68352 |
+
},
|
| 68353 |
+
{
|
| 68354 |
+
"epoch": 1.0767998231946516,
|
| 68355 |
+
"grad_norm": 0.6892461180686951,
|
| 68356 |
+
"learning_rate": 0.00015895114494379284,
|
| 68357 |
+
"loss": 4.6028,
|
| 68358 |
+
"step": 9745
|
| 68359 |
+
},
|
| 68360 |
+
{
|
| 68361 |
+
"epoch": 1.0769103265373778,
|
| 68362 |
+
"grad_norm": 0.7118811011314392,
|
| 68363 |
+
"learning_rate": 0.00015892192743793468,
|
| 68364 |
+
"loss": 4.6013,
|
| 68365 |
+
"step": 9746
|
| 68366 |
+
},
|
| 68367 |
+
{
|
| 68368 |
+
"epoch": 1.0770208298801038,
|
| 68369 |
+
"grad_norm": 0.7487758994102478,
|
| 68370 |
+
"learning_rate": 0.00015889270959236788,
|
| 68371 |
+
"loss": 4.6468,
|
| 68372 |
+
"step": 9747
|
| 68373 |
+
},
|
| 68374 |
+
{
|
| 68375 |
+
"epoch": 1.07713133322283,
|
| 68376 |
+
"grad_norm": 0.684683620929718,
|
| 68377 |
+
"learning_rate": 0.00015886349140820485,
|
| 68378 |
+
"loss": 4.6577,
|
| 68379 |
+
"step": 9748
|
| 68380 |
+
},
|
| 68381 |
+
{
|
| 68382 |
+
"epoch": 1.077241836565556,
|
| 68383 |
+
"grad_norm": 0.7130369544029236,
|
| 68384 |
+
"learning_rate": 0.00015883427288655814,
|
| 68385 |
+
"loss": 4.6702,
|
| 68386 |
+
"step": 9749
|
| 68387 |
+
},
|
| 68388 |
+
{
|
| 68389 |
+
"epoch": 1.0773523399082823,
|
| 68390 |
+
"grad_norm": 0.7921537160873413,
|
| 68391 |
+
"learning_rate": 0.0001588050540285402,
|
| 68392 |
+
"loss": 4.5922,
|
| 68393 |
+
"step": 9750
|
| 68394 |
+
},
|
| 68395 |
+
{
|
| 68396 |
+
"epoch": 1.0774628432510083,
|
| 68397 |
+
"grad_norm": 0.678575873374939,
|
| 68398 |
+
"learning_rate": 0.00015877583483526358,
|
| 68399 |
+
"loss": 4.5522,
|
| 68400 |
+
"step": 9751
|
| 68401 |
+
},
|
| 68402 |
+
{
|
| 68403 |
+
"epoch": 1.0775733465937345,
|
| 68404 |
+
"grad_norm": 0.7006704211235046,
|
| 68405 |
+
"learning_rate": 0.0001587466153078408,
|
| 68406 |
+
"loss": 4.6823,
|
| 68407 |
+
"step": 9752
|
| 68408 |
+
},
|
| 68409 |
+
{
|
| 68410 |
+
"epoch": 1.0776838499364605,
|
| 68411 |
+
"grad_norm": 0.705590546131134,
|
| 68412 |
+
"learning_rate": 0.0001587173954473845,
|
| 68413 |
+
"loss": 4.5711,
|
| 68414 |
+
"step": 9753
|
| 68415 |
+
},
|
| 68416 |
+
{
|
| 68417 |
+
"epoch": 1.0777943532791867,
|
| 68418 |
+
"grad_norm": 0.668070375919342,
|
| 68419 |
+
"learning_rate": 0.00015868817525500716,
|
| 68420 |
+
"loss": 4.6001,
|
| 68421 |
+
"step": 9754
|
| 68422 |
+
},
|
| 68423 |
+
{
|
| 68424 |
+
"epoch": 1.0779048566219127,
|
| 68425 |
+
"grad_norm": 0.6886585354804993,
|
| 68426 |
+
"learning_rate": 0.00015865895473182143,
|
| 68427 |
+
"loss": 4.5993,
|
| 68428 |
+
"step": 9755
|
| 68429 |
+
},
|
| 68430 |
+
{
|
| 68431 |
+
"epoch": 1.078015359964639,
|
| 68432 |
+
"grad_norm": 0.7400259375572205,
|
| 68433 |
+
"learning_rate": 0.00015862973387893982,
|
| 68434 |
+
"loss": 4.6434,
|
| 68435 |
+
"step": 9756
|
| 68436 |
+
},
|
| 68437 |
+
{
|
| 68438 |
+
"epoch": 1.0781258633073652,
|
| 68439 |
+
"grad_norm": 0.671785831451416,
|
| 68440 |
+
"learning_rate": 0.000158600512697475,
|
| 68441 |
+
"loss": 4.5806,
|
| 68442 |
+
"step": 9757
|
| 68443 |
+
},
|
| 68444 |
+
{
|
| 68445 |
+
"epoch": 1.0782363666500911,
|
| 68446 |
+
"grad_norm": 0.6996983289718628,
|
| 68447 |
+
"learning_rate": 0.0001585712911885396,
|
| 68448 |
+
"loss": 4.6364,
|
| 68449 |
+
"step": 9758
|
| 68450 |
+
},
|
| 68451 |
+
{
|
| 68452 |
+
"epoch": 1.0783468699928174,
|
| 68453 |
+
"grad_norm": 0.7574987411499023,
|
| 68454 |
+
"learning_rate": 0.00015854206935324617,
|
| 68455 |
+
"loss": 4.6299,
|
| 68456 |
+
"step": 9759
|
| 68457 |
+
},
|
| 68458 |
+
{
|
| 68459 |
+
"epoch": 1.0784573733355434,
|
| 68460 |
+
"grad_norm": 0.6848738193511963,
|
| 68461 |
+
"learning_rate": 0.0001585128471927074,
|
| 68462 |
+
"loss": 4.536,
|
| 68463 |
+
"step": 9760
|
| 68464 |
+
},
|
| 68465 |
+
{
|
| 68466 |
+
"epoch": 1.0785678766782696,
|
| 68467 |
+
"grad_norm": 0.7546606063842773,
|
| 68468 |
+
"learning_rate": 0.00015848362470803596,
|
| 68469 |
+
"loss": 4.7158,
|
| 68470 |
+
"step": 9761
|
| 68471 |
+
},
|
| 68472 |
+
{
|
| 68473 |
+
"epoch": 1.0786783800209956,
|
| 68474 |
+
"grad_norm": 0.7053990364074707,
|
| 68475 |
+
"learning_rate": 0.00015845440190034453,
|
| 68476 |
+
"loss": 4.5253,
|
| 68477 |
+
"step": 9762
|
| 68478 |
+
},
|
| 68479 |
+
{
|
| 68480 |
+
"epoch": 1.0787888833637218,
|
| 68481 |
+
"grad_norm": 0.6824933290481567,
|
| 68482 |
+
"learning_rate": 0.0001584251787707457,
|
| 68483 |
+
"loss": 4.5975,
|
| 68484 |
+
"step": 9763
|
| 68485 |
+
},
|
| 68486 |
+
{
|
| 68487 |
+
"epoch": 1.0788993867064478,
|
| 68488 |
+
"grad_norm": 0.7350550293922424,
|
| 68489 |
+
"learning_rate": 0.0001583959553203522,
|
| 68490 |
+
"loss": 4.6355,
|
| 68491 |
+
"step": 9764
|
| 68492 |
+
},
|
| 68493 |
+
{
|
| 68494 |
+
"epoch": 1.079009890049174,
|
| 68495 |
+
"grad_norm": 0.7031686902046204,
|
| 68496 |
+
"learning_rate": 0.00015836673155027685,
|
| 68497 |
+
"loss": 4.6593,
|
| 68498 |
+
"step": 9765
|
| 68499 |
+
},
|
| 68500 |
+
{
|
| 68501 |
+
"epoch": 1.0791203933919,
|
| 68502 |
+
"grad_norm": 0.6792450547218323,
|
| 68503 |
+
"learning_rate": 0.00015833750746163222,
|
| 68504 |
+
"loss": 4.6428,
|
| 68505 |
+
"step": 9766
|
| 68506 |
+
},
|
| 68507 |
+
{
|
| 68508 |
+
"epoch": 1.0792308967346262,
|
| 68509 |
+
"grad_norm": 0.7289429306983948,
|
| 68510 |
+
"learning_rate": 0.0001583082830555311,
|
| 68511 |
+
"loss": 4.6247,
|
| 68512 |
+
"step": 9767
|
| 68513 |
+
},
|
| 68514 |
+
{
|
| 68515 |
+
"epoch": 1.0793414000773522,
|
| 68516 |
+
"grad_norm": 0.6944406032562256,
|
| 68517 |
+
"learning_rate": 0.00015827905833308618,
|
| 68518 |
+
"loss": 4.5978,
|
| 68519 |
+
"step": 9768
|
| 68520 |
+
},
|
| 68521 |
+
{
|
| 68522 |
+
"epoch": 1.0794519034200785,
|
| 68523 |
+
"grad_norm": 0.7166576981544495,
|
| 68524 |
+
"learning_rate": 0.00015824983329541028,
|
| 68525 |
+
"loss": 4.5719,
|
| 68526 |
+
"step": 9769
|
| 68527 |
+
},
|
| 68528 |
+
{
|
| 68529 |
+
"epoch": 1.0795624067628045,
|
| 68530 |
+
"grad_norm": 0.6816717982292175,
|
| 68531 |
+
"learning_rate": 0.00015822060794361612,
|
| 68532 |
+
"loss": 4.6194,
|
| 68533 |
+
"step": 9770
|
| 68534 |
+
},
|
| 68535 |
+
{
|
| 68536 |
+
"epoch": 1.0796729101055307,
|
| 68537 |
+
"grad_norm": 0.7008273601531982,
|
| 68538 |
+
"learning_rate": 0.0001581913822788165,
|
| 68539 |
+
"loss": 4.6141,
|
| 68540 |
+
"step": 9771
|
| 68541 |
+
},
|
| 68542 |
+
{
|
| 68543 |
+
"epoch": 1.079783413448257,
|
| 68544 |
+
"grad_norm": 0.6874610781669617,
|
| 68545 |
+
"learning_rate": 0.00015816215630212418,
|
| 68546 |
+
"loss": 4.5367,
|
| 68547 |
+
"step": 9772
|
| 68548 |
+
},
|
| 68549 |
+
{
|
| 68550 |
+
"epoch": 1.079893916790983,
|
| 68551 |
+
"grad_norm": 0.6829342842102051,
|
| 68552 |
+
"learning_rate": 0.000158132930014652,
|
| 68553 |
+
"loss": 4.4783,
|
| 68554 |
+
"step": 9773
|
| 68555 |
+
},
|
| 68556 |
+
{
|
| 68557 |
+
"epoch": 1.0800044201337091,
|
| 68558 |
+
"grad_norm": 0.6811694502830505,
|
| 68559 |
+
"learning_rate": 0.00015810370341751276,
|
| 68560 |
+
"loss": 4.5917,
|
| 68561 |
+
"step": 9774
|
| 68562 |
+
},
|
| 68563 |
+
{
|
| 68564 |
+
"epoch": 1.0801149234764351,
|
| 68565 |
+
"grad_norm": 0.6667516827583313,
|
| 68566 |
+
"learning_rate": 0.00015807447651181922,
|
| 68567 |
+
"loss": 4.6145,
|
| 68568 |
+
"step": 9775
|
| 68569 |
+
},
|
| 68570 |
+
{
|
| 68571 |
+
"epoch": 1.0802254268191613,
|
| 68572 |
+
"grad_norm": 0.6955037713050842,
|
| 68573 |
+
"learning_rate": 0.00015804524929868428,
|
| 68574 |
+
"loss": 4.5795,
|
| 68575 |
+
"step": 9776
|
| 68576 |
+
},
|
| 68577 |
+
{
|
| 68578 |
+
"epoch": 1.0803359301618873,
|
| 68579 |
+
"grad_norm": 0.6717528700828552,
|
| 68580 |
+
"learning_rate": 0.00015801602177922084,
|
| 68581 |
+
"loss": 4.6139,
|
| 68582 |
+
"step": 9777
|
| 68583 |
+
},
|
| 68584 |
+
{
|
| 68585 |
+
"epoch": 1.0804464335046136,
|
| 68586 |
+
"grad_norm": 0.658261239528656,
|
| 68587 |
+
"learning_rate": 0.0001579867939545416,
|
| 68588 |
+
"loss": 4.6019,
|
| 68589 |
+
"step": 9778
|
| 68590 |
+
},
|
| 68591 |
+
{
|
| 68592 |
+
"epoch": 1.0805569368473396,
|
| 68593 |
+
"grad_norm": 0.7554144263267517,
|
| 68594 |
+
"learning_rate": 0.00015795756582575956,
|
| 68595 |
+
"loss": 4.5632,
|
| 68596 |
+
"step": 9779
|
| 68597 |
+
},
|
| 68598 |
+
{
|
| 68599 |
+
"epoch": 1.0806674401900658,
|
| 68600 |
+
"grad_norm": 0.7248384952545166,
|
| 68601 |
+
"learning_rate": 0.00015792833739398758,
|
| 68602 |
+
"loss": 4.6619,
|
| 68603 |
+
"step": 9780
|
| 68604 |
+
},
|
| 68605 |
+
{
|
| 68606 |
+
"epoch": 1.0807779435327918,
|
| 68607 |
+
"grad_norm": 0.7080346345901489,
|
| 68608 |
+
"learning_rate": 0.00015789910866033854,
|
| 68609 |
+
"loss": 4.6411,
|
| 68610 |
+
"step": 9781
|
| 68611 |
+
},
|
| 68612 |
+
{
|
| 68613 |
+
"epoch": 1.080888446875518,
|
| 68614 |
+
"grad_norm": 0.813401460647583,
|
| 68615 |
+
"learning_rate": 0.00015786987962592534,
|
| 68616 |
+
"loss": 4.6241,
|
| 68617 |
+
"step": 9782
|
| 68618 |
+
},
|
| 68619 |
+
{
|
| 68620 |
+
"epoch": 1.0809989502182442,
|
| 68621 |
+
"grad_norm": 0.6837419271469116,
|
| 68622 |
+
"learning_rate": 0.00015784065029186088,
|
| 68623 |
+
"loss": 4.613,
|
| 68624 |
+
"step": 9783
|
| 68625 |
+
},
|
| 68626 |
+
{
|
| 68627 |
+
"epoch": 1.0811094535609702,
|
| 68628 |
+
"grad_norm": 0.7627458572387695,
|
| 68629 |
+
"learning_rate": 0.0001578114206592581,
|
| 68630 |
+
"loss": 4.6357,
|
| 68631 |
+
"step": 9784
|
| 68632 |
+
},
|
| 68633 |
+
{
|
| 68634 |
+
"epoch": 1.0812199569036964,
|
| 68635 |
+
"grad_norm": 0.7611428499221802,
|
| 68636 |
+
"learning_rate": 0.00015778219072923,
|
| 68637 |
+
"loss": 4.6341,
|
| 68638 |
+
"step": 9785
|
| 68639 |
+
},
|
| 68640 |
+
{
|
| 68641 |
+
"epoch": 1.0813304602464224,
|
| 68642 |
+
"grad_norm": 0.6623059511184692,
|
| 68643 |
+
"learning_rate": 0.0001577529605028894,
|
| 68644 |
+
"loss": 4.5433,
|
| 68645 |
+
"step": 9786
|
| 68646 |
+
},
|
| 68647 |
+
{
|
| 68648 |
+
"epoch": 1.0814409635891487,
|
| 68649 |
+
"grad_norm": 0.7150582075119019,
|
| 68650 |
+
"learning_rate": 0.00015772372998134937,
|
| 68651 |
+
"loss": 4.5481,
|
| 68652 |
+
"step": 9787
|
| 68653 |
+
},
|
| 68654 |
+
{
|
| 68655 |
+
"epoch": 1.0815514669318746,
|
| 68656 |
+
"grad_norm": 0.701525866985321,
|
| 68657 |
+
"learning_rate": 0.00015769449916572283,
|
| 68658 |
+
"loss": 4.5795,
|
| 68659 |
+
"step": 9788
|
| 68660 |
+
},
|
| 68661 |
+
{
|
| 68662 |
+
"epoch": 1.0816619702746009,
|
| 68663 |
+
"grad_norm": 0.679395854473114,
|
| 68664 |
+
"learning_rate": 0.0001576652680571228,
|
| 68665 |
+
"loss": 4.63,
|
| 68666 |
+
"step": 9789
|
| 68667 |
+
},
|
| 68668 |
+
{
|
| 68669 |
+
"epoch": 1.0817724736173269,
|
| 68670 |
+
"grad_norm": 0.7047052383422852,
|
| 68671 |
+
"learning_rate": 0.00015763603665666228,
|
| 68672 |
+
"loss": 4.6166,
|
| 68673 |
+
"step": 9790
|
| 68674 |
+
},
|
| 68675 |
+
{
|
| 68676 |
+
"epoch": 1.081882976960053,
|
| 68677 |
+
"grad_norm": 0.6824949979782104,
|
| 68678 |
+
"learning_rate": 0.00015760680496545426,
|
| 68679 |
+
"loss": 4.6669,
|
| 68680 |
+
"step": 9791
|
| 68681 |
+
},
|
| 68682 |
+
{
|
| 68683 |
+
"epoch": 1.081993480302779,
|
| 68684 |
+
"grad_norm": 0.6709694862365723,
|
| 68685 |
+
"learning_rate": 0.0001575775729846117,
|
| 68686 |
+
"loss": 4.6201,
|
| 68687 |
+
"step": 9792
|
| 68688 |
+
},
|
| 68689 |
+
{
|
| 68690 |
+
"epoch": 1.0821039836455053,
|
| 68691 |
+
"grad_norm": 0.6404644250869751,
|
| 68692 |
+
"learning_rate": 0.00015754834071524769,
|
| 68693 |
+
"loss": 4.5382,
|
| 68694 |
+
"step": 9793
|
| 68695 |
+
},
|
| 68696 |
+
{
|
| 68697 |
+
"epoch": 1.0822144869882313,
|
| 68698 |
+
"grad_norm": 0.68291175365448,
|
| 68699 |
+
"learning_rate": 0.0001575191081584753,
|
| 68700 |
+
"loss": 4.5719,
|
| 68701 |
+
"step": 9794
|
| 68702 |
+
},
|
| 68703 |
+
{
|
| 68704 |
+
"epoch": 1.0823249903309575,
|
| 68705 |
+
"grad_norm": 0.6930164694786072,
|
| 68706 |
+
"learning_rate": 0.0001574898753154075,
|
| 68707 |
+
"loss": 4.5347,
|
| 68708 |
+
"step": 9795
|
| 68709 |
+
},
|
| 68710 |
+
{
|
| 68711 |
+
"epoch": 1.0824354936736835,
|
| 68712 |
+
"grad_norm": 0.6947620511054993,
|
| 68713 |
+
"learning_rate": 0.00015746064218715743,
|
| 68714 |
+
"loss": 4.4454,
|
| 68715 |
+
"step": 9796
|
| 68716 |
+
},
|
| 68717 |
+
{
|
| 68718 |
+
"epoch": 1.0825459970164097,
|
| 68719 |
+
"grad_norm": 0.6918153166770935,
|
| 68720 |
+
"learning_rate": 0.00015743140877483812,
|
| 68721 |
+
"loss": 4.6287,
|
| 68722 |
+
"step": 9797
|
| 68723 |
+
},
|
| 68724 |
+
{
|
| 68725 |
+
"epoch": 1.082656500359136,
|
| 68726 |
+
"grad_norm": 0.7298768162727356,
|
| 68727 |
+
"learning_rate": 0.00015740217507956267,
|
| 68728 |
+
"loss": 4.6733,
|
| 68729 |
+
"step": 9798
|
| 68730 |
+
},
|
| 68731 |
+
{
|
| 68732 |
+
"epoch": 1.082767003701862,
|
| 68733 |
+
"grad_norm": 0.7070364952087402,
|
| 68734 |
+
"learning_rate": 0.0001573729411024441,
|
| 68735 |
+
"loss": 4.6297,
|
| 68736 |
+
"step": 9799
|
| 68737 |
+
},
|
| 68738 |
+
{
|
| 68739 |
+
"epoch": 1.0828775070445882,
|
| 68740 |
+
"grad_norm": 0.6483566761016846,
|
| 68741 |
+
"learning_rate": 0.00015734370684459558,
|
| 68742 |
+
"loss": 4.6211,
|
| 68743 |
+
"step": 9800
|
| 68744 |
+
},
|
| 68745 |
+
{
|
| 68746 |
+
"epoch": 1.0829880103873142,
|
| 68747 |
+
"grad_norm": 0.6738128066062927,
|
| 68748 |
+
"learning_rate": 0.00015731447230713023,
|
| 68749 |
+
"loss": 4.6466,
|
| 68750 |
+
"step": 9801
|
| 68751 |
+
},
|
| 68752 |
+
{
|
| 68753 |
+
"epoch": 1.0830985137300404,
|
| 68754 |
+
"grad_norm": 0.6970024704933167,
|
| 68755 |
+
"learning_rate": 0.0001572852374911612,
|
| 68756 |
+
"loss": 4.6215,
|
| 68757 |
+
"step": 9802
|
| 68758 |
+
},
|
| 68759 |
+
{
|
| 68760 |
+
"epoch": 1.0832090170727664,
|
| 68761 |
+
"grad_norm": 0.6948113441467285,
|
| 68762 |
+
"learning_rate": 0.0001572560023978016,
|
| 68763 |
+
"loss": 4.6094,
|
| 68764 |
+
"step": 9803
|
| 68765 |
+
},
|
| 68766 |
+
{
|
| 68767 |
+
"epoch": 1.0833195204154926,
|
| 68768 |
+
"grad_norm": 0.7193368077278137,
|
| 68769 |
+
"learning_rate": 0.0001572267670281645,
|
| 68770 |
+
"loss": 4.6117,
|
| 68771 |
+
"step": 9804
|
| 68772 |
+
},
|
| 68773 |
+
{
|
| 68774 |
+
"epoch": 1.0834300237582186,
|
| 68775 |
+
"grad_norm": 0.7204447388648987,
|
| 68776 |
+
"learning_rate": 0.0001571975313833632,
|
| 68777 |
+
"loss": 4.4874,
|
| 68778 |
+
"step": 9805
|
| 68779 |
+
},
|
| 68780 |
+
{
|
| 68781 |
+
"epoch": 1.0835405271009448,
|
| 68782 |
+
"grad_norm": 0.6877427697181702,
|
| 68783 |
+
"learning_rate": 0.0001571682954645108,
|
| 68784 |
+
"loss": 4.6547,
|
| 68785 |
+
"step": 9806
|
| 68786 |
+
},
|
| 68787 |
+
{
|
| 68788 |
+
"epoch": 1.0836510304436708,
|
| 68789 |
+
"grad_norm": 0.7240917086601257,
|
| 68790 |
+
"learning_rate": 0.00015713905927272043,
|
| 68791 |
+
"loss": 4.6449,
|
| 68792 |
+
"step": 9807
|
| 68793 |
+
},
|
| 68794 |
+
{
|
| 68795 |
+
"epoch": 1.083761533786397,
|
| 68796 |
+
"grad_norm": 0.6784619688987732,
|
| 68797 |
+
"learning_rate": 0.0001571098228091053,
|
| 68798 |
+
"loss": 4.5794,
|
| 68799 |
+
"step": 9808
|
| 68800 |
+
},
|
| 68801 |
+
{
|
| 68802 |
+
"epoch": 1.083872037129123,
|
| 68803 |
+
"grad_norm": 0.6737550497055054,
|
| 68804 |
+
"learning_rate": 0.00015708058607477864,
|
| 68805 |
+
"loss": 4.6236,
|
| 68806 |
+
"step": 9809
|
| 68807 |
+
},
|
| 68808 |
+
{
|
| 68809 |
+
"epoch": 1.0839825404718493,
|
| 68810 |
+
"grad_norm": 0.7098556756973267,
|
| 68811 |
+
"learning_rate": 0.0001570513490708537,
|
| 68812 |
+
"loss": 4.6526,
|
| 68813 |
+
"step": 9810
|
| 68814 |
+
},
|
| 68815 |
+
{
|
| 68816 |
+
"epoch": 1.0840930438145753,
|
| 68817 |
+
"grad_norm": 0.6989752054214478,
|
| 68818 |
+
"learning_rate": 0.00015702211179844362,
|
| 68819 |
+
"loss": 4.5707,
|
| 68820 |
+
"step": 9811
|
| 68821 |
+
},
|
| 68822 |
+
{
|
| 68823 |
+
"epoch": 1.0842035471573015,
|
| 68824 |
+
"grad_norm": 0.6727108955383301,
|
| 68825 |
+
"learning_rate": 0.00015699287425866162,
|
| 68826 |
+
"loss": 4.5785,
|
| 68827 |
+
"step": 9812
|
| 68828 |
+
},
|
| 68829 |
+
{
|
| 68830 |
+
"epoch": 1.0843140505000277,
|
| 68831 |
+
"grad_norm": 0.6673719882965088,
|
| 68832 |
+
"learning_rate": 0.00015696363645262106,
|
| 68833 |
+
"loss": 4.6019,
|
| 68834 |
+
"step": 9813
|
| 68835 |
+
},
|
| 68836 |
+
{
|
| 68837 |
+
"epoch": 1.0844245538427537,
|
| 68838 |
+
"grad_norm": 0.730681836605072,
|
| 68839 |
+
"learning_rate": 0.00015693439838143506,
|
| 68840 |
+
"loss": 4.4576,
|
| 68841 |
+
"step": 9814
|
| 68842 |
+
},
|
| 68843 |
+
{
|
| 68844 |
+
"epoch": 1.08453505718548,
|
| 68845 |
+
"grad_norm": 0.6929203271865845,
|
| 68846 |
+
"learning_rate": 0.0001569051600462169,
|
| 68847 |
+
"loss": 4.5319,
|
| 68848 |
+
"step": 9815
|
| 68849 |
+
},
|
| 68850 |
+
{
|
| 68851 |
+
"epoch": 1.084645560528206,
|
| 68852 |
+
"grad_norm": 0.7055780291557312,
|
| 68853 |
+
"learning_rate": 0.0001568759214480799,
|
| 68854 |
+
"loss": 4.5569,
|
| 68855 |
+
"step": 9816
|
| 68856 |
+
},
|
| 68857 |
+
{
|
| 68858 |
+
"epoch": 1.0847560638709322,
|
| 68859 |
+
"grad_norm": 0.6880096793174744,
|
| 68860 |
+
"learning_rate": 0.00015684668258813736,
|
| 68861 |
+
"loss": 4.6213,
|
| 68862 |
+
"step": 9817
|
| 68863 |
+
},
|
| 68864 |
+
{
|
| 68865 |
+
"epoch": 1.0848665672136582,
|
| 68866 |
+
"grad_norm": 0.7117776274681091,
|
| 68867 |
+
"learning_rate": 0.0001568174434675025,
|
| 68868 |
+
"loss": 4.594,
|
| 68869 |
+
"step": 9818
|
| 68870 |
+
},
|
| 68871 |
+
{
|
| 68872 |
+
"epoch": 1.0849770705563844,
|
| 68873 |
+
"grad_norm": 0.6820794343948364,
|
| 68874 |
+
"learning_rate": 0.00015678820408728867,
|
| 68875 |
+
"loss": 4.6633,
|
| 68876 |
+
"step": 9819
|
| 68877 |
+
},
|
| 68878 |
+
{
|
| 68879 |
+
"epoch": 1.0850875738991104,
|
| 68880 |
+
"grad_norm": 0.7075722813606262,
|
| 68881 |
+
"learning_rate": 0.00015675896444860912,
|
| 68882 |
+
"loss": 4.5847,
|
| 68883 |
+
"step": 9820
|
| 68884 |
+
},
|
| 68885 |
+
{
|
| 68886 |
+
"epoch": 1.0851980772418366,
|
| 68887 |
+
"grad_norm": 0.7111828923225403,
|
| 68888 |
+
"learning_rate": 0.00015672972455257723,
|
| 68889 |
+
"loss": 4.6631,
|
| 68890 |
+
"step": 9821
|
| 68891 |
+
},
|
| 68892 |
+
{
|
| 68893 |
+
"epoch": 1.0853085805845626,
|
| 68894 |
+
"grad_norm": 0.7115505337715149,
|
| 68895 |
+
"learning_rate": 0.00015670048440030634,
|
| 68896 |
+
"loss": 4.6679,
|
| 68897 |
+
"step": 9822
|
| 68898 |
+
},
|
| 68899 |
+
{
|
| 68900 |
+
"epoch": 1.0854190839272888,
|
| 68901 |
+
"grad_norm": 0.6897279620170593,
|
| 68902 |
+
"learning_rate": 0.00015667124399290974,
|
| 68903 |
+
"loss": 4.5933,
|
| 68904 |
+
"step": 9823
|
| 68905 |
+
},
|
| 68906 |
+
{
|
| 68907 |
+
"epoch": 1.085529587270015,
|
| 68908 |
+
"grad_norm": 0.6914645433425903,
|
| 68909 |
+
"learning_rate": 0.0001566420033315008,
|
| 68910 |
+
"loss": 4.6337,
|
| 68911 |
+
"step": 9824
|
| 68912 |
+
},
|
| 68913 |
+
{
|
| 68914 |
+
"epoch": 1.085640090612741,
|
| 68915 |
+
"grad_norm": 0.6804413795471191,
|
| 68916 |
+
"learning_rate": 0.0001566127624171928,
|
| 68917 |
+
"loss": 4.6274,
|
| 68918 |
+
"step": 9825
|
| 68919 |
+
},
|
| 68920 |
+
{
|
| 68921 |
+
"epoch": 1.0857505939554672,
|
| 68922 |
+
"grad_norm": 0.6959519982337952,
|
| 68923 |
+
"learning_rate": 0.0001565835212510993,
|
| 68924 |
+
"loss": 4.5856,
|
| 68925 |
+
"step": 9826
|
| 68926 |
+
},
|
| 68927 |
+
{
|
| 68928 |
+
"epoch": 1.0858610972981932,
|
| 68929 |
+
"grad_norm": 0.6987335681915283,
|
| 68930 |
+
"learning_rate": 0.00015655427983433354,
|
| 68931 |
+
"loss": 4.6368,
|
| 68932 |
+
"step": 9827
|
| 68933 |
+
},
|
| 68934 |
+
{
|
| 68935 |
+
"epoch": 1.0859716006409195,
|
| 68936 |
+
"grad_norm": 0.6729974746704102,
|
| 68937 |
+
"learning_rate": 0.0001565250381680089,
|
| 68938 |
+
"loss": 4.6099,
|
| 68939 |
+
"step": 9828
|
| 68940 |
+
},
|
| 68941 |
+
{
|
| 68942 |
+
"epoch": 1.0860821039836455,
|
| 68943 |
+
"grad_norm": 0.7280816435813904,
|
| 68944 |
+
"learning_rate": 0.0001564957962532388,
|
| 68945 |
+
"loss": 4.6572,
|
| 68946 |
+
"step": 9829
|
| 68947 |
+
},
|
| 68948 |
+
{
|
| 68949 |
+
"epoch": 1.0861926073263717,
|
| 68950 |
+
"grad_norm": 0.6726709604263306,
|
| 68951 |
+
"learning_rate": 0.00015646655409113673,
|
| 68952 |
+
"loss": 4.5937,
|
| 68953 |
+
"step": 9830
|
| 68954 |
+
},
|
| 68955 |
+
{
|
| 68956 |
+
"epoch": 1.0863031106690977,
|
| 68957 |
+
"grad_norm": 0.743985116481781,
|
| 68958 |
+
"learning_rate": 0.00015643731168281594,
|
| 68959 |
+
"loss": 4.6012,
|
| 68960 |
+
"step": 9831
|
| 68961 |
+
},
|
| 68962 |
+
{
|
| 68963 |
+
"epoch": 1.086413614011824,
|
| 68964 |
+
"grad_norm": 0.6932312250137329,
|
| 68965 |
+
"learning_rate": 0.00015640806902939,
|
| 68966 |
+
"loss": 4.6695,
|
| 68967 |
+
"step": 9832
|
| 68968 |
+
},
|
| 68969 |
+
{
|
| 68970 |
+
"epoch": 1.08652411735455,
|
| 68971 |
+
"grad_norm": 0.7451423406600952,
|
| 68972 |
+
"learning_rate": 0.00015637882613197228,
|
| 68973 |
+
"loss": 4.5175,
|
| 68974 |
+
"step": 9833
|
| 68975 |
+
},
|
| 68976 |
+
{
|
| 68977 |
+
"epoch": 1.0866346206972761,
|
| 68978 |
+
"grad_norm": 0.7328627705574036,
|
| 68979 |
+
"learning_rate": 0.00015634958299167625,
|
| 68980 |
+
"loss": 4.5709,
|
| 68981 |
+
"step": 9834
|
| 68982 |
+
},
|
| 68983 |
+
{
|
| 68984 |
+
"epoch": 1.0867451240400021,
|
| 68985 |
+
"grad_norm": 0.697624921798706,
|
| 68986 |
+
"learning_rate": 0.00015632033960961535,
|
| 68987 |
+
"loss": 4.6181,
|
| 68988 |
+
"step": 9835
|
| 68989 |
+
},
|
| 68990 |
+
{
|
| 68991 |
+
"epoch": 1.0868556273827283,
|
| 68992 |
+
"grad_norm": 0.7506673336029053,
|
| 68993 |
+
"learning_rate": 0.000156291095986903,
|
| 68994 |
+
"loss": 4.511,
|
| 68995 |
+
"step": 9836
|
| 68996 |
+
},
|
| 68997 |
+
{
|
| 68998 |
+
"epoch": 1.0869661307254543,
|
| 68999 |
+
"grad_norm": 0.7284137606620789,
|
| 69000 |
+
"learning_rate": 0.00015626185212465278,
|
| 69001 |
+
"loss": 4.6122,
|
| 69002 |
+
"step": 9837
|
| 69003 |
+
},
|
| 69004 |
+
{
|
| 69005 |
+
"epoch": 1.0870766340681806,
|
| 69006 |
+
"grad_norm": 0.719136655330658,
|
| 69007 |
+
"learning_rate": 0.00015623260802397805,
|
| 69008 |
+
"loss": 4.5904,
|
| 69009 |
+
"step": 9838
|
| 69010 |
+
},
|
| 69011 |
+
{
|
| 69012 |
+
"epoch": 1.0871871374109068,
|
| 69013 |
+
"grad_norm": 0.7696072459220886,
|
| 69014 |
+
"learning_rate": 0.00015620336368599237,
|
| 69015 |
+
"loss": 4.5887,
|
| 69016 |
+
"step": 9839
|
| 69017 |
+
},
|
| 69018 |
+
{
|
| 69019 |
+
"epoch": 1.0872976407536328,
|
| 69020 |
+
"grad_norm": 0.7046389579772949,
|
| 69021 |
+
"learning_rate": 0.00015617411911180923,
|
| 69022 |
+
"loss": 4.6002,
|
| 69023 |
+
"step": 9840
|
| 69024 |
+
},
|
| 69025 |
+
{
|
| 69026 |
+
"epoch": 1.087408144096359,
|
| 69027 |
+
"grad_norm": 0.7011882662773132,
|
| 69028 |
+
"learning_rate": 0.00015614487430254214,
|
| 69029 |
+
"loss": 4.5754,
|
| 69030 |
+
"step": 9841
|
| 69031 |
+
},
|
| 69032 |
+
{
|
| 69033 |
+
"epoch": 1.087518647439085,
|
| 69034 |
+
"grad_norm": 0.7393079400062561,
|
| 69035 |
+
"learning_rate": 0.00015611562925930462,
|
| 69036 |
+
"loss": 4.6046,
|
| 69037 |
+
"step": 9842
|
| 69038 |
+
},
|
| 69039 |
+
{
|
| 69040 |
+
"epoch": 1.0876291507818112,
|
| 69041 |
+
"grad_norm": 0.7586001753807068,
|
| 69042 |
+
"learning_rate": 0.00015608638398321016,
|
| 69043 |
+
"loss": 4.6924,
|
| 69044 |
+
"step": 9843
|
| 69045 |
+
},
|
| 69046 |
+
{
|
| 69047 |
+
"epoch": 1.0877396541245372,
|
| 69048 |
+
"grad_norm": 0.7352765798568726,
|
| 69049 |
+
"learning_rate": 0.00015605713847537226,
|
| 69050 |
+
"loss": 4.6025,
|
| 69051 |
+
"step": 9844
|
| 69052 |
+
},
|
| 69053 |
+
{
|
| 69054 |
+
"epoch": 1.0878501574672634,
|
| 69055 |
+
"grad_norm": 0.6869689226150513,
|
| 69056 |
+
"learning_rate": 0.0001560278927369046,
|
| 69057 |
+
"loss": 4.6531,
|
| 69058 |
+
"step": 9845
|
| 69059 |
+
},
|
| 69060 |
+
{
|
| 69061 |
+
"epoch": 1.0879606608099894,
|
| 69062 |
+
"grad_norm": 0.6666762828826904,
|
| 69063 |
+
"learning_rate": 0.00015599864676892064,
|
| 69064 |
+
"loss": 4.6469,
|
| 69065 |
+
"step": 9846
|
| 69066 |
+
},
|
| 69067 |
+
{
|
| 69068 |
+
"epoch": 1.0880711641527157,
|
| 69069 |
+
"grad_norm": 0.7762638926506042,
|
| 69070 |
+
"learning_rate": 0.00015596940057253396,
|
| 69071 |
+
"loss": 4.6446,
|
| 69072 |
+
"step": 9847
|
| 69073 |
+
},
|
| 69074 |
+
{
|
| 69075 |
+
"epoch": 1.0881816674954417,
|
| 69076 |
+
"grad_norm": 0.6868476867675781,
|
| 69077 |
+
"learning_rate": 0.00015594015414885812,
|
| 69078 |
+
"loss": 4.6564,
|
| 69079 |
+
"step": 9848
|
| 69080 |
+
},
|
| 69081 |
+
{
|
| 69082 |
+
"epoch": 1.0882921708381679,
|
| 69083 |
+
"grad_norm": 0.699004590511322,
|
| 69084 |
+
"learning_rate": 0.00015591090749900668,
|
| 69085 |
+
"loss": 4.6245,
|
| 69086 |
+
"step": 9849
|
| 69087 |
+
},
|
| 69088 |
+
{
|
| 69089 |
+
"epoch": 1.0884026741808939,
|
| 69090 |
+
"grad_norm": 0.7541981935501099,
|
| 69091 |
+
"learning_rate": 0.00015588166062409326,
|
| 69092 |
+
"loss": 4.5303,
|
| 69093 |
+
"step": 9850
|
| 69094 |
+
},
|
| 69095 |
+
{
|
| 69096 |
+
"epoch": 1.08851317752362,
|
| 69097 |
+
"grad_norm": 0.6953229904174805,
|
| 69098 |
+
"learning_rate": 0.00015585241352523147,
|
| 69099 |
+
"loss": 4.5151,
|
| 69100 |
+
"step": 9851
|
| 69101 |
+
},
|
| 69102 |
+
{
|
| 69103 |
+
"epoch": 1.0886236808663463,
|
| 69104 |
+
"grad_norm": 0.6996403932571411,
|
| 69105 |
+
"learning_rate": 0.00015582316620353482,
|
| 69106 |
+
"loss": 4.6429,
|
| 69107 |
+
"step": 9852
|
| 69108 |
+
},
|
| 69109 |
+
{
|
| 69110 |
+
"epoch": 1.0887341842090723,
|
| 69111 |
+
"grad_norm": 0.7261369228363037,
|
| 69112 |
+
"learning_rate": 0.00015579391866011707,
|
| 69113 |
+
"loss": 4.7019,
|
| 69114 |
+
"step": 9853
|
| 69115 |
+
},
|
| 69116 |
+
{
|
| 69117 |
+
"epoch": 1.0888446875517985,
|
| 69118 |
+
"grad_norm": 0.6871686577796936,
|
| 69119 |
+
"learning_rate": 0.00015576467089609171,
|
| 69120 |
+
"loss": 4.6504,
|
| 69121 |
+
"step": 9854
|
| 69122 |
+
},
|
| 69123 |
+
{
|
| 69124 |
+
"epoch": 1.0889551908945245,
|
| 69125 |
+
"grad_norm": 0.7350519299507141,
|
| 69126 |
+
"learning_rate": 0.00015573542291257253,
|
| 69127 |
+
"loss": 4.631,
|
| 69128 |
+
"step": 9855
|
| 69129 |
+
},
|
| 69130 |
+
{
|
| 69131 |
+
"epoch": 1.0890656942372507,
|
| 69132 |
+
"grad_norm": 0.7224756479263306,
|
| 69133 |
+
"learning_rate": 0.00015570617471067293,
|
| 69134 |
+
"loss": 4.6587,
|
| 69135 |
+
"step": 9856
|
| 69136 |
+
},
|
| 69137 |
+
{
|
| 69138 |
+
"epoch": 1.0891761975799767,
|
| 69139 |
+
"grad_norm": 0.6985225081443787,
|
| 69140 |
+
"learning_rate": 0.00015567692629150673,
|
| 69141 |
+
"loss": 4.5193,
|
| 69142 |
+
"step": 9857
|
| 69143 |
+
},
|
| 69144 |
+
{
|
| 69145 |
+
"epoch": 1.089286700922703,
|
| 69146 |
+
"grad_norm": 0.7143962979316711,
|
| 69147 |
+
"learning_rate": 0.00015564767765618756,
|
| 69148 |
+
"loss": 4.626,
|
| 69149 |
+
"step": 9858
|
| 69150 |
+
},
|
| 69151 |
+
{
|
| 69152 |
+
"epoch": 1.089397204265429,
|
| 69153 |
+
"grad_norm": 0.6994044780731201,
|
| 69154 |
+
"learning_rate": 0.00015561842880582906,
|
| 69155 |
+
"loss": 4.6732,
|
| 69156 |
+
"step": 9859
|
| 69157 |
+
},
|
| 69158 |
+
{
|
| 69159 |
+
"epoch": 1.0895077076081552,
|
| 69160 |
+
"grad_norm": 0.7047141194343567,
|
| 69161 |
+
"learning_rate": 0.00015558917974154484,
|
| 69162 |
+
"loss": 4.5895,
|
| 69163 |
+
"step": 9860
|
| 69164 |
+
},
|
| 69165 |
+
{
|
| 69166 |
+
"epoch": 1.0896182109508812,
|
| 69167 |
+
"grad_norm": 0.6923738718032837,
|
| 69168 |
+
"learning_rate": 0.00015555993046444873,
|
| 69169 |
+
"loss": 4.598,
|
| 69170 |
+
"step": 9861
|
| 69171 |
+
},
|
| 69172 |
+
{
|
| 69173 |
+
"epoch": 1.0897287142936074,
|
| 69174 |
+
"grad_norm": 0.7014240622520447,
|
| 69175 |
+
"learning_rate": 0.00015553068097565428,
|
| 69176 |
+
"loss": 4.6684,
|
| 69177 |
+
"step": 9862
|
| 69178 |
+
},
|
| 69179 |
+
{
|
| 69180 |
+
"epoch": 1.0898392176363334,
|
| 69181 |
+
"grad_norm": 0.7109822034835815,
|
| 69182 |
+
"learning_rate": 0.00015550143127627525,
|
| 69183 |
+
"loss": 4.5819,
|
| 69184 |
+
"step": 9863
|
| 69185 |
+
},
|
| 69186 |
+
{
|
| 69187 |
+
"epoch": 1.0899497209790596,
|
| 69188 |
+
"grad_norm": 0.663541316986084,
|
| 69189 |
+
"learning_rate": 0.00015547218136742531,
|
| 69190 |
+
"loss": 4.5702,
|
| 69191 |
+
"step": 9864
|
| 69192 |
+
},
|
| 69193 |
+
{
|
| 69194 |
+
"epoch": 1.0900602243217858,
|
| 69195 |
+
"grad_norm": 0.7124786376953125,
|
| 69196 |
+
"learning_rate": 0.0001554429312502182,
|
| 69197 |
+
"loss": 4.5634,
|
| 69198 |
+
"step": 9865
|
| 69199 |
+
},
|
| 69200 |
+
{
|
| 69201 |
+
"epoch": 1.0901707276645118,
|
| 69202 |
+
"grad_norm": 0.6951503157615662,
|
| 69203 |
+
"learning_rate": 0.00015541368092576765,
|
| 69204 |
+
"loss": 4.4921,
|
| 69205 |
+
"step": 9866
|
| 69206 |
+
},
|
| 69207 |
+
{
|
| 69208 |
+
"epoch": 1.090281231007238,
|
| 69209 |
+
"grad_norm": 0.6809712648391724,
|
| 69210 |
+
"learning_rate": 0.00015538443039518735,
|
| 69211 |
+
"loss": 4.6651,
|
| 69212 |
+
"step": 9867
|
| 69213 |
+
},
|
| 69214 |
+
{
|
| 69215 |
+
"epoch": 1.090391734349964,
|
| 69216 |
+
"grad_norm": 0.7201408743858337,
|
| 69217 |
+
"learning_rate": 0.000155355179659591,
|
| 69218 |
+
"loss": 4.5408,
|
| 69219 |
+
"step": 9868
|
| 69220 |
+
},
|
| 69221 |
+
{
|
| 69222 |
+
"epoch": 1.0905022376926903,
|
| 69223 |
+
"grad_norm": 0.6882060766220093,
|
| 69224 |
+
"learning_rate": 0.00015532592872009244,
|
| 69225 |
+
"loss": 4.6527,
|
| 69226 |
+
"step": 9869
|
| 69227 |
+
},
|
| 69228 |
+
{
|
| 69229 |
+
"epoch": 1.0906127410354163,
|
| 69230 |
+
"grad_norm": 0.674277663230896,
|
| 69231 |
+
"learning_rate": 0.00015529667757780533,
|
| 69232 |
+
"loss": 4.7161,
|
| 69233 |
+
"step": 9870
|
| 69234 |
+
},
|
| 69235 |
+
{
|
| 69236 |
+
"epoch": 1.0907232443781425,
|
| 69237 |
+
"grad_norm": 0.7527250647544861,
|
| 69238 |
+
"learning_rate": 0.00015526742623384355,
|
| 69239 |
+
"loss": 4.5973,
|
| 69240 |
+
"step": 9871
|
| 69241 |
+
},
|
| 69242 |
+
{
|
| 69243 |
+
"epoch": 1.0908337477208685,
|
| 69244 |
+
"grad_norm": 0.754692792892456,
|
| 69245 |
+
"learning_rate": 0.00015523817468932071,
|
| 69246 |
+
"loss": 4.4867,
|
| 69247 |
+
"step": 9872
|
| 69248 |
+
},
|
| 69249 |
+
{
|
| 69250 |
+
"epoch": 1.0909442510635947,
|
| 69251 |
+
"grad_norm": 0.7041689157485962,
|
| 69252 |
+
"learning_rate": 0.00015520892294535067,
|
| 69253 |
+
"loss": 4.6376,
|
| 69254 |
+
"step": 9873
|
| 69255 |
+
},
|
| 69256 |
+
{
|
| 69257 |
+
"epoch": 1.0910547544063207,
|
| 69258 |
+
"grad_norm": 0.7096388339996338,
|
| 69259 |
+
"learning_rate": 0.00015517967100304723,
|
| 69260 |
+
"loss": 4.593,
|
| 69261 |
+
"step": 9874
|
| 69262 |
+
},
|
| 69263 |
+
{
|
| 69264 |
+
"epoch": 1.091165257749047,
|
| 69265 |
+
"grad_norm": 0.7178173065185547,
|
| 69266 |
+
"learning_rate": 0.00015515041886352416,
|
| 69267 |
+
"loss": 4.5674,
|
| 69268 |
+
"step": 9875
|
| 69269 |
+
},
|
| 69270 |
+
{
|
| 69271 |
+
"epoch": 1.091275761091773,
|
| 69272 |
+
"grad_norm": 0.7299194931983948,
|
| 69273 |
+
"learning_rate": 0.00015512116652789516,
|
| 69274 |
+
"loss": 4.5364,
|
| 69275 |
+
"step": 9876
|
| 69276 |
+
},
|
| 69277 |
+
{
|
| 69278 |
+
"epoch": 1.0913862644344992,
|
| 69279 |
+
"grad_norm": 0.7113240957260132,
|
| 69280 |
+
"learning_rate": 0.0001550919139972742,
|
| 69281 |
+
"loss": 4.7044,
|
| 69282 |
+
"step": 9877
|
| 69283 |
+
},
|
| 69284 |
+
{
|
| 69285 |
+
"epoch": 1.0914967677772252,
|
| 69286 |
+
"grad_norm": 0.6919460296630859,
|
| 69287 |
+
"learning_rate": 0.00015506266127277496,
|
| 69288 |
+
"loss": 4.4845,
|
| 69289 |
+
"step": 9878
|
| 69290 |
+
},
|
| 69291 |
+
{
|
| 69292 |
+
"epoch": 1.0916072711199514,
|
| 69293 |
+
"grad_norm": 0.7029079794883728,
|
| 69294 |
+
"learning_rate": 0.00015503340835551133,
|
| 69295 |
+
"loss": 4.5329,
|
| 69296 |
+
"step": 9879
|
| 69297 |
+
},
|
| 69298 |
+
{
|
| 69299 |
+
"epoch": 1.0917177744626776,
|
| 69300 |
+
"grad_norm": 0.7379578948020935,
|
| 69301 |
+
"learning_rate": 0.0001550041552465971,
|
| 69302 |
+
"loss": 4.622,
|
| 69303 |
+
"step": 9880
|
| 69304 |
+
},
|
| 69305 |
+
{
|
| 69306 |
+
"epoch": 1.0918282778054036,
|
| 69307 |
+
"grad_norm": 0.7239668369293213,
|
| 69308 |
+
"learning_rate": 0.0001549749019471461,
|
| 69309 |
+
"loss": 4.6099,
|
| 69310 |
+
"step": 9881
|
| 69311 |
+
},
|
| 69312 |
+
{
|
| 69313 |
+
"epoch": 1.0919387811481298,
|
| 69314 |
+
"grad_norm": 0.6826170086860657,
|
| 69315 |
+
"learning_rate": 0.00015494564845827221,
|
| 69316 |
+
"loss": 4.6137,
|
| 69317 |
+
"step": 9882
|
| 69318 |
+
},
|
| 69319 |
+
{
|
| 69320 |
+
"epoch": 1.0920492844908558,
|
| 69321 |
+
"grad_norm": 0.7013570666313171,
|
| 69322 |
+
"learning_rate": 0.00015491639478108923,
|
| 69323 |
+
"loss": 4.5683,
|
| 69324 |
+
"step": 9883
|
| 69325 |
+
},
|
| 69326 |
+
{
|
| 69327 |
+
"epoch": 1.092159787833582,
|
| 69328 |
+
"grad_norm": 0.6897295713424683,
|
| 69329 |
+
"learning_rate": 0.00015488714091671106,
|
| 69330 |
+
"loss": 4.4616,
|
| 69331 |
+
"step": 9884
|
| 69332 |
+
},
|
| 69333 |
+
{
|
| 69334 |
+
"epoch": 1.092270291176308,
|
| 69335 |
+
"grad_norm": 0.6924406886100769,
|
| 69336 |
+
"learning_rate": 0.00015485788686625152,
|
| 69337 |
+
"loss": 4.6082,
|
| 69338 |
+
"step": 9885
|
| 69339 |
+
},
|
| 69340 |
+
{
|
| 69341 |
+
"epoch": 1.0923807945190342,
|
| 69342 |
+
"grad_norm": 0.7211266160011292,
|
| 69343 |
+
"learning_rate": 0.0001548286326308245,
|
| 69344 |
+
"loss": 4.5481,
|
| 69345 |
+
"step": 9886
|
| 69346 |
+
},
|
| 69347 |
+
{
|
| 69348 |
+
"epoch": 1.0924912978617602,
|
| 69349 |
+
"grad_norm": 0.6781579256057739,
|
| 69350 |
+
"learning_rate": 0.0001547993782115439,
|
| 69351 |
+
"loss": 4.5249,
|
| 69352 |
+
"step": 9887
|
| 69353 |
+
},
|
| 69354 |
+
{
|
| 69355 |
+
"epoch": 1.0926018012044865,
|
| 69356 |
+
"grad_norm": 0.7123322486877441,
|
| 69357 |
+
"learning_rate": 0.00015477012360952353,
|
| 69358 |
+
"loss": 4.5521,
|
| 69359 |
+
"step": 9888
|
| 69360 |
+
},
|
| 69361 |
+
{
|
| 69362 |
+
"epoch": 1.0927123045472125,
|
| 69363 |
+
"grad_norm": 0.6711974740028381,
|
| 69364 |
+
"learning_rate": 0.00015474086882587734,
|
| 69365 |
+
"loss": 4.5919,
|
| 69366 |
+
"step": 9889
|
| 69367 |
+
},
|
| 69368 |
+
{
|
| 69369 |
+
"epoch": 1.0928228078899387,
|
| 69370 |
+
"grad_norm": 0.6801334023475647,
|
| 69371 |
+
"learning_rate": 0.00015471161386171922,
|
| 69372 |
+
"loss": 4.598,
|
| 69373 |
+
"step": 9890
|
| 69374 |
+
},
|
| 69375 |
+
{
|
| 69376 |
+
"epoch": 1.092933311232665,
|
| 69377 |
+
"grad_norm": 0.6684258580207825,
|
| 69378 |
+
"learning_rate": 0.00015468235871816306,
|
| 69379 |
+
"loss": 4.6446,
|
| 69380 |
+
"step": 9891
|
| 69381 |
+
},
|
| 69382 |
+
{
|
| 69383 |
+
"epoch": 1.093043814575391,
|
| 69384 |
+
"grad_norm": 0.6796461343765259,
|
| 69385 |
+
"learning_rate": 0.00015465310339632277,
|
| 69386 |
+
"loss": 4.6317,
|
| 69387 |
+
"step": 9892
|
| 69388 |
+
},
|
| 69389 |
+
{
|
| 69390 |
+
"epoch": 1.0931543179181171,
|
| 69391 |
+
"grad_norm": 0.652413010597229,
|
| 69392 |
+
"learning_rate": 0.00015462384789731228,
|
| 69393 |
+
"loss": 4.6434,
|
| 69394 |
+
"step": 9893
|
| 69395 |
+
},
|
| 69396 |
+
{
|
| 69397 |
+
"epoch": 1.0932648212608431,
|
| 69398 |
+
"grad_norm": 0.7923116087913513,
|
| 69399 |
+
"learning_rate": 0.0001545945922222455,
|
| 69400 |
+
"loss": 4.6658,
|
| 69401 |
+
"step": 9894
|
| 69402 |
+
},
|
| 69403 |
+
{
|
| 69404 |
+
"epoch": 1.0933753246035693,
|
| 69405 |
+
"grad_norm": 0.7030662298202515,
|
| 69406 |
+
"learning_rate": 0.00015456533637223642,
|
| 69407 |
+
"loss": 4.6086,
|
| 69408 |
+
"step": 9895
|
| 69409 |
+
},
|
| 69410 |
+
{
|
| 69411 |
+
"epoch": 1.0934858279462953,
|
| 69412 |
+
"grad_norm": 0.6449236273765564,
|
| 69413 |
+
"learning_rate": 0.00015453608034839887,
|
| 69414 |
+
"loss": 4.6225,
|
| 69415 |
+
"step": 9896
|
| 69416 |
+
},
|
| 69417 |
+
{
|
| 69418 |
+
"epoch": 1.0935963312890216,
|
| 69419 |
+
"grad_norm": 0.6862993836402893,
|
| 69420 |
+
"learning_rate": 0.00015450682415184684,
|
| 69421 |
+
"loss": 4.6358,
|
| 69422 |
+
"step": 9897
|
| 69423 |
+
},
|
| 69424 |
+
{
|
| 69425 |
+
"epoch": 1.0937068346317476,
|
| 69426 |
+
"grad_norm": 0.692793071269989,
|
| 69427 |
+
"learning_rate": 0.0001544775677836943,
|
| 69428 |
+
"loss": 4.667,
|
| 69429 |
+
"step": 9898
|
| 69430 |
+
},
|
| 69431 |
+
{
|
| 69432 |
+
"epoch": 1.0938173379744738,
|
| 69433 |
+
"grad_norm": 0.6536951661109924,
|
| 69434 |
+
"learning_rate": 0.00015444831124505518,
|
| 69435 |
+
"loss": 4.5252,
|
| 69436 |
+
"step": 9899
|
| 69437 |
+
},
|
| 69438 |
+
{
|
| 69439 |
+
"epoch": 1.0939278413171998,
|
| 69440 |
+
"grad_norm": 0.6607351303100586,
|
| 69441 |
+
"learning_rate": 0.00015441905453704343,
|
| 69442 |
+
"loss": 4.5487,
|
| 69443 |
+
"step": 9900
|
| 69444 |
+
},
|
| 69445 |
+
{
|
| 69446 |
+
"epoch": 1.094038344659926,
|
| 69447 |
+
"grad_norm": 0.6684635877609253,
|
| 69448 |
+
"learning_rate": 0.00015438979766077313,
|
| 69449 |
+
"loss": 4.5786,
|
| 69450 |
+
"step": 9901
|
| 69451 |
+
},
|
| 69452 |
+
{
|
| 69453 |
+
"epoch": 1.094148848002652,
|
| 69454 |
+
"grad_norm": 0.699338972568512,
|
| 69455 |
+
"learning_rate": 0.00015436054061735812,
|
| 69456 |
+
"loss": 4.6025,
|
| 69457 |
+
"step": 9902
|
| 69458 |
+
},
|
| 69459 |
+
{
|
| 69460 |
+
"epoch": 1.0942593513453782,
|
| 69461 |
+
"grad_norm": 0.6794523000717163,
|
| 69462 |
+
"learning_rate": 0.0001543312834079125,
|
| 69463 |
+
"loss": 4.4975,
|
| 69464 |
+
"step": 9903
|
| 69465 |
+
},
|
| 69466 |
+
{
|
| 69467 |
+
"epoch": 1.0943698546881042,
|
| 69468 |
+
"grad_norm": 0.6836043000221252,
|
| 69469 |
+
"learning_rate": 0.0001543020260335501,
|
| 69470 |
+
"loss": 4.5936,
|
| 69471 |
+
"step": 9904
|
| 69472 |
+
},
|
| 69473 |
+
{
|
| 69474 |
+
"epoch": 1.0944803580308304,
|
| 69475 |
+
"grad_norm": 0.7179417014122009,
|
| 69476 |
+
"learning_rate": 0.00015427276849538506,
|
| 69477 |
+
"loss": 4.6433,
|
| 69478 |
+
"step": 9905
|
| 69479 |
+
},
|
| 69480 |
+
{
|
| 69481 |
+
"epoch": 1.0945908613735567,
|
| 69482 |
+
"grad_norm": 0.7273696064949036,
|
| 69483 |
+
"learning_rate": 0.00015424351079453135,
|
| 69484 |
+
"loss": 4.6503,
|
| 69485 |
+
"step": 9906
|
| 69486 |
+
},
|
| 69487 |
+
{
|
| 69488 |
+
"epoch": 1.0947013647162827,
|
| 69489 |
+
"grad_norm": 0.6767399311065674,
|
| 69490 |
+
"learning_rate": 0.00015421425293210295,
|
| 69491 |
+
"loss": 4.733,
|
| 69492 |
+
"step": 9907
|
| 69493 |
+
},
|
| 69494 |
+
{
|
| 69495 |
+
"epoch": 1.0948118680590089,
|
| 69496 |
+
"grad_norm": 0.700602114200592,
|
| 69497 |
+
"learning_rate": 0.00015418499490921388,
|
| 69498 |
+
"loss": 4.5917,
|
| 69499 |
+
"step": 9908
|
| 69500 |
+
},
|
| 69501 |
+
{
|
| 69502 |
+
"epoch": 1.0949223714017349,
|
| 69503 |
+
"grad_norm": 0.702167809009552,
|
| 69504 |
+
"learning_rate": 0.00015415573672697815,
|
| 69505 |
+
"loss": 4.7011,
|
| 69506 |
+
"step": 9909
|
| 69507 |
+
},
|
| 69508 |
+
{
|
| 69509 |
+
"epoch": 1.095032874744461,
|
| 69510 |
+
"grad_norm": 0.6808905005455017,
|
| 69511 |
+
"learning_rate": 0.0001541264783865098,
|
| 69512 |
+
"loss": 4.5879,
|
| 69513 |
+
"step": 9910
|
| 69514 |
+
},
|
| 69515 |
+
{
|
| 69516 |
+
"epoch": 1.095143378087187,
|
| 69517 |
+
"grad_norm": 0.7756784558296204,
|
| 69518 |
+
"learning_rate": 0.0001540972198889229,
|
| 69519 |
+
"loss": 4.6489,
|
| 69520 |
+
"step": 9911
|
| 69521 |
+
},
|
| 69522 |
+
{
|
| 69523 |
+
"epoch": 1.0952538814299133,
|
| 69524 |
+
"grad_norm": 0.7004582285881042,
|
| 69525 |
+
"learning_rate": 0.00015406796123533138,
|
| 69526 |
+
"loss": 4.5709,
|
| 69527 |
+
"step": 9912
|
| 69528 |
+
},
|
| 69529 |
+
{
|
| 69530 |
+
"epoch": 1.0953643847726393,
|
| 69531 |
+
"grad_norm": 0.6905217170715332,
|
| 69532 |
+
"learning_rate": 0.0001540387024268494,
|
| 69533 |
+
"loss": 4.5544,
|
| 69534 |
+
"step": 9913
|
| 69535 |
+
},
|
| 69536 |
+
{
|
| 69537 |
+
"epoch": 1.0954748881153655,
|
| 69538 |
+
"grad_norm": 0.7326928377151489,
|
| 69539 |
+
"learning_rate": 0.00015400944346459099,
|
| 69540 |
+
"loss": 4.6289,
|
| 69541 |
+
"step": 9914
|
| 69542 |
+
},
|
| 69543 |
+
{
|
| 69544 |
+
"epoch": 1.0955853914580915,
|
| 69545 |
+
"grad_norm": 0.7049468159675598,
|
| 69546 |
+
"learning_rate": 0.00015398018434967015,
|
| 69547 |
+
"loss": 4.5591,
|
| 69548 |
+
"step": 9915
|
| 69549 |
+
},
|
| 69550 |
+
{
|
| 69551 |
+
"epoch": 1.0956958948008177,
|
| 69552 |
+
"grad_norm": 0.6778213381767273,
|
| 69553 |
+
"learning_rate": 0.000153950925083201,
|
| 69554 |
+
"loss": 4.5134,
|
| 69555 |
+
"step": 9916
|
| 69556 |
+
},
|
| 69557 |
+
{
|
| 69558 |
+
"epoch": 1.0958063981435437,
|
| 69559 |
+
"grad_norm": 0.6689280867576599,
|
| 69560 |
+
"learning_rate": 0.0001539216656662975,
|
| 69561 |
+
"loss": 4.5456,
|
| 69562 |
+
"step": 9917
|
| 69563 |
+
},
|
| 69564 |
+
{
|
| 69565 |
+
"epoch": 1.09591690148627,
|
| 69566 |
+
"grad_norm": 0.6738157272338867,
|
| 69567 |
+
"learning_rate": 0.00015389240610007388,
|
| 69568 |
+
"loss": 4.4847,
|
| 69569 |
+
"step": 9918
|
| 69570 |
+
},
|
| 69571 |
+
{
|
| 69572 |
+
"epoch": 1.096027404828996,
|
| 69573 |
+
"grad_norm": 0.7253062725067139,
|
| 69574 |
+
"learning_rate": 0.00015386314638564413,
|
| 69575 |
+
"loss": 4.696,
|
| 69576 |
+
"step": 9919
|
| 69577 |
+
},
|
| 69578 |
+
{
|
| 69579 |
+
"epoch": 1.0961379081717222,
|
| 69580 |
+
"grad_norm": 0.7143805623054504,
|
| 69581 |
+
"learning_rate": 0.0001538338865241223,
|
| 69582 |
+
"loss": 4.7015,
|
| 69583 |
+
"step": 9920
|
| 69584 |
+
},
|
| 69585 |
+
{
|
| 69586 |
+
"epoch": 1.0962484115144484,
|
| 69587 |
+
"grad_norm": 0.701005756855011,
|
| 69588 |
+
"learning_rate": 0.00015380462651662255,
|
| 69589 |
+
"loss": 4.497,
|
| 69590 |
+
"step": 9921
|
| 69591 |
+
},
|
| 69592 |
+
{
|
| 69593 |
+
"epoch": 1.0963589148571744,
|
| 69594 |
+
"grad_norm": 0.6859719157218933,
|
| 69595 |
+
"learning_rate": 0.00015377536636425896,
|
| 69596 |
+
"loss": 4.5687,
|
| 69597 |
+
"step": 9922
|
| 69598 |
+
},
|
| 69599 |
+
{
|
| 69600 |
+
"epoch": 1.0964694181999006,
|
| 69601 |
+
"grad_norm": 0.7061294317245483,
|
| 69602 |
+
"learning_rate": 0.0001537461060681456,
|
| 69603 |
+
"loss": 4.5763,
|
| 69604 |
+
"step": 9923
|
| 69605 |
+
},
|
| 69606 |
+
{
|
| 69607 |
+
"epoch": 1.0965799215426266,
|
| 69608 |
+
"grad_norm": 0.6945258378982544,
|
| 69609 |
+
"learning_rate": 0.00015371684562939667,
|
| 69610 |
+
"loss": 4.5677,
|
| 69611 |
+
"step": 9924
|
| 69612 |
+
},
|
| 69613 |
+
{
|
| 69614 |
+
"epoch": 1.0966904248853528,
|
| 69615 |
+
"grad_norm": 0.7291182279586792,
|
| 69616 |
+
"learning_rate": 0.0001536875850491261,
|
| 69617 |
+
"loss": 4.6087,
|
| 69618 |
+
"step": 9925
|
| 69619 |
+
},
|
| 69620 |
+
{
|
| 69621 |
+
"epoch": 1.0968009282280788,
|
| 69622 |
+
"grad_norm": 0.7679137587547302,
|
| 69623 |
+
"learning_rate": 0.00015365832432844817,
|
| 69624 |
+
"loss": 4.5918,
|
| 69625 |
+
"step": 9926
|
| 69626 |
+
},
|
| 69627 |
+
{
|
| 69628 |
+
"epoch": 1.096911431570805,
|
| 69629 |
+
"grad_norm": 0.7089045643806458,
|
| 69630 |
+
"learning_rate": 0.00015362906346847696,
|
| 69631 |
+
"loss": 4.5224,
|
| 69632 |
+
"step": 9927
|
| 69633 |
+
},
|
| 69634 |
+
{
|
| 69635 |
+
"epoch": 1.097021934913531,
|
| 69636 |
+
"grad_norm": 0.7135592103004456,
|
| 69637 |
+
"learning_rate": 0.00015359980247032658,
|
| 69638 |
+
"loss": 4.6086,
|
| 69639 |
+
"step": 9928
|
| 69640 |
+
},
|
| 69641 |
+
{
|
| 69642 |
+
"epoch": 1.0971324382562573,
|
| 69643 |
+
"grad_norm": 0.6806690692901611,
|
| 69644 |
+
"learning_rate": 0.00015357054133511116,
|
| 69645 |
+
"loss": 4.5703,
|
| 69646 |
+
"step": 9929
|
| 69647 |
+
},
|
| 69648 |
+
{
|
| 69649 |
+
"epoch": 1.0972429415989833,
|
| 69650 |
+
"grad_norm": 0.7749380469322205,
|
| 69651 |
+
"learning_rate": 0.00015354128006394487,
|
| 69652 |
+
"loss": 4.5827,
|
| 69653 |
+
"step": 9930
|
| 69654 |
+
},
|
| 69655 |
+
{
|
| 69656 |
+
"epoch": 1.0973534449417095,
|
| 69657 |
+
"grad_norm": 0.6691054105758667,
|
| 69658 |
+
"learning_rate": 0.0001535120186579418,
|
| 69659 |
+
"loss": 4.6324,
|
| 69660 |
+
"step": 9931
|
| 69661 |
+
},
|
| 69662 |
+
{
|
| 69663 |
+
"epoch": 1.0974639482844357,
|
| 69664 |
+
"grad_norm": 0.7581778168678284,
|
| 69665 |
+
"learning_rate": 0.00015348275711821616,
|
| 69666 |
+
"loss": 4.5934,
|
| 69667 |
+
"step": 9932
|
| 69668 |
+
},
|
| 69669 |
+
{
|
| 69670 |
+
"epoch": 1.0975744516271617,
|
| 69671 |
+
"grad_norm": 0.7021278142929077,
|
| 69672 |
+
"learning_rate": 0.00015345349544588208,
|
| 69673 |
+
"loss": 4.5908,
|
| 69674 |
+
"step": 9933
|
| 69675 |
+
},
|
| 69676 |
+
{
|
| 69677 |
+
"epoch": 1.097684954969888,
|
| 69678 |
+
"grad_norm": 0.7055724263191223,
|
| 69679 |
+
"learning_rate": 0.00015342423364205366,
|
| 69680 |
+
"loss": 4.6781,
|
| 69681 |
+
"step": 9934
|
| 69682 |
+
},
|
| 69683 |
+
{
|
| 69684 |
+
"epoch": 1.097795458312614,
|
| 69685 |
+
"grad_norm": 0.7632391452789307,
|
| 69686 |
+
"learning_rate": 0.00015339497170784518,
|
| 69687 |
+
"loss": 4.6253,
|
| 69688 |
+
"step": 9935
|
| 69689 |
+
},
|
| 69690 |
+
{
|
| 69691 |
+
"epoch": 1.0979059616553402,
|
| 69692 |
+
"grad_norm": 0.6543636918067932,
|
| 69693 |
+
"learning_rate": 0.00015336570964437075,
|
| 69694 |
+
"loss": 4.5284,
|
| 69695 |
+
"step": 9936
|
| 69696 |
+
},
|
| 69697 |
+
{
|
| 69698 |
+
"epoch": 1.0980164649980662,
|
| 69699 |
+
"grad_norm": 0.7071706056594849,
|
| 69700 |
+
"learning_rate": 0.0001533364474527445,
|
| 69701 |
+
"loss": 4.5934,
|
| 69702 |
+
"step": 9937
|
| 69703 |
+
},
|
| 69704 |
+
{
|
| 69705 |
+
"epoch": 1.0981269683407924,
|
| 69706 |
+
"grad_norm": 0.7274437546730042,
|
| 69707 |
+
"learning_rate": 0.00015330718513408066,
|
| 69708 |
+
"loss": 4.6129,
|
| 69709 |
+
"step": 9938
|
| 69710 |
+
},
|
| 69711 |
+
{
|
| 69712 |
+
"epoch": 1.0982374716835184,
|
| 69713 |
+
"grad_norm": 0.6727540493011475,
|
| 69714 |
+
"learning_rate": 0.00015327792268949342,
|
| 69715 |
+
"loss": 4.4452,
|
| 69716 |
+
"step": 9939
|
| 69717 |
+
},
|
| 69718 |
+
{
|
| 69719 |
+
"epoch": 1.0983479750262446,
|
| 69720 |
+
"grad_norm": 0.6710218787193298,
|
| 69721 |
+
"learning_rate": 0.00015324866012009697,
|
| 69722 |
+
"loss": 4.6339,
|
| 69723 |
+
"step": 9940
|
| 69724 |
+
},
|
| 69725 |
+
{
|
| 69726 |
+
"epoch": 1.0984584783689706,
|
| 69727 |
+
"grad_norm": 0.6941389441490173,
|
| 69728 |
+
"learning_rate": 0.00015321939742700543,
|
| 69729 |
+
"loss": 4.6719,
|
| 69730 |
+
"step": 9941
|
| 69731 |
+
},
|
| 69732 |
+
{
|
| 69733 |
+
"epoch": 1.0985689817116968,
|
| 69734 |
+
"grad_norm": 0.6903566718101501,
|
| 69735 |
+
"learning_rate": 0.00015319013461133308,
|
| 69736 |
+
"loss": 4.5223,
|
| 69737 |
+
"step": 9942
|
| 69738 |
+
},
|
| 69739 |
+
{
|
| 69740 |
+
"epoch": 1.0986794850544228,
|
| 69741 |
+
"grad_norm": 0.7329546809196472,
|
| 69742 |
+
"learning_rate": 0.00015316087167419412,
|
| 69743 |
+
"loss": 4.5693,
|
| 69744 |
+
"step": 9943
|
| 69745 |
+
},
|
| 69746 |
+
{
|
| 69747 |
+
"epoch": 1.098789988397149,
|
| 69748 |
+
"grad_norm": 0.7167581915855408,
|
| 69749 |
+
"learning_rate": 0.00015313160861670267,
|
| 69750 |
+
"loss": 4.6239,
|
| 69751 |
+
"step": 9944
|
| 69752 |
+
},
|
| 69753 |
+
{
|
| 69754 |
+
"epoch": 1.098900491739875,
|
| 69755 |
+
"grad_norm": 0.6868823170661926,
|
| 69756 |
+
"learning_rate": 0.00015310234543997305,
|
| 69757 |
+
"loss": 4.6208,
|
| 69758 |
+
"step": 9945
|
| 69759 |
+
},
|
| 69760 |
+
{
|
| 69761 |
+
"epoch": 1.0990109950826012,
|
| 69762 |
+
"grad_norm": 0.7103366851806641,
|
| 69763 |
+
"learning_rate": 0.00015307308214511944,
|
| 69764 |
+
"loss": 4.6528,
|
| 69765 |
+
"step": 9946
|
| 69766 |
+
},
|
| 69767 |
+
{
|
| 69768 |
+
"epoch": 1.0991214984253275,
|
| 69769 |
+
"grad_norm": 0.6980751156806946,
|
| 69770 |
+
"learning_rate": 0.00015304381873325603,
|
| 69771 |
+
"loss": 4.5651,
|
| 69772 |
+
"step": 9947
|
| 69773 |
+
},
|
| 69774 |
+
{
|
| 69775 |
+
"epoch": 1.0992320017680535,
|
| 69776 |
+
"grad_norm": 0.6731492280960083,
|
| 69777 |
+
"learning_rate": 0.00015301455520549706,
|
| 69778 |
+
"loss": 4.5193,
|
| 69779 |
+
"step": 9948
|
| 69780 |
+
},
|
| 69781 |
+
{
|
| 69782 |
+
"epoch": 1.0993425051107797,
|
| 69783 |
+
"grad_norm": 0.7219794988632202,
|
| 69784 |
+
"learning_rate": 0.00015298529156295676,
|
| 69785 |
+
"loss": 4.6324,
|
| 69786 |
+
"step": 9949
|
| 69787 |
+
},
|
| 69788 |
+
{
|
| 69789 |
+
"epoch": 1.0994530084535057,
|
| 69790 |
+
"grad_norm": 0.6912474632263184,
|
| 69791 |
+
"learning_rate": 0.00015295602780674938,
|
| 69792 |
+
"loss": 4.669,
|
| 69793 |
+
"step": 9950
|
| 69794 |
+
},
|
| 69795 |
+
{
|
| 69796 |
+
"epoch": 1.099563511796232,
|
| 69797 |
+
"grad_norm": 0.7293168902397156,
|
| 69798 |
+
"learning_rate": 0.00015292676393798918,
|
| 69799 |
+
"loss": 4.4908,
|
| 69800 |
+
"step": 9951
|
| 69801 |
+
},
|
| 69802 |
+
{
|
| 69803 |
+
"epoch": 1.099674015138958,
|
| 69804 |
+
"grad_norm": 0.6807422637939453,
|
| 69805 |
+
"learning_rate": 0.00015289749995779032,
|
| 69806 |
+
"loss": 4.5896,
|
| 69807 |
+
"step": 9952
|
| 69808 |
+
},
|
| 69809 |
+
{
|
| 69810 |
+
"epoch": 1.0997845184816841,
|
| 69811 |
+
"grad_norm": 0.6798710227012634,
|
| 69812 |
+
"learning_rate": 0.0001528682358672671,
|
| 69813 |
+
"loss": 4.6112,
|
| 69814 |
+
"step": 9953
|
| 69815 |
+
},
|
| 69816 |
+
{
|
| 69817 |
+
"epoch": 1.0998950218244101,
|
| 69818 |
+
"grad_norm": 0.7472180724143982,
|
| 69819 |
+
"learning_rate": 0.0001528389716675338,
|
| 69820 |
+
"loss": 4.6219,
|
| 69821 |
+
"step": 9954
|
| 69822 |
+
},
|
| 69823 |
+
{
|
| 69824 |
+
"epoch": 1.1000055251671363,
|
| 69825 |
+
"grad_norm": 0.6893553137779236,
|
| 69826 |
+
"learning_rate": 0.00015280970735970462,
|
| 69827 |
+
"loss": 4.6816,
|
| 69828 |
+
"step": 9955
|
| 69829 |
+
},
|
| 69830 |
+
{
|
| 69831 |
+
"epoch": 1.1001160285098623,
|
| 69832 |
+
"grad_norm": 0.7077176570892334,
|
| 69833 |
+
"learning_rate": 0.00015278044294489386,
|
| 69834 |
+
"loss": 4.6368,
|
| 69835 |
+
"step": 9956
|
| 69836 |
+
},
|
| 69837 |
+
{
|
| 69838 |
+
"epoch": 1.1002265318525886,
|
| 69839 |
+
"grad_norm": 0.7190374135971069,
|
| 69840 |
+
"learning_rate": 0.00015275117842421574,
|
| 69841 |
+
"loss": 4.5265,
|
| 69842 |
+
"step": 9957
|
| 69843 |
+
},
|
| 69844 |
+
{
|
| 69845 |
+
"epoch": 1.1003370351953146,
|
| 69846 |
+
"grad_norm": 0.7219252586364746,
|
| 69847 |
+
"learning_rate": 0.00015272191379878453,
|
| 69848 |
+
"loss": 4.6149,
|
| 69849 |
+
"step": 9958
|
| 69850 |
+
},
|
| 69851 |
+
{
|
| 69852 |
+
"epoch": 1.1004475385380408,
|
| 69853 |
+
"grad_norm": 0.7268725037574768,
|
| 69854 |
+
"learning_rate": 0.00015269264906971457,
|
| 69855 |
+
"loss": 4.6325,
|
| 69856 |
+
"step": 9959
|
| 69857 |
+
},
|
| 69858 |
+
{
|
| 69859 |
+
"epoch": 1.1005580418807668,
|
| 69860 |
+
"grad_norm": 0.7268263101577759,
|
| 69861 |
+
"learning_rate": 0.0001526633842381201,
|
| 69862 |
+
"loss": 4.5785,
|
| 69863 |
+
"step": 9960
|
| 69864 |
+
},
|
| 69865 |
+
{
|
| 69866 |
+
"epoch": 1.100668545223493,
|
| 69867 |
+
"grad_norm": 0.7076090574264526,
|
| 69868 |
+
"learning_rate": 0.00015263411930511534,
|
| 69869 |
+
"loss": 4.6321,
|
| 69870 |
+
"step": 9961
|
| 69871 |
+
},
|
| 69872 |
+
{
|
| 69873 |
+
"epoch": 1.1007790485662192,
|
| 69874 |
+
"grad_norm": 0.6982094645500183,
|
| 69875 |
+
"learning_rate": 0.00015260485427181467,
|
| 69876 |
+
"loss": 4.5507,
|
| 69877 |
+
"step": 9962
|
| 69878 |
+
},
|
| 69879 |
+
{
|
| 69880 |
+
"epoch": 1.1008895519089452,
|
| 69881 |
+
"grad_norm": 0.7131571173667908,
|
| 69882 |
+
"learning_rate": 0.0001525755891393323,
|
| 69883 |
+
"loss": 4.5715,
|
| 69884 |
+
"step": 9963
|
| 69885 |
+
},
|
| 69886 |
+
{
|
| 69887 |
+
"epoch": 1.1010000552516714,
|
| 69888 |
+
"grad_norm": 0.7273651361465454,
|
| 69889 |
+
"learning_rate": 0.00015254632390878257,
|
| 69890 |
+
"loss": 4.7152,
|
| 69891 |
+
"step": 9964
|
| 69892 |
+
},
|
| 69893 |
+
{
|
| 69894 |
+
"epoch": 1.1011105585943974,
|
| 69895 |
+
"grad_norm": 0.7350730895996094,
|
| 69896 |
+
"learning_rate": 0.00015251705858127968,
|
| 69897 |
+
"loss": 4.6953,
|
| 69898 |
+
"step": 9965
|
| 69899 |
+
},
|
| 69900 |
+
{
|
| 69901 |
+
"epoch": 1.1012210619371237,
|
| 69902 |
+
"grad_norm": 0.7077280282974243,
|
| 69903 |
+
"learning_rate": 0.0001524877931579381,
|
| 69904 |
+
"loss": 4.5855,
|
| 69905 |
+
"step": 9966
|
| 69906 |
+
},
|
| 69907 |
+
{
|
| 69908 |
+
"epoch": 1.1013315652798497,
|
| 69909 |
+
"grad_norm": 0.7296830415725708,
|
| 69910 |
+
"learning_rate": 0.000152458527639872,
|
| 69911 |
+
"loss": 4.541,
|
| 69912 |
+
"step": 9967
|
| 69913 |
+
},
|
| 69914 |
+
{
|
| 69915 |
+
"epoch": 1.1014420686225759,
|
| 69916 |
+
"grad_norm": 0.7291825413703918,
|
| 69917 |
+
"learning_rate": 0.00015242926202819573,
|
| 69918 |
+
"loss": 4.5913,
|
| 69919 |
+
"step": 9968
|
| 69920 |
+
},
|
| 69921 |
+
{
|
| 69922 |
+
"epoch": 1.1015525719653019,
|
| 69923 |
+
"grad_norm": 0.7275636792182922,
|
| 69924 |
+
"learning_rate": 0.00015239999632402353,
|
| 69925 |
+
"loss": 4.5801,
|
| 69926 |
+
"step": 9969
|
| 69927 |
+
},
|
| 69928 |
+
{
|
| 69929 |
+
"epoch": 1.101663075308028,
|
| 69930 |
+
"grad_norm": 0.7299180626869202,
|
| 69931 |
+
"learning_rate": 0.00015237073052846984,
|
| 69932 |
+
"loss": 4.4957,
|
| 69933 |
+
"step": 9970
|
| 69934 |
+
},
|
| 69935 |
+
{
|
| 69936 |
+
"epoch": 1.101773578650754,
|
| 69937 |
+
"grad_norm": 0.721309244632721,
|
| 69938 |
+
"learning_rate": 0.00015234146464264886,
|
| 69939 |
+
"loss": 4.6692,
|
| 69940 |
+
"step": 9971
|
| 69941 |
+
},
|
| 69942 |
+
{
|
| 69943 |
+
"epoch": 1.1018840819934803,
|
| 69944 |
+
"grad_norm": 0.7274154424667358,
|
| 69945 |
+
"learning_rate": 0.000152312198667675,
|
| 69946 |
+
"loss": 4.628,
|
| 69947 |
+
"step": 9972
|
| 69948 |
+
},
|
| 69949 |
+
{
|
| 69950 |
+
"epoch": 1.1019945853362065,
|
| 69951 |
+
"grad_norm": 0.6819590330123901,
|
| 69952 |
+
"learning_rate": 0.00015228293260466244,
|
| 69953 |
+
"loss": 4.5851,
|
| 69954 |
+
"step": 9973
|
| 69955 |
+
},
|
| 69956 |
+
{
|
| 69957 |
+
"epoch": 1.1021050886789325,
|
| 69958 |
+
"grad_norm": 0.7496370077133179,
|
| 69959 |
+
"learning_rate": 0.0001522536664547257,
|
| 69960 |
+
"loss": 4.6682,
|
| 69961 |
+
"step": 9974
|
| 69962 |
+
},
|
| 69963 |
+
{
|
| 69964 |
+
"epoch": 1.1022155920216588,
|
| 69965 |
+
"grad_norm": 0.7017356753349304,
|
| 69966 |
+
"learning_rate": 0.00015222440021897898,
|
| 69967 |
+
"loss": 4.6292,
|
| 69968 |
+
"step": 9975
|
| 69969 |
+
},
|
| 69970 |
+
{
|
| 69971 |
+
"epoch": 1.1023260953643848,
|
| 69972 |
+
"grad_norm": 0.7083221673965454,
|
| 69973 |
+
"learning_rate": 0.00015219513389853667,
|
| 69974 |
+
"loss": 4.6364,
|
| 69975 |
+
"step": 9976
|
| 69976 |
+
},
|
| 69977 |
+
{
|
| 69978 |
+
"epoch": 1.102436598707111,
|
| 69979 |
+
"grad_norm": 0.7083545923233032,
|
| 69980 |
+
"learning_rate": 0.00015216586749451305,
|
| 69981 |
+
"loss": 4.6728,
|
| 69982 |
+
"step": 9977
|
| 69983 |
+
},
|
| 69984 |
+
{
|
| 69985 |
+
"epoch": 1.102547102049837,
|
| 69986 |
+
"grad_norm": 0.7250202894210815,
|
| 69987 |
+
"learning_rate": 0.00015213660100802252,
|
| 69988 |
+
"loss": 4.5993,
|
| 69989 |
+
"step": 9978
|
| 69990 |
+
},
|
| 69991 |
+
{
|
| 69992 |
+
"epoch": 1.1026576053925632,
|
| 69993 |
+
"grad_norm": 0.7101658582687378,
|
| 69994 |
+
"learning_rate": 0.00015210733444017934,
|
| 69995 |
+
"loss": 4.7058,
|
| 69996 |
+
"step": 9979
|
| 69997 |
+
},
|
| 69998 |
+
{
|
| 69999 |
+
"epoch": 1.1027681087352892,
|
| 70000 |
+
"grad_norm": 0.6896347999572754,
|
| 70001 |
+
"learning_rate": 0.000152078067792098,
|
| 70002 |
+
"loss": 4.663,
|
| 70003 |
+
"step": 9980
|
| 70004 |
+
},
|
| 70005 |
+
{
|
| 70006 |
+
"epoch": 1.1028786120780154,
|
| 70007 |
+
"grad_norm": 0.7083562612533569,
|
| 70008 |
+
"learning_rate": 0.00015204880106489262,
|
| 70009 |
+
"loss": 4.6362,
|
| 70010 |
+
"step": 9981
|
| 70011 |
+
},
|
| 70012 |
+
{
|
| 70013 |
+
"epoch": 1.1029891154207414,
|
| 70014 |
+
"grad_norm": 0.7129656076431274,
|
| 70015 |
+
"learning_rate": 0.0001520195342596778,
|
| 70016 |
+
"loss": 4.6511,
|
| 70017 |
+
"step": 9982
|
| 70018 |
+
},
|
| 70019 |
+
{
|
| 70020 |
+
"epoch": 1.1030996187634676,
|
| 70021 |
+
"grad_norm": 0.7051286697387695,
|
| 70022 |
+
"learning_rate": 0.0001519902673775678,
|
| 70023 |
+
"loss": 4.6419,
|
| 70024 |
+
"step": 9983
|
| 70025 |
+
},
|
| 70026 |
+
{
|
| 70027 |
+
"epoch": 1.1032101221061936,
|
| 70028 |
+
"grad_norm": 0.7075096964836121,
|
| 70029 |
+
"learning_rate": 0.00015196100041967687,
|
| 70030 |
+
"loss": 4.6906,
|
| 70031 |
+
"step": 9984
|
| 70032 |
+
},
|
| 70033 |
+
{
|
| 70034 |
+
"epoch": 1.1033206254489198,
|
| 70035 |
+
"grad_norm": 0.7096100449562073,
|
| 70036 |
+
"learning_rate": 0.00015193173338711949,
|
| 70037 |
+
"loss": 4.6068,
|
| 70038 |
+
"step": 9985
|
| 70039 |
+
},
|
| 70040 |
+
{
|
| 70041 |
+
"epoch": 1.1034311287916458,
|
| 70042 |
+
"grad_norm": 0.7199036478996277,
|
| 70043 |
+
"learning_rate": 0.00015190246628101002,
|
| 70044 |
+
"loss": 4.5838,
|
| 70045 |
+
"step": 9986
|
| 70046 |
+
},
|
| 70047 |
+
{
|
| 70048 |
+
"epoch": 1.103541632134372,
|
| 70049 |
+
"grad_norm": 0.6937481760978699,
|
| 70050 |
+
"learning_rate": 0.00015187319910246278,
|
| 70051 |
+
"loss": 4.6813,
|
| 70052 |
+
"step": 9987
|
| 70053 |
+
},
|
| 70054 |
+
{
|
| 70055 |
+
"epoch": 1.1036521354770983,
|
| 70056 |
+
"grad_norm": 0.6957241892814636,
|
| 70057 |
+
"learning_rate": 0.0001518439318525922,
|
| 70058 |
+
"loss": 4.6299,
|
| 70059 |
+
"step": 9988
|
| 70060 |
+
},
|
| 70061 |
+
{
|
| 70062 |
+
"epoch": 1.1037626388198243,
|
| 70063 |
+
"grad_norm": 0.7179840207099915,
|
| 70064 |
+
"learning_rate": 0.0001518146645325125,
|
| 70065 |
+
"loss": 4.4965,
|
| 70066 |
+
"step": 9989
|
| 70067 |
+
},
|
| 70068 |
+
{
|
| 70069 |
+
"epoch": 1.1038731421625505,
|
| 70070 |
+
"grad_norm": 0.6770337224006653,
|
| 70071 |
+
"learning_rate": 0.00015178539714333825,
|
| 70072 |
+
"loss": 4.683,
|
| 70073 |
+
"step": 9990
|
| 70074 |
+
},
|
| 70075 |
+
{
|
| 70076 |
+
"epoch": 1.1039836455052765,
|
| 70077 |
+
"grad_norm": 0.6959671974182129,
|
| 70078 |
+
"learning_rate": 0.00015175612968618376,
|
| 70079 |
+
"loss": 4.6057,
|
| 70080 |
+
"step": 9991
|
| 70081 |
+
},
|
| 70082 |
+
{
|
| 70083 |
+
"epoch": 1.1040941488480027,
|
| 70084 |
+
"grad_norm": 0.7052760124206543,
|
| 70085 |
+
"learning_rate": 0.00015172686216216333,
|
| 70086 |
+
"loss": 4.5712,
|
| 70087 |
+
"step": 9992
|
| 70088 |
+
},
|
| 70089 |
+
{
|
| 70090 |
+
"epoch": 1.1042046521907287,
|
| 70091 |
+
"grad_norm": 0.6573265790939331,
|
| 70092 |
+
"learning_rate": 0.0001516975945723914,
|
| 70093 |
+
"loss": 4.5938,
|
| 70094 |
+
"step": 9993
|
| 70095 |
+
},
|
| 70096 |
+
{
|
| 70097 |
+
"epoch": 1.104315155533455,
|
| 70098 |
+
"grad_norm": 0.679203987121582,
|
| 70099 |
+
"learning_rate": 0.00015166832691798236,
|
| 70100 |
+
"loss": 4.607,
|
| 70101 |
+
"step": 9994
|
| 70102 |
+
},
|
| 70103 |
+
{
|
| 70104 |
+
"epoch": 1.104425658876181,
|
| 70105 |
+
"grad_norm": 0.755894124507904,
|
| 70106 |
+
"learning_rate": 0.00015163905920005064,
|
| 70107 |
+
"loss": 4.6325,
|
| 70108 |
+
"step": 9995
|
| 70109 |
+
},
|
| 70110 |
+
{
|
| 70111 |
+
"epoch": 1.1045361622189072,
|
| 70112 |
+
"grad_norm": 0.6825706958770752,
|
| 70113 |
+
"learning_rate": 0.00015160979141971053,
|
| 70114 |
+
"loss": 4.6513,
|
| 70115 |
+
"step": 9996
|
| 70116 |
+
},
|
| 70117 |
+
{
|
| 70118 |
+
"epoch": 1.1046466655616332,
|
| 70119 |
+
"grad_norm": 0.6921175718307495,
|
| 70120 |
+
"learning_rate": 0.00015158052357807644,
|
| 70121 |
+
"loss": 4.5615,
|
| 70122 |
+
"step": 9997
|
| 70123 |
+
},
|
| 70124 |
+
{
|
| 70125 |
+
"epoch": 1.1047571689043594,
|
| 70126 |
+
"grad_norm": 0.7379427552223206,
|
| 70127 |
+
"learning_rate": 0.0001515512556762629,
|
| 70128 |
+
"loss": 4.5622,
|
| 70129 |
+
"step": 9998
|
| 70130 |
+
},
|
| 70131 |
+
{
|
| 70132 |
+
"epoch": 1.1048676722470854,
|
| 70133 |
+
"grad_norm": 0.7596529126167297,
|
| 70134 |
+
"learning_rate": 0.00015152198771538415,
|
| 70135 |
+
"loss": 4.5809,
|
| 70136 |
+
"step": 9999
|
| 70137 |
+
},
|
| 70138 |
+
{
|
| 70139 |
+
"epoch": 1.1049781755898116,
|
| 70140 |
+
"grad_norm": 0.6683835983276367,
|
| 70141 |
+
"learning_rate": 0.00015149271969655458,
|
| 70142 |
+
"loss": 4.6447,
|
| 70143 |
+
"step": 10000
|
| 70144 |
+
},
|
| 70145 |
+
{
|
| 70146 |
+
"epoch": 1.1049781755898116,
|
| 70147 |
+
"eval_runtime": 16.6164,
|
| 70148 |
+
"eval_samples_per_second": 60.182,
|
| 70149 |
+
"eval_steps_per_second": 3.791,
|
| 70150 |
+
"step": 10000
|
| 70151 |
}
|
| 70152 |
],
|
| 70153 |
"logging_steps": 1,
|