Training in progress, step 35000, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3511 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 306619286
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf11920f2c9ec0d2d19594110c2b51d301a3be0b7a5c64b90c553593388f3b96
|
| 3 |
size 306619286
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 919972410
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9fd7ea0617e8351507c27a226d306b44f57a13cfca8832dbb9a6a416ba79c7b
|
| 3 |
size 919972410
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04cb5208648fd09a2e0403d51973f74ffbfd93cbd5da59e1e99c8df03769a86c
|
| 3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7034685b36b93a4dd3a50697b0b1c314b249b2189ec2cb96b757312b1514a579
|
| 3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e851fe1c1de0057f4eecefed6a131fa9021334eb43f6e7e65fdb270a25ac864
|
| 3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:978379030048e432baa510ec4fc9514faa08fe564ab964b3a4d05e8f60306495
|
| 3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdbc75d90af112615b53d15931e8157a80e37bcd110aac9a3089f5f6f5344171
|
| 3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c8a310f6ca2ca89570eb2cc68544656b30224f00b2d6d96eeda6e0cb8be50ab
|
| 3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c5b8110fcf6e044b6860c6305be969cfe03129549b92dc6fc2394448e9265d6
|
| 3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f936acaf5a2d5fe8c38d945450417facbf1577584c216908a396d3cc20bec88
|
| 3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ff31aac428f9992f606e05ff9d9b75bec2abb517b825e89760b21fb1796744f
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 5000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -21055,6 +21055,3514 @@
|
|
| 21055 |
"eval_samples_per_second": 3189.489,
|
| 21056 |
"eval_steps_per_second": 49.837,
|
| 21057 |
"step": 30000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21058 |
}
|
| 21059 |
],
|
| 21060 |
"logging_steps": 10,
|
|
@@ -21074,7 +24582,7 @@
|
|
| 21074 |
"attributes": {}
|
| 21075 |
}
|
| 21076 |
},
|
| 21077 |
-
"total_flos":
|
| 21078 |
"train_batch_size": 8,
|
| 21079 |
"trial_name": null,
|
| 21080 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.6495057956793947,
|
| 5 |
"eval_steps": 5000,
|
| 6 |
+
"global_step": 35000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 21055 |
"eval_samples_per_second": 3189.489,
|
| 21056 |
"eval_steps_per_second": 49.837,
|
| 21057 |
"step": 30000
|
| 21058 |
+
},
|
| 21059 |
+
{
|
| 21060 |
+
"epoch": 0.556904826523961,
|
| 21061 |
+
"grad_norm": 36.46875,
|
| 21062 |
+
"learning_rate": 9.912983646086715e-06,
|
| 21063 |
+
"loss": 19.7191,
|
| 21064 |
+
"step": 30010
|
| 21065 |
+
},
|
| 21066 |
+
{
|
| 21067 |
+
"epoch": 0.5570903996084408,
|
| 21068 |
+
"grad_norm": 34.21875,
|
| 21069 |
+
"learning_rate": 9.912954650300672e-06,
|
| 21070 |
+
"loss": 20.2245,
|
| 21071 |
+
"step": 30020
|
| 21072 |
+
},
|
| 21073 |
+
{
|
| 21074 |
+
"epoch": 0.5572759726929206,
|
| 21075 |
+
"grad_norm": 35.1875,
|
| 21076 |
+
"learning_rate": 9.91292565451463e-06,
|
| 21077 |
+
"loss": 20.1206,
|
| 21078 |
+
"step": 30030
|
| 21079 |
+
},
|
| 21080 |
+
{
|
| 21081 |
+
"epoch": 0.5574615457774005,
|
| 21082 |
+
"grad_norm": 33.0,
|
| 21083 |
+
"learning_rate": 9.912896658728587e-06,
|
| 21084 |
+
"loss": 20.1384,
|
| 21085 |
+
"step": 30040
|
| 21086 |
+
},
|
| 21087 |
+
{
|
| 21088 |
+
"epoch": 0.5576471188618802,
|
| 21089 |
+
"grad_norm": 34.1875,
|
| 21090 |
+
"learning_rate": 9.912867662942546e-06,
|
| 21091 |
+
"loss": 20.4741,
|
| 21092 |
+
"step": 30050
|
| 21093 |
+
},
|
| 21094 |
+
{
|
| 21095 |
+
"epoch": 0.5578326919463601,
|
| 21096 |
+
"grad_norm": 33.9375,
|
| 21097 |
+
"learning_rate": 9.912838667156502e-06,
|
| 21098 |
+
"loss": 19.7416,
|
| 21099 |
+
"step": 30060
|
| 21100 |
+
},
|
| 21101 |
+
{
|
| 21102 |
+
"epoch": 0.55801826503084,
|
| 21103 |
+
"grad_norm": 35.4375,
|
| 21104 |
+
"learning_rate": 9.91280967137046e-06,
|
| 21105 |
+
"loss": 20.5265,
|
| 21106 |
+
"step": 30070
|
| 21107 |
+
},
|
| 21108 |
+
{
|
| 21109 |
+
"epoch": 0.5582038381153197,
|
| 21110 |
+
"grad_norm": 33.90625,
|
| 21111 |
+
"learning_rate": 9.912780675584419e-06,
|
| 21112 |
+
"loss": 20.1603,
|
| 21113 |
+
"step": 30080
|
| 21114 |
+
},
|
| 21115 |
+
{
|
| 21116 |
+
"epoch": 0.5583894111997996,
|
| 21117 |
+
"grad_norm": 37.40625,
|
| 21118 |
+
"learning_rate": 9.912751679798376e-06,
|
| 21119 |
+
"loss": 20.2072,
|
| 21120 |
+
"step": 30090
|
| 21121 |
+
},
|
| 21122 |
+
{
|
| 21123 |
+
"epoch": 0.5585749842842794,
|
| 21124 |
+
"grad_norm": 34.90625,
|
| 21125 |
+
"learning_rate": 9.912722684012333e-06,
|
| 21126 |
+
"loss": 20.7503,
|
| 21127 |
+
"step": 30100
|
| 21128 |
+
},
|
| 21129 |
+
{
|
| 21130 |
+
"epoch": 0.5587605573687592,
|
| 21131 |
+
"grad_norm": 34.71875,
|
| 21132 |
+
"learning_rate": 9.91269368822629e-06,
|
| 21133 |
+
"loss": 20.1174,
|
| 21134 |
+
"step": 30110
|
| 21135 |
+
},
|
| 21136 |
+
{
|
| 21137 |
+
"epoch": 0.558946130453239,
|
| 21138 |
+
"grad_norm": 33.6875,
|
| 21139 |
+
"learning_rate": 9.912664692440248e-06,
|
| 21140 |
+
"loss": 19.6637,
|
| 21141 |
+
"step": 30120
|
| 21142 |
+
},
|
| 21143 |
+
{
|
| 21144 |
+
"epoch": 0.5591317035377189,
|
| 21145 |
+
"grad_norm": 33.6875,
|
| 21146 |
+
"learning_rate": 9.912635696654206e-06,
|
| 21147 |
+
"loss": 20.3168,
|
| 21148 |
+
"step": 30130
|
| 21149 |
+
},
|
| 21150 |
+
{
|
| 21151 |
+
"epoch": 0.5593172766221988,
|
| 21152 |
+
"grad_norm": 35.15625,
|
| 21153 |
+
"learning_rate": 9.912606700868163e-06,
|
| 21154 |
+
"loss": 20.4504,
|
| 21155 |
+
"step": 30140
|
| 21156 |
+
},
|
| 21157 |
+
{
|
| 21158 |
+
"epoch": 0.5595028497066785,
|
| 21159 |
+
"grad_norm": 32.21875,
|
| 21160 |
+
"learning_rate": 9.912577705082122e-06,
|
| 21161 |
+
"loss": 19.8619,
|
| 21162 |
+
"step": 30150
|
| 21163 |
+
},
|
| 21164 |
+
{
|
| 21165 |
+
"epoch": 0.5596884227911584,
|
| 21166 |
+
"grad_norm": 34.46875,
|
| 21167 |
+
"learning_rate": 9.91254870929608e-06,
|
| 21168 |
+
"loss": 20.3352,
|
| 21169 |
+
"step": 30160
|
| 21170 |
+
},
|
| 21171 |
+
{
|
| 21172 |
+
"epoch": 0.5598739958756382,
|
| 21173 |
+
"grad_norm": 35.09375,
|
| 21174 |
+
"learning_rate": 9.912519713510035e-06,
|
| 21175 |
+
"loss": 20.1644,
|
| 21176 |
+
"step": 30170
|
| 21177 |
+
},
|
| 21178 |
+
{
|
| 21179 |
+
"epoch": 0.560059568960118,
|
| 21180 |
+
"grad_norm": 35.6875,
|
| 21181 |
+
"learning_rate": 9.912490717723994e-06,
|
| 21182 |
+
"loss": 20.4346,
|
| 21183 |
+
"step": 30180
|
| 21184 |
+
},
|
| 21185 |
+
{
|
| 21186 |
+
"epoch": 0.5602451420445979,
|
| 21187 |
+
"grad_norm": 34.90625,
|
| 21188 |
+
"learning_rate": 9.912461721937952e-06,
|
| 21189 |
+
"loss": 20.3004,
|
| 21190 |
+
"step": 30190
|
| 21191 |
+
},
|
| 21192 |
+
{
|
| 21193 |
+
"epoch": 0.5604307151290777,
|
| 21194 |
+
"grad_norm": 35.3125,
|
| 21195 |
+
"learning_rate": 9.912432726151909e-06,
|
| 21196 |
+
"loss": 19.9627,
|
| 21197 |
+
"step": 30200
|
| 21198 |
+
},
|
| 21199 |
+
{
|
| 21200 |
+
"epoch": 0.5606162882135575,
|
| 21201 |
+
"grad_norm": 36.46875,
|
| 21202 |
+
"learning_rate": 9.912403730365866e-06,
|
| 21203 |
+
"loss": 20.271,
|
| 21204 |
+
"step": 30210
|
| 21205 |
+
},
|
| 21206 |
+
{
|
| 21207 |
+
"epoch": 0.5608018612980373,
|
| 21208 |
+
"grad_norm": 34.9375,
|
| 21209 |
+
"learning_rate": 9.912374734579824e-06,
|
| 21210 |
+
"loss": 20.5842,
|
| 21211 |
+
"step": 30220
|
| 21212 |
+
},
|
| 21213 |
+
{
|
| 21214 |
+
"epoch": 0.5609874343825172,
|
| 21215 |
+
"grad_norm": 34.03125,
|
| 21216 |
+
"learning_rate": 9.912345738793781e-06,
|
| 21217 |
+
"loss": 20.1096,
|
| 21218 |
+
"step": 30230
|
| 21219 |
+
},
|
| 21220 |
+
{
|
| 21221 |
+
"epoch": 0.5611730074669969,
|
| 21222 |
+
"grad_norm": 35.6875,
|
| 21223 |
+
"learning_rate": 9.912316743007739e-06,
|
| 21224 |
+
"loss": 20.4759,
|
| 21225 |
+
"step": 30240
|
| 21226 |
+
},
|
| 21227 |
+
{
|
| 21228 |
+
"epoch": 0.5613585805514768,
|
| 21229 |
+
"grad_norm": 33.0,
|
| 21230 |
+
"learning_rate": 9.912287747221698e-06,
|
| 21231 |
+
"loss": 20.4337,
|
| 21232 |
+
"step": 30250
|
| 21233 |
+
},
|
| 21234 |
+
{
|
| 21235 |
+
"epoch": 0.5615441536359567,
|
| 21236 |
+
"grad_norm": 35.34375,
|
| 21237 |
+
"learning_rate": 9.912258751435655e-06,
|
| 21238 |
+
"loss": 20.2153,
|
| 21239 |
+
"step": 30260
|
| 21240 |
+
},
|
| 21241 |
+
{
|
| 21242 |
+
"epoch": 0.5617297267204364,
|
| 21243 |
+
"grad_norm": 37.25,
|
| 21244 |
+
"learning_rate": 9.912229755649613e-06,
|
| 21245 |
+
"loss": 19.9206,
|
| 21246 |
+
"step": 30270
|
| 21247 |
+
},
|
| 21248 |
+
{
|
| 21249 |
+
"epoch": 0.5619152998049163,
|
| 21250 |
+
"grad_norm": 35.5625,
|
| 21251 |
+
"learning_rate": 9.91220075986357e-06,
|
| 21252 |
+
"loss": 19.9902,
|
| 21253 |
+
"step": 30280
|
| 21254 |
+
},
|
| 21255 |
+
{
|
| 21256 |
+
"epoch": 0.5621008728893961,
|
| 21257 |
+
"grad_norm": 35.0625,
|
| 21258 |
+
"learning_rate": 9.912171764077527e-06,
|
| 21259 |
+
"loss": 19.9837,
|
| 21260 |
+
"step": 30290
|
| 21261 |
+
},
|
| 21262 |
+
{
|
| 21263 |
+
"epoch": 0.562286445973876,
|
| 21264 |
+
"grad_norm": 33.53125,
|
| 21265 |
+
"learning_rate": 9.912142768291485e-06,
|
| 21266 |
+
"loss": 20.1273,
|
| 21267 |
+
"step": 30300
|
| 21268 |
+
},
|
| 21269 |
+
{
|
| 21270 |
+
"epoch": 0.5624720190583558,
|
| 21271 |
+
"grad_norm": 36.96875,
|
| 21272 |
+
"learning_rate": 9.912113772505442e-06,
|
| 21273 |
+
"loss": 20.5882,
|
| 21274 |
+
"step": 30310
|
| 21275 |
+
},
|
| 21276 |
+
{
|
| 21277 |
+
"epoch": 0.5626575921428356,
|
| 21278 |
+
"grad_norm": 34.46875,
|
| 21279 |
+
"learning_rate": 9.912084776719401e-06,
|
| 21280 |
+
"loss": 20.1443,
|
| 21281 |
+
"step": 30320
|
| 21282 |
+
},
|
| 21283 |
+
{
|
| 21284 |
+
"epoch": 0.5628431652273155,
|
| 21285 |
+
"grad_norm": 33.875,
|
| 21286 |
+
"learning_rate": 9.912055780933357e-06,
|
| 21287 |
+
"loss": 20.4219,
|
| 21288 |
+
"step": 30330
|
| 21289 |
+
},
|
| 21290 |
+
{
|
| 21291 |
+
"epoch": 0.5630287383117952,
|
| 21292 |
+
"grad_norm": 34.375,
|
| 21293 |
+
"learning_rate": 9.912026785147314e-06,
|
| 21294 |
+
"loss": 20.6782,
|
| 21295 |
+
"step": 30340
|
| 21296 |
+
},
|
| 21297 |
+
{
|
| 21298 |
+
"epoch": 0.5632143113962751,
|
| 21299 |
+
"grad_norm": 36.65625,
|
| 21300 |
+
"learning_rate": 9.911997789361274e-06,
|
| 21301 |
+
"loss": 20.4529,
|
| 21302 |
+
"step": 30350
|
| 21303 |
+
},
|
| 21304 |
+
{
|
| 21305 |
+
"epoch": 0.563399884480755,
|
| 21306 |
+
"grad_norm": 32.75,
|
| 21307 |
+
"learning_rate": 9.911968793575231e-06,
|
| 21308 |
+
"loss": 20.2558,
|
| 21309 |
+
"step": 30360
|
| 21310 |
+
},
|
| 21311 |
+
{
|
| 21312 |
+
"epoch": 0.5635854575652347,
|
| 21313 |
+
"grad_norm": 34.46875,
|
| 21314 |
+
"learning_rate": 9.911939797789188e-06,
|
| 21315 |
+
"loss": 20.1141,
|
| 21316 |
+
"step": 30370
|
| 21317 |
+
},
|
| 21318 |
+
{
|
| 21319 |
+
"epoch": 0.5637710306497146,
|
| 21320 |
+
"grad_norm": 34.375,
|
| 21321 |
+
"learning_rate": 9.911910802003146e-06,
|
| 21322 |
+
"loss": 20.2123,
|
| 21323 |
+
"step": 30380
|
| 21324 |
+
},
|
| 21325 |
+
{
|
| 21326 |
+
"epoch": 0.5639566037341944,
|
| 21327 |
+
"grad_norm": 34.65625,
|
| 21328 |
+
"learning_rate": 9.911881806217103e-06,
|
| 21329 |
+
"loss": 20.2384,
|
| 21330 |
+
"step": 30390
|
| 21331 |
+
},
|
| 21332 |
+
{
|
| 21333 |
+
"epoch": 0.5641421768186742,
|
| 21334 |
+
"grad_norm": 36.96875,
|
| 21335 |
+
"learning_rate": 9.91185281043106e-06,
|
| 21336 |
+
"loss": 20.17,
|
| 21337 |
+
"step": 30400
|
| 21338 |
+
},
|
| 21339 |
+
{
|
| 21340 |
+
"epoch": 0.564327749903154,
|
| 21341 |
+
"grad_norm": 34.0625,
|
| 21342 |
+
"learning_rate": 9.911823814645018e-06,
|
| 21343 |
+
"loss": 20.2601,
|
| 21344 |
+
"step": 30410
|
| 21345 |
+
},
|
| 21346 |
+
{
|
| 21347 |
+
"epoch": 0.5645133229876339,
|
| 21348 |
+
"grad_norm": 33.875,
|
| 21349 |
+
"learning_rate": 9.911794818858977e-06,
|
| 21350 |
+
"loss": 20.4207,
|
| 21351 |
+
"step": 30420
|
| 21352 |
+
},
|
| 21353 |
+
{
|
| 21354 |
+
"epoch": 0.5646988960721137,
|
| 21355 |
+
"grad_norm": 34.90625,
|
| 21356 |
+
"learning_rate": 9.911765823072935e-06,
|
| 21357 |
+
"loss": 20.1737,
|
| 21358 |
+
"step": 30430
|
| 21359 |
+
},
|
| 21360 |
+
{
|
| 21361 |
+
"epoch": 0.5648844691565935,
|
| 21362 |
+
"grad_norm": 34.4375,
|
| 21363 |
+
"learning_rate": 9.91173682728689e-06,
|
| 21364 |
+
"loss": 20.2786,
|
| 21365 |
+
"step": 30440
|
| 21366 |
+
},
|
| 21367 |
+
{
|
| 21368 |
+
"epoch": 0.5650700422410734,
|
| 21369 |
+
"grad_norm": 34.59375,
|
| 21370 |
+
"learning_rate": 9.91170783150085e-06,
|
| 21371 |
+
"loss": 20.2238,
|
| 21372 |
+
"step": 30450
|
| 21373 |
+
},
|
| 21374 |
+
{
|
| 21375 |
+
"epoch": 0.5652556153255531,
|
| 21376 |
+
"grad_norm": 35.4375,
|
| 21377 |
+
"learning_rate": 9.911678835714807e-06,
|
| 21378 |
+
"loss": 20.7817,
|
| 21379 |
+
"step": 30460
|
| 21380 |
+
},
|
| 21381 |
+
{
|
| 21382 |
+
"epoch": 0.565441188410033,
|
| 21383 |
+
"grad_norm": 36.0625,
|
| 21384 |
+
"learning_rate": 9.911649839928764e-06,
|
| 21385 |
+
"loss": 20.3482,
|
| 21386 |
+
"step": 30470
|
| 21387 |
+
},
|
| 21388 |
+
{
|
| 21389 |
+
"epoch": 0.5656267614945129,
|
| 21390 |
+
"grad_norm": 34.1875,
|
| 21391 |
+
"learning_rate": 9.911620844142722e-06,
|
| 21392 |
+
"loss": 20.3726,
|
| 21393 |
+
"step": 30480
|
| 21394 |
+
},
|
| 21395 |
+
{
|
| 21396 |
+
"epoch": 0.5658123345789927,
|
| 21397 |
+
"grad_norm": 36.28125,
|
| 21398 |
+
"learning_rate": 9.911591848356679e-06,
|
| 21399 |
+
"loss": 20.351,
|
| 21400 |
+
"step": 30490
|
| 21401 |
+
},
|
| 21402 |
+
{
|
| 21403 |
+
"epoch": 0.5659979076634725,
|
| 21404 |
+
"grad_norm": 35.625,
|
| 21405 |
+
"learning_rate": 9.911562852570636e-06,
|
| 21406 |
+
"loss": 20.0794,
|
| 21407 |
+
"step": 30500
|
| 21408 |
+
},
|
| 21409 |
+
{
|
| 21410 |
+
"epoch": 0.5661834807479523,
|
| 21411 |
+
"grad_norm": 34.3125,
|
| 21412 |
+
"learning_rate": 9.911533856784594e-06,
|
| 21413 |
+
"loss": 20.5169,
|
| 21414 |
+
"step": 30510
|
| 21415 |
+
},
|
| 21416 |
+
{
|
| 21417 |
+
"epoch": 0.5663690538324322,
|
| 21418 |
+
"grad_norm": 37.6875,
|
| 21419 |
+
"learning_rate": 9.911504860998551e-06,
|
| 21420 |
+
"loss": 20.1937,
|
| 21421 |
+
"step": 30520
|
| 21422 |
+
},
|
| 21423 |
+
{
|
| 21424 |
+
"epoch": 0.5665546269169119,
|
| 21425 |
+
"grad_norm": 34.5,
|
| 21426 |
+
"learning_rate": 9.91147586521251e-06,
|
| 21427 |
+
"loss": 20.0421,
|
| 21428 |
+
"step": 30530
|
| 21429 |
+
},
|
| 21430 |
+
{
|
| 21431 |
+
"epoch": 0.5667402000013918,
|
| 21432 |
+
"grad_norm": 36.625,
|
| 21433 |
+
"learning_rate": 9.911446869426468e-06,
|
| 21434 |
+
"loss": 20.5098,
|
| 21435 |
+
"step": 30540
|
| 21436 |
+
},
|
| 21437 |
+
{
|
| 21438 |
+
"epoch": 0.5669257730858717,
|
| 21439 |
+
"grad_norm": 33.78125,
|
| 21440 |
+
"learning_rate": 9.911417873640425e-06,
|
| 21441 |
+
"loss": 20.1718,
|
| 21442 |
+
"step": 30550
|
| 21443 |
+
},
|
| 21444 |
+
{
|
| 21445 |
+
"epoch": 0.5671113461703514,
|
| 21446 |
+
"grad_norm": 35.375,
|
| 21447 |
+
"learning_rate": 9.911388877854383e-06,
|
| 21448 |
+
"loss": 20.703,
|
| 21449 |
+
"step": 30560
|
| 21450 |
+
},
|
| 21451 |
+
{
|
| 21452 |
+
"epoch": 0.5672969192548313,
|
| 21453 |
+
"grad_norm": 35.84375,
|
| 21454 |
+
"learning_rate": 9.91135988206834e-06,
|
| 21455 |
+
"loss": 20.0026,
|
| 21456 |
+
"step": 30570
|
| 21457 |
+
},
|
| 21458 |
+
{
|
| 21459 |
+
"epoch": 0.5674824923393111,
|
| 21460 |
+
"grad_norm": 37.21875,
|
| 21461 |
+
"learning_rate": 9.911330886282297e-06,
|
| 21462 |
+
"loss": 19.9628,
|
| 21463 |
+
"step": 30580
|
| 21464 |
+
},
|
| 21465 |
+
{
|
| 21466 |
+
"epoch": 0.5676680654237909,
|
| 21467 |
+
"grad_norm": 35.59375,
|
| 21468 |
+
"learning_rate": 9.911301890496255e-06,
|
| 21469 |
+
"loss": 20.4465,
|
| 21470 |
+
"step": 30590
|
| 21471 |
+
},
|
| 21472 |
+
{
|
| 21473 |
+
"epoch": 0.5678536385082708,
|
| 21474 |
+
"grad_norm": 35.4375,
|
| 21475 |
+
"learning_rate": 9.911272894710212e-06,
|
| 21476 |
+
"loss": 20.0498,
|
| 21477 |
+
"step": 30600
|
| 21478 |
+
},
|
| 21479 |
+
{
|
| 21480 |
+
"epoch": 0.5680392115927506,
|
| 21481 |
+
"grad_norm": 38.21875,
|
| 21482 |
+
"learning_rate": 9.91124389892417e-06,
|
| 21483 |
+
"loss": 20.5764,
|
| 21484 |
+
"step": 30610
|
| 21485 |
+
},
|
| 21486 |
+
{
|
| 21487 |
+
"epoch": 0.5682247846772304,
|
| 21488 |
+
"grad_norm": 34.34375,
|
| 21489 |
+
"learning_rate": 9.911214903138127e-06,
|
| 21490 |
+
"loss": 20.0934,
|
| 21491 |
+
"step": 30620
|
| 21492 |
+
},
|
| 21493 |
+
{
|
| 21494 |
+
"epoch": 0.5684103577617102,
|
| 21495 |
+
"grad_norm": 34.75,
|
| 21496 |
+
"learning_rate": 9.911185907352086e-06,
|
| 21497 |
+
"loss": 19.9147,
|
| 21498 |
+
"step": 30630
|
| 21499 |
+
},
|
| 21500 |
+
{
|
| 21501 |
+
"epoch": 0.5685959308461901,
|
| 21502 |
+
"grad_norm": 33.65625,
|
| 21503 |
+
"learning_rate": 9.911156911566043e-06,
|
| 21504 |
+
"loss": 20.4771,
|
| 21505 |
+
"step": 30640
|
| 21506 |
+
},
|
| 21507 |
+
{
|
| 21508 |
+
"epoch": 0.5687815039306698,
|
| 21509 |
+
"grad_norm": 34.96875,
|
| 21510 |
+
"learning_rate": 9.91112791578e-06,
|
| 21511 |
+
"loss": 19.7005,
|
| 21512 |
+
"step": 30650
|
| 21513 |
+
},
|
| 21514 |
+
{
|
| 21515 |
+
"epoch": 0.5689670770151497,
|
| 21516 |
+
"grad_norm": 34.21875,
|
| 21517 |
+
"learning_rate": 9.911098919993958e-06,
|
| 21518 |
+
"loss": 20.2171,
|
| 21519 |
+
"step": 30660
|
| 21520 |
+
},
|
| 21521 |
+
{
|
| 21522 |
+
"epoch": 0.5691526500996296,
|
| 21523 |
+
"grad_norm": 34.6875,
|
| 21524 |
+
"learning_rate": 9.911069924207916e-06,
|
| 21525 |
+
"loss": 20.4261,
|
| 21526 |
+
"step": 30670
|
| 21527 |
+
},
|
| 21528 |
+
{
|
| 21529 |
+
"epoch": 0.5693382231841094,
|
| 21530 |
+
"grad_norm": 35.5,
|
| 21531 |
+
"learning_rate": 9.911040928421873e-06,
|
| 21532 |
+
"loss": 19.8097,
|
| 21533 |
+
"step": 30680
|
| 21534 |
+
},
|
| 21535 |
+
{
|
| 21536 |
+
"epoch": 0.5695237962685892,
|
| 21537 |
+
"grad_norm": 37.15625,
|
| 21538 |
+
"learning_rate": 9.91101193263583e-06,
|
| 21539 |
+
"loss": 20.2062,
|
| 21540 |
+
"step": 30690
|
| 21541 |
+
},
|
| 21542 |
+
{
|
| 21543 |
+
"epoch": 0.569709369353069,
|
| 21544 |
+
"grad_norm": 37.3125,
|
| 21545 |
+
"learning_rate": 9.91098293684979e-06,
|
| 21546 |
+
"loss": 20.2614,
|
| 21547 |
+
"step": 30700
|
| 21548 |
+
},
|
| 21549 |
+
{
|
| 21550 |
+
"epoch": 0.5698949424375489,
|
| 21551 |
+
"grad_norm": 34.96875,
|
| 21552 |
+
"learning_rate": 9.910953941063745e-06,
|
| 21553 |
+
"loss": 20.3764,
|
| 21554 |
+
"step": 30710
|
| 21555 |
+
},
|
| 21556 |
+
{
|
| 21557 |
+
"epoch": 0.5700805155220287,
|
| 21558 |
+
"grad_norm": 35.1875,
|
| 21559 |
+
"learning_rate": 9.910924945277703e-06,
|
| 21560 |
+
"loss": 19.9522,
|
| 21561 |
+
"step": 30720
|
| 21562 |
+
},
|
| 21563 |
+
{
|
| 21564 |
+
"epoch": 0.5702660886065085,
|
| 21565 |
+
"grad_norm": 35.9375,
|
| 21566 |
+
"learning_rate": 9.910895949491662e-06,
|
| 21567 |
+
"loss": 19.9363,
|
| 21568 |
+
"step": 30730
|
| 21569 |
+
},
|
| 21570 |
+
{
|
| 21571 |
+
"epoch": 0.5704516616909884,
|
| 21572 |
+
"grad_norm": 36.71875,
|
| 21573 |
+
"learning_rate": 9.91086695370562e-06,
|
| 21574 |
+
"loss": 20.4394,
|
| 21575 |
+
"step": 30740
|
| 21576 |
+
},
|
| 21577 |
+
{
|
| 21578 |
+
"epoch": 0.5706372347754681,
|
| 21579 |
+
"grad_norm": 36.78125,
|
| 21580 |
+
"learning_rate": 9.910837957919577e-06,
|
| 21581 |
+
"loss": 20.6453,
|
| 21582 |
+
"step": 30750
|
| 21583 |
+
},
|
| 21584 |
+
{
|
| 21585 |
+
"epoch": 0.570822807859948,
|
| 21586 |
+
"grad_norm": 34.3125,
|
| 21587 |
+
"learning_rate": 9.910808962133534e-06,
|
| 21588 |
+
"loss": 20.342,
|
| 21589 |
+
"step": 30760
|
| 21590 |
+
},
|
| 21591 |
+
{
|
| 21592 |
+
"epoch": 0.5710083809444279,
|
| 21593 |
+
"grad_norm": 36.25,
|
| 21594 |
+
"learning_rate": 9.910779966347491e-06,
|
| 21595 |
+
"loss": 20.1425,
|
| 21596 |
+
"step": 30770
|
| 21597 |
+
},
|
| 21598 |
+
{
|
| 21599 |
+
"epoch": 0.5711939540289076,
|
| 21600 |
+
"grad_norm": 36.40625,
|
| 21601 |
+
"learning_rate": 9.910750970561449e-06,
|
| 21602 |
+
"loss": 19.9823,
|
| 21603 |
+
"step": 30780
|
| 21604 |
+
},
|
| 21605 |
+
{
|
| 21606 |
+
"epoch": 0.5713795271133875,
|
| 21607 |
+
"grad_norm": 32.53125,
|
| 21608 |
+
"learning_rate": 9.910721974775406e-06,
|
| 21609 |
+
"loss": 19.6331,
|
| 21610 |
+
"step": 30790
|
| 21611 |
+
},
|
| 21612 |
+
{
|
| 21613 |
+
"epoch": 0.5715651001978673,
|
| 21614 |
+
"grad_norm": 35.34375,
|
| 21615 |
+
"learning_rate": 9.910692978989365e-06,
|
| 21616 |
+
"loss": 20.1497,
|
| 21617 |
+
"step": 30800
|
| 21618 |
+
},
|
| 21619 |
+
{
|
| 21620 |
+
"epoch": 0.5717506732823471,
|
| 21621 |
+
"grad_norm": 33.40625,
|
| 21622 |
+
"learning_rate": 9.910663983203321e-06,
|
| 21623 |
+
"loss": 20.4599,
|
| 21624 |
+
"step": 30810
|
| 21625 |
+
},
|
| 21626 |
+
{
|
| 21627 |
+
"epoch": 0.5719362463668269,
|
| 21628 |
+
"grad_norm": 35.96875,
|
| 21629 |
+
"learning_rate": 9.910634987417278e-06,
|
| 21630 |
+
"loss": 20.056,
|
| 21631 |
+
"step": 30820
|
| 21632 |
+
},
|
| 21633 |
+
{
|
| 21634 |
+
"epoch": 0.5721218194513068,
|
| 21635 |
+
"grad_norm": 35.84375,
|
| 21636 |
+
"learning_rate": 9.910605991631238e-06,
|
| 21637 |
+
"loss": 19.8844,
|
| 21638 |
+
"step": 30830
|
| 21639 |
+
},
|
| 21640 |
+
{
|
| 21641 |
+
"epoch": 0.5723073925357867,
|
| 21642 |
+
"grad_norm": 34.34375,
|
| 21643 |
+
"learning_rate": 9.910576995845195e-06,
|
| 21644 |
+
"loss": 20.1571,
|
| 21645 |
+
"step": 30840
|
| 21646 |
+
},
|
| 21647 |
+
{
|
| 21648 |
+
"epoch": 0.5724929656202664,
|
| 21649 |
+
"grad_norm": 33.75,
|
| 21650 |
+
"learning_rate": 9.910548000059152e-06,
|
| 21651 |
+
"loss": 20.6345,
|
| 21652 |
+
"step": 30850
|
| 21653 |
+
},
|
| 21654 |
+
{
|
| 21655 |
+
"epoch": 0.5726785387047463,
|
| 21656 |
+
"grad_norm": 35.4375,
|
| 21657 |
+
"learning_rate": 9.91051900427311e-06,
|
| 21658 |
+
"loss": 20.0653,
|
| 21659 |
+
"step": 30860
|
| 21660 |
+
},
|
| 21661 |
+
{
|
| 21662 |
+
"epoch": 0.5728641117892261,
|
| 21663 |
+
"grad_norm": 35.03125,
|
| 21664 |
+
"learning_rate": 9.910490008487067e-06,
|
| 21665 |
+
"loss": 20.1354,
|
| 21666 |
+
"step": 30870
|
| 21667 |
+
},
|
| 21668 |
+
{
|
| 21669 |
+
"epoch": 0.5730496848737059,
|
| 21670 |
+
"grad_norm": 36.5625,
|
| 21671 |
+
"learning_rate": 9.910461012701025e-06,
|
| 21672 |
+
"loss": 19.7588,
|
| 21673 |
+
"step": 30880
|
| 21674 |
+
},
|
| 21675 |
+
{
|
| 21676 |
+
"epoch": 0.5732352579581858,
|
| 21677 |
+
"grad_norm": 36.34375,
|
| 21678 |
+
"learning_rate": 9.910432016914982e-06,
|
| 21679 |
+
"loss": 20.1258,
|
| 21680 |
+
"step": 30890
|
| 21681 |
+
},
|
| 21682 |
+
{
|
| 21683 |
+
"epoch": 0.5734208310426656,
|
| 21684 |
+
"grad_norm": 35.9375,
|
| 21685 |
+
"learning_rate": 9.910403021128941e-06,
|
| 21686 |
+
"loss": 19.8419,
|
| 21687 |
+
"step": 30900
|
| 21688 |
+
},
|
| 21689 |
+
{
|
| 21690 |
+
"epoch": 0.5736064041271454,
|
| 21691 |
+
"grad_norm": 36.09375,
|
| 21692 |
+
"learning_rate": 9.910374025342899e-06,
|
| 21693 |
+
"loss": 20.1061,
|
| 21694 |
+
"step": 30910
|
| 21695 |
+
},
|
| 21696 |
+
{
|
| 21697 |
+
"epoch": 0.5737919772116252,
|
| 21698 |
+
"grad_norm": 37.25,
|
| 21699 |
+
"learning_rate": 9.910345029556854e-06,
|
| 21700 |
+
"loss": 20.4488,
|
| 21701 |
+
"step": 30920
|
| 21702 |
+
},
|
| 21703 |
+
{
|
| 21704 |
+
"epoch": 0.5739775502961051,
|
| 21705 |
+
"grad_norm": 34.40625,
|
| 21706 |
+
"learning_rate": 9.910316033770813e-06,
|
| 21707 |
+
"loss": 20.1627,
|
| 21708 |
+
"step": 30930
|
| 21709 |
+
},
|
| 21710 |
+
{
|
| 21711 |
+
"epoch": 0.5741631233805848,
|
| 21712 |
+
"grad_norm": 35.34375,
|
| 21713 |
+
"learning_rate": 9.91028703798477e-06,
|
| 21714 |
+
"loss": 20.1895,
|
| 21715 |
+
"step": 30940
|
| 21716 |
+
},
|
| 21717 |
+
{
|
| 21718 |
+
"epoch": 0.5743486964650647,
|
| 21719 |
+
"grad_norm": 35.15625,
|
| 21720 |
+
"learning_rate": 9.910258042198728e-06,
|
| 21721 |
+
"loss": 20.327,
|
| 21722 |
+
"step": 30950
|
| 21723 |
+
},
|
| 21724 |
+
{
|
| 21725 |
+
"epoch": 0.5745342695495446,
|
| 21726 |
+
"grad_norm": 35.09375,
|
| 21727 |
+
"learning_rate": 9.910229046412686e-06,
|
| 21728 |
+
"loss": 19.9475,
|
| 21729 |
+
"step": 30960
|
| 21730 |
+
},
|
| 21731 |
+
{
|
| 21732 |
+
"epoch": 0.5747198426340243,
|
| 21733 |
+
"grad_norm": 34.96875,
|
| 21734 |
+
"learning_rate": 9.910200050626643e-06,
|
| 21735 |
+
"loss": 20.4849,
|
| 21736 |
+
"step": 30970
|
| 21737 |
+
},
|
| 21738 |
+
{
|
| 21739 |
+
"epoch": 0.5749054157185042,
|
| 21740 |
+
"grad_norm": 35.28125,
|
| 21741 |
+
"learning_rate": 9.9101710548406e-06,
|
| 21742 |
+
"loss": 20.0981,
|
| 21743 |
+
"step": 30980
|
| 21744 |
+
},
|
| 21745 |
+
{
|
| 21746 |
+
"epoch": 0.575090988802984,
|
| 21747 |
+
"grad_norm": 35.53125,
|
| 21748 |
+
"learning_rate": 9.910142059054558e-06,
|
| 21749 |
+
"loss": 20.0985,
|
| 21750 |
+
"step": 30990
|
| 21751 |
+
},
|
| 21752 |
+
{
|
| 21753 |
+
"epoch": 0.5752765618874638,
|
| 21754 |
+
"grad_norm": 36.1875,
|
| 21755 |
+
"learning_rate": 9.910113063268517e-06,
|
| 21756 |
+
"loss": 20.5353,
|
| 21757 |
+
"step": 31000
|
| 21758 |
+
},
|
| 21759 |
+
{
|
| 21760 |
+
"epoch": 0.5754621349719437,
|
| 21761 |
+
"grad_norm": 34.6875,
|
| 21762 |
+
"learning_rate": 9.910084067482474e-06,
|
| 21763 |
+
"loss": 19.9665,
|
| 21764 |
+
"step": 31010
|
| 21765 |
+
},
|
| 21766 |
+
{
|
| 21767 |
+
"epoch": 0.5756477080564235,
|
| 21768 |
+
"grad_norm": 34.40625,
|
| 21769 |
+
"learning_rate": 9.910055071696432e-06,
|
| 21770 |
+
"loss": 20.7101,
|
| 21771 |
+
"step": 31020
|
| 21772 |
+
},
|
| 21773 |
+
{
|
| 21774 |
+
"epoch": 0.5758332811409034,
|
| 21775 |
+
"grad_norm": 37.21875,
|
| 21776 |
+
"learning_rate": 9.910026075910389e-06,
|
| 21777 |
+
"loss": 20.1796,
|
| 21778 |
+
"step": 31030
|
| 21779 |
+
},
|
| 21780 |
+
{
|
| 21781 |
+
"epoch": 0.5760188542253831,
|
| 21782 |
+
"grad_norm": 34.53125,
|
| 21783 |
+
"learning_rate": 9.909997080124347e-06,
|
| 21784 |
+
"loss": 20.1421,
|
| 21785 |
+
"step": 31040
|
| 21786 |
+
},
|
| 21787 |
+
{
|
| 21788 |
+
"epoch": 0.576204427309863,
|
| 21789 |
+
"grad_norm": 32.40625,
|
| 21790 |
+
"learning_rate": 9.909968084338304e-06,
|
| 21791 |
+
"loss": 20.1085,
|
| 21792 |
+
"step": 31050
|
| 21793 |
+
},
|
| 21794 |
+
{
|
| 21795 |
+
"epoch": 0.5763900003943428,
|
| 21796 |
+
"grad_norm": 34.78125,
|
| 21797 |
+
"learning_rate": 9.909939088552261e-06,
|
| 21798 |
+
"loss": 20.3996,
|
| 21799 |
+
"step": 31060
|
| 21800 |
+
},
|
| 21801 |
+
{
|
| 21802 |
+
"epoch": 0.5765755734788226,
|
| 21803 |
+
"grad_norm": 34.875,
|
| 21804 |
+
"learning_rate": 9.909910092766219e-06,
|
| 21805 |
+
"loss": 20.6353,
|
| 21806 |
+
"step": 31070
|
| 21807 |
+
},
|
| 21808 |
+
{
|
| 21809 |
+
"epoch": 0.5767611465633025,
|
| 21810 |
+
"grad_norm": 35.6875,
|
| 21811 |
+
"learning_rate": 9.909881096980176e-06,
|
| 21812 |
+
"loss": 20.0609,
|
| 21813 |
+
"step": 31080
|
| 21814 |
+
},
|
| 21815 |
+
{
|
| 21816 |
+
"epoch": 0.5769467196477823,
|
| 21817 |
+
"grad_norm": 34.625,
|
| 21818 |
+
"learning_rate": 9.909852101194134e-06,
|
| 21819 |
+
"loss": 19.9845,
|
| 21820 |
+
"step": 31090
|
| 21821 |
+
},
|
| 21822 |
+
{
|
| 21823 |
+
"epoch": 0.5771322927322621,
|
| 21824 |
+
"grad_norm": 34.125,
|
| 21825 |
+
"learning_rate": 9.909823105408091e-06,
|
| 21826 |
+
"loss": 19.8181,
|
| 21827 |
+
"step": 31100
|
| 21828 |
+
},
|
| 21829 |
+
{
|
| 21830 |
+
"epoch": 0.5773178658167419,
|
| 21831 |
+
"grad_norm": 34.25,
|
| 21832 |
+
"learning_rate": 9.90979410962205e-06,
|
| 21833 |
+
"loss": 20.0183,
|
| 21834 |
+
"step": 31110
|
| 21835 |
+
},
|
| 21836 |
+
{
|
| 21837 |
+
"epoch": 0.5775034389012218,
|
| 21838 |
+
"grad_norm": 34.09375,
|
| 21839 |
+
"learning_rate": 9.909765113836007e-06,
|
| 21840 |
+
"loss": 19.9869,
|
| 21841 |
+
"step": 31120
|
| 21842 |
+
},
|
| 21843 |
+
{
|
| 21844 |
+
"epoch": 0.5776890119857016,
|
| 21845 |
+
"grad_norm": 34.21875,
|
| 21846 |
+
"learning_rate": 9.909736118049965e-06,
|
| 21847 |
+
"loss": 20.2748,
|
| 21848 |
+
"step": 31130
|
| 21849 |
+
},
|
| 21850 |
+
{
|
| 21851 |
+
"epoch": 0.5778745850701814,
|
| 21852 |
+
"grad_norm": 34.09375,
|
| 21853 |
+
"learning_rate": 9.909707122263922e-06,
|
| 21854 |
+
"loss": 19.7719,
|
| 21855 |
+
"step": 31140
|
| 21856 |
+
},
|
| 21857 |
+
{
|
| 21858 |
+
"epoch": 0.5780601581546613,
|
| 21859 |
+
"grad_norm": 36.15625,
|
| 21860 |
+
"learning_rate": 9.90967812647788e-06,
|
| 21861 |
+
"loss": 20.0686,
|
| 21862 |
+
"step": 31150
|
| 21863 |
+
},
|
| 21864 |
+
{
|
| 21865 |
+
"epoch": 0.578245731239141,
|
| 21866 |
+
"grad_norm": 36.4375,
|
| 21867 |
+
"learning_rate": 9.909649130691837e-06,
|
| 21868 |
+
"loss": 20.0449,
|
| 21869 |
+
"step": 31160
|
| 21870 |
+
},
|
| 21871 |
+
{
|
| 21872 |
+
"epoch": 0.5784313043236209,
|
| 21873 |
+
"grad_norm": 34.65625,
|
| 21874 |
+
"learning_rate": 9.909620134905795e-06,
|
| 21875 |
+
"loss": 20.3113,
|
| 21876 |
+
"step": 31170
|
| 21877 |
+
},
|
| 21878 |
+
{
|
| 21879 |
+
"epoch": 0.5786168774081007,
|
| 21880 |
+
"grad_norm": 35.5,
|
| 21881 |
+
"learning_rate": 9.909591139119754e-06,
|
| 21882 |
+
"loss": 20.0866,
|
| 21883 |
+
"step": 31180
|
| 21884 |
+
},
|
| 21885 |
+
{
|
| 21886 |
+
"epoch": 0.5788024504925806,
|
| 21887 |
+
"grad_norm": 34.5625,
|
| 21888 |
+
"learning_rate": 9.90956214333371e-06,
|
| 21889 |
+
"loss": 19.9436,
|
| 21890 |
+
"step": 31190
|
| 21891 |
+
},
|
| 21892 |
+
{
|
| 21893 |
+
"epoch": 0.5789880235770604,
|
| 21894 |
+
"grad_norm": 34.3125,
|
| 21895 |
+
"learning_rate": 9.909533147547667e-06,
|
| 21896 |
+
"loss": 20.16,
|
| 21897 |
+
"step": 31200
|
| 21898 |
+
},
|
| 21899 |
+
{
|
| 21900 |
+
"epoch": 0.5791735966615402,
|
| 21901 |
+
"grad_norm": 32.8125,
|
| 21902 |
+
"learning_rate": 9.909504151761626e-06,
|
| 21903 |
+
"loss": 20.0966,
|
| 21904 |
+
"step": 31210
|
| 21905 |
+
},
|
| 21906 |
+
{
|
| 21907 |
+
"epoch": 0.5793591697460201,
|
| 21908 |
+
"grad_norm": 36.03125,
|
| 21909 |
+
"learning_rate": 9.909475155975583e-06,
|
| 21910 |
+
"loss": 19.8559,
|
| 21911 |
+
"step": 31220
|
| 21912 |
+
},
|
| 21913 |
+
{
|
| 21914 |
+
"epoch": 0.5795447428304998,
|
| 21915 |
+
"grad_norm": 34.625,
|
| 21916 |
+
"learning_rate": 9.90944616018954e-06,
|
| 21917 |
+
"loss": 20.0601,
|
| 21918 |
+
"step": 31230
|
| 21919 |
+
},
|
| 21920 |
+
{
|
| 21921 |
+
"epoch": 0.5797303159149797,
|
| 21922 |
+
"grad_norm": 36.28125,
|
| 21923 |
+
"learning_rate": 9.909417164403498e-06,
|
| 21924 |
+
"loss": 19.8656,
|
| 21925 |
+
"step": 31240
|
| 21926 |
+
},
|
| 21927 |
+
{
|
| 21928 |
+
"epoch": 0.5799158889994596,
|
| 21929 |
+
"grad_norm": 34.125,
|
| 21930 |
+
"learning_rate": 9.909388168617455e-06,
|
| 21931 |
+
"loss": 20.3291,
|
| 21932 |
+
"step": 31250
|
| 21933 |
+
},
|
| 21934 |
+
{
|
| 21935 |
+
"epoch": 0.5801014620839393,
|
| 21936 |
+
"grad_norm": 33.84375,
|
| 21937 |
+
"learning_rate": 9.909359172831413e-06,
|
| 21938 |
+
"loss": 20.4572,
|
| 21939 |
+
"step": 31260
|
| 21940 |
+
},
|
| 21941 |
+
{
|
| 21942 |
+
"epoch": 0.5802870351684192,
|
| 21943 |
+
"grad_norm": 34.03125,
|
| 21944 |
+
"learning_rate": 9.90933017704537e-06,
|
| 21945 |
+
"loss": 19.946,
|
| 21946 |
+
"step": 31270
|
| 21947 |
+
},
|
| 21948 |
+
{
|
| 21949 |
+
"epoch": 0.580472608252899,
|
| 21950 |
+
"grad_norm": 33.9375,
|
| 21951 |
+
"learning_rate": 9.90930118125933e-06,
|
| 21952 |
+
"loss": 20.13,
|
| 21953 |
+
"step": 31280
|
| 21954 |
+
},
|
| 21955 |
+
{
|
| 21956 |
+
"epoch": 0.5806581813373788,
|
| 21957 |
+
"grad_norm": 33.90625,
|
| 21958 |
+
"learning_rate": 9.909272185473287e-06,
|
| 21959 |
+
"loss": 20.2398,
|
| 21960 |
+
"step": 31290
|
| 21961 |
+
},
|
| 21962 |
+
{
|
| 21963 |
+
"epoch": 0.5808437544218586,
|
| 21964 |
+
"grad_norm": 33.09375,
|
| 21965 |
+
"learning_rate": 9.909243189687243e-06,
|
| 21966 |
+
"loss": 20.1966,
|
| 21967 |
+
"step": 31300
|
| 21968 |
+
},
|
| 21969 |
+
{
|
| 21970 |
+
"epoch": 0.5810293275063385,
|
| 21971 |
+
"grad_norm": 34.8125,
|
| 21972 |
+
"learning_rate": 9.909214193901202e-06,
|
| 21973 |
+
"loss": 20.0873,
|
| 21974 |
+
"step": 31310
|
| 21975 |
+
},
|
| 21976 |
+
{
|
| 21977 |
+
"epoch": 0.5812149005908183,
|
| 21978 |
+
"grad_norm": 34.875,
|
| 21979 |
+
"learning_rate": 9.909185198115159e-06,
|
| 21980 |
+
"loss": 19.9998,
|
| 21981 |
+
"step": 31320
|
| 21982 |
+
},
|
| 21983 |
+
{
|
| 21984 |
+
"epoch": 0.5814004736752981,
|
| 21985 |
+
"grad_norm": 33.8125,
|
| 21986 |
+
"learning_rate": 9.909156202329116e-06,
|
| 21987 |
+
"loss": 20.1087,
|
| 21988 |
+
"step": 31330
|
| 21989 |
+
},
|
| 21990 |
+
{
|
| 21991 |
+
"epoch": 0.581586046759778,
|
| 21992 |
+
"grad_norm": 36.125,
|
| 21993 |
+
"learning_rate": 9.909127206543074e-06,
|
| 21994 |
+
"loss": 20.1545,
|
| 21995 |
+
"step": 31340
|
| 21996 |
+
},
|
| 21997 |
+
{
|
| 21998 |
+
"epoch": 0.5817716198442577,
|
| 21999 |
+
"grad_norm": 34.40625,
|
| 22000 |
+
"learning_rate": 9.909098210757031e-06,
|
| 22001 |
+
"loss": 20.2046,
|
| 22002 |
+
"step": 31350
|
| 22003 |
+
},
|
| 22004 |
+
{
|
| 22005 |
+
"epoch": 0.5819571929287376,
|
| 22006 |
+
"grad_norm": 37.1875,
|
| 22007 |
+
"learning_rate": 9.909069214970989e-06,
|
| 22008 |
+
"loss": 20.0161,
|
| 22009 |
+
"step": 31360
|
| 22010 |
+
},
|
| 22011 |
+
{
|
| 22012 |
+
"epoch": 0.5821427660132175,
|
| 22013 |
+
"grad_norm": 36.125,
|
| 22014 |
+
"learning_rate": 9.909040219184946e-06,
|
| 22015 |
+
"loss": 19.5715,
|
| 22016 |
+
"step": 31370
|
| 22017 |
+
},
|
| 22018 |
+
{
|
| 22019 |
+
"epoch": 0.5823283390976973,
|
| 22020 |
+
"grad_norm": 35.59375,
|
| 22021 |
+
"learning_rate": 9.909011223398905e-06,
|
| 22022 |
+
"loss": 19.9348,
|
| 22023 |
+
"step": 31380
|
| 22024 |
+
},
|
| 22025 |
+
{
|
| 22026 |
+
"epoch": 0.5825139121821771,
|
| 22027 |
+
"grad_norm": 35.28125,
|
| 22028 |
+
"learning_rate": 9.908982227612863e-06,
|
| 22029 |
+
"loss": 20.2421,
|
| 22030 |
+
"step": 31390
|
| 22031 |
+
},
|
| 22032 |
+
{
|
| 22033 |
+
"epoch": 0.5826994852666569,
|
| 22034 |
+
"grad_norm": 35.09375,
|
| 22035 |
+
"learning_rate": 9.908953231826818e-06,
|
| 22036 |
+
"loss": 19.6778,
|
| 22037 |
+
"step": 31400
|
| 22038 |
+
},
|
| 22039 |
+
{
|
| 22040 |
+
"epoch": 0.5828850583511368,
|
| 22041 |
+
"grad_norm": 33.65625,
|
| 22042 |
+
"learning_rate": 9.908924236040777e-06,
|
| 22043 |
+
"loss": 20.1597,
|
| 22044 |
+
"step": 31410
|
| 22045 |
+
},
|
| 22046 |
+
{
|
| 22047 |
+
"epoch": 0.5830706314356165,
|
| 22048 |
+
"grad_norm": 34.0625,
|
| 22049 |
+
"learning_rate": 9.908895240254735e-06,
|
| 22050 |
+
"loss": 19.7993,
|
| 22051 |
+
"step": 31420
|
| 22052 |
+
},
|
| 22053 |
+
{
|
| 22054 |
+
"epoch": 0.5832562045200964,
|
| 22055 |
+
"grad_norm": 32.1875,
|
| 22056 |
+
"learning_rate": 9.908866244468692e-06,
|
| 22057 |
+
"loss": 19.741,
|
| 22058 |
+
"step": 31430
|
| 22059 |
+
},
|
| 22060 |
+
{
|
| 22061 |
+
"epoch": 0.5834417776045763,
|
| 22062 |
+
"grad_norm": 35.96875,
|
| 22063 |
+
"learning_rate": 9.90883724868265e-06,
|
| 22064 |
+
"loss": 19.8878,
|
| 22065 |
+
"step": 31440
|
| 22066 |
+
},
|
| 22067 |
+
{
|
| 22068 |
+
"epoch": 0.583627350689056,
|
| 22069 |
+
"grad_norm": 35.09375,
|
| 22070 |
+
"learning_rate": 9.908808252896609e-06,
|
| 22071 |
+
"loss": 20.1307,
|
| 22072 |
+
"step": 31450
|
| 22073 |
+
},
|
| 22074 |
+
{
|
| 22075 |
+
"epoch": 0.5838129237735359,
|
| 22076 |
+
"grad_norm": 36.75,
|
| 22077 |
+
"learning_rate": 9.908779257110564e-06,
|
| 22078 |
+
"loss": 20.0339,
|
| 22079 |
+
"step": 31460
|
| 22080 |
+
},
|
| 22081 |
+
{
|
| 22082 |
+
"epoch": 0.5839984968580157,
|
| 22083 |
+
"grad_norm": 36.1875,
|
| 22084 |
+
"learning_rate": 9.908750261324522e-06,
|
| 22085 |
+
"loss": 20.0168,
|
| 22086 |
+
"step": 31470
|
| 22087 |
+
},
|
| 22088 |
+
{
|
| 22089 |
+
"epoch": 0.5841840699424955,
|
| 22090 |
+
"grad_norm": 33.65625,
|
| 22091 |
+
"learning_rate": 9.908721265538481e-06,
|
| 22092 |
+
"loss": 19.8763,
|
| 22093 |
+
"step": 31480
|
| 22094 |
+
},
|
| 22095 |
+
{
|
| 22096 |
+
"epoch": 0.5843696430269754,
|
| 22097 |
+
"grad_norm": 35.71875,
|
| 22098 |
+
"learning_rate": 9.908692269752438e-06,
|
| 22099 |
+
"loss": 19.7346,
|
| 22100 |
+
"step": 31490
|
| 22101 |
+
},
|
| 22102 |
+
{
|
| 22103 |
+
"epoch": 0.5845552161114552,
|
| 22104 |
+
"grad_norm": 34.90625,
|
| 22105 |
+
"learning_rate": 9.908663273966396e-06,
|
| 22106 |
+
"loss": 19.9969,
|
| 22107 |
+
"step": 31500
|
| 22108 |
+
},
|
| 22109 |
+
{
|
| 22110 |
+
"epoch": 0.584740789195935,
|
| 22111 |
+
"grad_norm": 35.8125,
|
| 22112 |
+
"learning_rate": 9.908634278180353e-06,
|
| 22113 |
+
"loss": 19.6326,
|
| 22114 |
+
"step": 31510
|
| 22115 |
+
},
|
| 22116 |
+
{
|
| 22117 |
+
"epoch": 0.5849263622804148,
|
| 22118 |
+
"grad_norm": 35.4375,
|
| 22119 |
+
"learning_rate": 9.90860528239431e-06,
|
| 22120 |
+
"loss": 20.0874,
|
| 22121 |
+
"step": 31520
|
| 22122 |
+
},
|
| 22123 |
+
{
|
| 22124 |
+
"epoch": 0.5851119353648947,
|
| 22125 |
+
"grad_norm": 36.0625,
|
| 22126 |
+
"learning_rate": 9.908576286608268e-06,
|
| 22127 |
+
"loss": 20.0785,
|
| 22128 |
+
"step": 31530
|
| 22129 |
+
},
|
| 22130 |
+
{
|
| 22131 |
+
"epoch": 0.5852975084493744,
|
| 22132 |
+
"grad_norm": 37.03125,
|
| 22133 |
+
"learning_rate": 9.908547290822225e-06,
|
| 22134 |
+
"loss": 20.1829,
|
| 22135 |
+
"step": 31540
|
| 22136 |
+
},
|
| 22137 |
+
{
|
| 22138 |
+
"epoch": 0.5854830815338543,
|
| 22139 |
+
"grad_norm": 36.34375,
|
| 22140 |
+
"learning_rate": 9.908518295036183e-06,
|
| 22141 |
+
"loss": 20.1522,
|
| 22142 |
+
"step": 31550
|
| 22143 |
+
},
|
| 22144 |
+
{
|
| 22145 |
+
"epoch": 0.5856686546183342,
|
| 22146 |
+
"grad_norm": 34.0625,
|
| 22147 |
+
"learning_rate": 9.90848929925014e-06,
|
| 22148 |
+
"loss": 20.0708,
|
| 22149 |
+
"step": 31560
|
| 22150 |
+
},
|
| 22151 |
+
{
|
| 22152 |
+
"epoch": 0.585854227702814,
|
| 22153 |
+
"grad_norm": 35.84375,
|
| 22154 |
+
"learning_rate": 9.908460303464098e-06,
|
| 22155 |
+
"loss": 19.979,
|
| 22156 |
+
"step": 31570
|
| 22157 |
+
},
|
| 22158 |
+
{
|
| 22159 |
+
"epoch": 0.5860398007872938,
|
| 22160 |
+
"grad_norm": 34.3125,
|
| 22161 |
+
"learning_rate": 9.908431307678055e-06,
|
| 22162 |
+
"loss": 20.0508,
|
| 22163 |
+
"step": 31580
|
| 22164 |
+
},
|
| 22165 |
+
{
|
| 22166 |
+
"epoch": 0.5862253738717736,
|
| 22167 |
+
"grad_norm": 35.3125,
|
| 22168 |
+
"learning_rate": 9.908402311892014e-06,
|
| 22169 |
+
"loss": 20.0383,
|
| 22170 |
+
"step": 31590
|
| 22171 |
+
},
|
| 22172 |
+
{
|
| 22173 |
+
"epoch": 0.5864109469562535,
|
| 22174 |
+
"grad_norm": 36.5,
|
| 22175 |
+
"learning_rate": 9.908373316105971e-06,
|
| 22176 |
+
"loss": 19.7061,
|
| 22177 |
+
"step": 31600
|
| 22178 |
+
},
|
| 22179 |
+
{
|
| 22180 |
+
"epoch": 0.5865965200407333,
|
| 22181 |
+
"grad_norm": 34.21875,
|
| 22182 |
+
"learning_rate": 9.908344320319929e-06,
|
| 22183 |
+
"loss": 20.0088,
|
| 22184 |
+
"step": 31610
|
| 22185 |
+
},
|
| 22186 |
+
{
|
| 22187 |
+
"epoch": 0.5867820931252131,
|
| 22188 |
+
"grad_norm": 36.875,
|
| 22189 |
+
"learning_rate": 9.908315324533886e-06,
|
| 22190 |
+
"loss": 20.0155,
|
| 22191 |
+
"step": 31620
|
| 22192 |
+
},
|
| 22193 |
+
{
|
| 22194 |
+
"epoch": 0.586967666209693,
|
| 22195 |
+
"grad_norm": 34.28125,
|
| 22196 |
+
"learning_rate": 9.908286328747844e-06,
|
| 22197 |
+
"loss": 19.8223,
|
| 22198 |
+
"step": 31630
|
| 22199 |
+
},
|
| 22200 |
+
{
|
| 22201 |
+
"epoch": 0.5871532392941727,
|
| 22202 |
+
"grad_norm": 36.21875,
|
| 22203 |
+
"learning_rate": 9.908257332961801e-06,
|
| 22204 |
+
"loss": 19.9218,
|
| 22205 |
+
"step": 31640
|
| 22206 |
+
},
|
| 22207 |
+
{
|
| 22208 |
+
"epoch": 0.5873388123786526,
|
| 22209 |
+
"grad_norm": 35.125,
|
| 22210 |
+
"learning_rate": 9.908228337175759e-06,
|
| 22211 |
+
"loss": 20.3176,
|
| 22212 |
+
"step": 31650
|
| 22213 |
+
},
|
| 22214 |
+
{
|
| 22215 |
+
"epoch": 0.5875243854631325,
|
| 22216 |
+
"grad_norm": 34.5,
|
| 22217 |
+
"learning_rate": 9.908199341389718e-06,
|
| 22218 |
+
"loss": 19.8512,
|
| 22219 |
+
"step": 31660
|
| 22220 |
+
},
|
| 22221 |
+
{
|
| 22222 |
+
"epoch": 0.5877099585476122,
|
| 22223 |
+
"grad_norm": 33.65625,
|
| 22224 |
+
"learning_rate": 9.908170345603673e-06,
|
| 22225 |
+
"loss": 19.7684,
|
| 22226 |
+
"step": 31670
|
| 22227 |
+
},
|
| 22228 |
+
{
|
| 22229 |
+
"epoch": 0.5878955316320921,
|
| 22230 |
+
"grad_norm": 35.0625,
|
| 22231 |
+
"learning_rate": 9.90814134981763e-06,
|
| 22232 |
+
"loss": 19.7739,
|
| 22233 |
+
"step": 31680
|
| 22234 |
+
},
|
| 22235 |
+
{
|
| 22236 |
+
"epoch": 0.5880811047165719,
|
| 22237 |
+
"grad_norm": 33.90625,
|
| 22238 |
+
"learning_rate": 9.90811235403159e-06,
|
| 22239 |
+
"loss": 19.9795,
|
| 22240 |
+
"step": 31690
|
| 22241 |
+
},
|
| 22242 |
+
{
|
| 22243 |
+
"epoch": 0.5882666778010517,
|
| 22244 |
+
"grad_norm": 33.34375,
|
| 22245 |
+
"learning_rate": 9.908083358245547e-06,
|
| 22246 |
+
"loss": 19.7892,
|
| 22247 |
+
"step": 31700
|
| 22248 |
+
},
|
| 22249 |
+
{
|
| 22250 |
+
"epoch": 0.5884522508855315,
|
| 22251 |
+
"grad_norm": 33.3125,
|
| 22252 |
+
"learning_rate": 9.908054362459505e-06,
|
| 22253 |
+
"loss": 20.0117,
|
| 22254 |
+
"step": 31710
|
| 22255 |
+
},
|
| 22256 |
+
{
|
| 22257 |
+
"epoch": 0.5886378239700114,
|
| 22258 |
+
"grad_norm": 34.65625,
|
| 22259 |
+
"learning_rate": 9.908025366673462e-06,
|
| 22260 |
+
"loss": 20.1514,
|
| 22261 |
+
"step": 31720
|
| 22262 |
+
},
|
| 22263 |
+
{
|
| 22264 |
+
"epoch": 0.5888233970544913,
|
| 22265 |
+
"grad_norm": 34.15625,
|
| 22266 |
+
"learning_rate": 9.90799637088742e-06,
|
| 22267 |
+
"loss": 20.2976,
|
| 22268 |
+
"step": 31730
|
| 22269 |
+
},
|
| 22270 |
+
{
|
| 22271 |
+
"epoch": 0.589008970138971,
|
| 22272 |
+
"grad_norm": 34.9375,
|
| 22273 |
+
"learning_rate": 9.907967375101377e-06,
|
| 22274 |
+
"loss": 20.0669,
|
| 22275 |
+
"step": 31740
|
| 22276 |
+
},
|
| 22277 |
+
{
|
| 22278 |
+
"epoch": 0.5891945432234509,
|
| 22279 |
+
"grad_norm": 33.125,
|
| 22280 |
+
"learning_rate": 9.907938379315334e-06,
|
| 22281 |
+
"loss": 19.8704,
|
| 22282 |
+
"step": 31750
|
| 22283 |
+
},
|
| 22284 |
+
{
|
| 22285 |
+
"epoch": 0.5893801163079307,
|
| 22286 |
+
"grad_norm": 34.34375,
|
| 22287 |
+
"learning_rate": 9.907909383529293e-06,
|
| 22288 |
+
"loss": 19.7355,
|
| 22289 |
+
"step": 31760
|
| 22290 |
+
},
|
| 22291 |
+
{
|
| 22292 |
+
"epoch": 0.5895656893924105,
|
| 22293 |
+
"grad_norm": 34.8125,
|
| 22294 |
+
"learning_rate": 9.90788038774325e-06,
|
| 22295 |
+
"loss": 19.8751,
|
| 22296 |
+
"step": 31770
|
| 22297 |
+
},
|
| 22298 |
+
{
|
| 22299 |
+
"epoch": 0.5897512624768904,
|
| 22300 |
+
"grad_norm": 33.5625,
|
| 22301 |
+
"learning_rate": 9.907851391957207e-06,
|
| 22302 |
+
"loss": 19.3739,
|
| 22303 |
+
"step": 31780
|
| 22304 |
+
},
|
| 22305 |
+
{
|
| 22306 |
+
"epoch": 0.5899368355613702,
|
| 22307 |
+
"grad_norm": 33.9375,
|
| 22308 |
+
"learning_rate": 9.907822396171166e-06,
|
| 22309 |
+
"loss": 20.0175,
|
| 22310 |
+
"step": 31790
|
| 22311 |
+
},
|
| 22312 |
+
{
|
| 22313 |
+
"epoch": 0.59012240864585,
|
| 22314 |
+
"grad_norm": 35.9375,
|
| 22315 |
+
"learning_rate": 9.907793400385123e-06,
|
| 22316 |
+
"loss": 19.7817,
|
| 22317 |
+
"step": 31800
|
| 22318 |
+
},
|
| 22319 |
+
{
|
| 22320 |
+
"epoch": 0.5903079817303298,
|
| 22321 |
+
"grad_norm": 35.15625,
|
| 22322 |
+
"learning_rate": 9.90776440459908e-06,
|
| 22323 |
+
"loss": 19.909,
|
| 22324 |
+
"step": 31810
|
| 22325 |
+
},
|
| 22326 |
+
{
|
| 22327 |
+
"epoch": 0.5904935548148097,
|
| 22328 |
+
"grad_norm": 34.8125,
|
| 22329 |
+
"learning_rate": 9.907735408813038e-06,
|
| 22330 |
+
"loss": 20.1357,
|
| 22331 |
+
"step": 31820
|
| 22332 |
+
},
|
| 22333 |
+
{
|
| 22334 |
+
"epoch": 0.5906791278992894,
|
| 22335 |
+
"grad_norm": 34.25,
|
| 22336 |
+
"learning_rate": 9.907706413026995e-06,
|
| 22337 |
+
"loss": 19.3806,
|
| 22338 |
+
"step": 31830
|
| 22339 |
+
},
|
| 22340 |
+
{
|
| 22341 |
+
"epoch": 0.5908647009837693,
|
| 22342 |
+
"grad_norm": 34.84375,
|
| 22343 |
+
"learning_rate": 9.907677417240953e-06,
|
| 22344 |
+
"loss": 19.9921,
|
| 22345 |
+
"step": 31840
|
| 22346 |
+
},
|
| 22347 |
+
{
|
| 22348 |
+
"epoch": 0.5910502740682492,
|
| 22349 |
+
"grad_norm": 33.21875,
|
| 22350 |
+
"learning_rate": 9.90764842145491e-06,
|
| 22351 |
+
"loss": 19.6932,
|
| 22352 |
+
"step": 31850
|
| 22353 |
+
},
|
| 22354 |
+
{
|
| 22355 |
+
"epoch": 0.5912358471527289,
|
| 22356 |
+
"grad_norm": 34.875,
|
| 22357 |
+
"learning_rate": 9.90761942566887e-06,
|
| 22358 |
+
"loss": 19.9271,
|
| 22359 |
+
"step": 31860
|
| 22360 |
+
},
|
| 22361 |
+
{
|
| 22362 |
+
"epoch": 0.5914214202372088,
|
| 22363 |
+
"grad_norm": 35.1875,
|
| 22364 |
+
"learning_rate": 9.907590429882827e-06,
|
| 22365 |
+
"loss": 20.209,
|
| 22366 |
+
"step": 31870
|
| 22367 |
+
},
|
| 22368 |
+
{
|
| 22369 |
+
"epoch": 0.5916069933216886,
|
| 22370 |
+
"grad_norm": 34.625,
|
| 22371 |
+
"learning_rate": 9.907561434096784e-06,
|
| 22372 |
+
"loss": 19.9407,
|
| 22373 |
+
"step": 31880
|
| 22374 |
+
},
|
| 22375 |
+
{
|
| 22376 |
+
"epoch": 0.5917925664061684,
|
| 22377 |
+
"grad_norm": 35.0625,
|
| 22378 |
+
"learning_rate": 9.907532438310741e-06,
|
| 22379 |
+
"loss": 20.1724,
|
| 22380 |
+
"step": 31890
|
| 22381 |
+
},
|
| 22382 |
+
{
|
| 22383 |
+
"epoch": 0.5919781394906483,
|
| 22384 |
+
"grad_norm": 33.96875,
|
| 22385 |
+
"learning_rate": 9.907503442524699e-06,
|
| 22386 |
+
"loss": 19.872,
|
| 22387 |
+
"step": 31900
|
| 22388 |
+
},
|
| 22389 |
+
{
|
| 22390 |
+
"epoch": 0.5921637125751281,
|
| 22391 |
+
"grad_norm": 34.28125,
|
| 22392 |
+
"learning_rate": 9.907474446738656e-06,
|
| 22393 |
+
"loss": 20.0196,
|
| 22394 |
+
"step": 31910
|
| 22395 |
+
},
|
| 22396 |
+
{
|
| 22397 |
+
"epoch": 0.592349285659608,
|
| 22398 |
+
"grad_norm": 33.34375,
|
| 22399 |
+
"learning_rate": 9.907445450952614e-06,
|
| 22400 |
+
"loss": 20.2614,
|
| 22401 |
+
"step": 31920
|
| 22402 |
+
},
|
| 22403 |
+
{
|
| 22404 |
+
"epoch": 0.5925348587440877,
|
| 22405 |
+
"grad_norm": 35.1875,
|
| 22406 |
+
"learning_rate": 9.907416455166573e-06,
|
| 22407 |
+
"loss": 19.9631,
|
| 22408 |
+
"step": 31930
|
| 22409 |
+
},
|
| 22410 |
+
{
|
| 22411 |
+
"epoch": 0.5927204318285676,
|
| 22412 |
+
"grad_norm": 33.15625,
|
| 22413 |
+
"learning_rate": 9.907387459380528e-06,
|
| 22414 |
+
"loss": 19.8016,
|
| 22415 |
+
"step": 31940
|
| 22416 |
+
},
|
| 22417 |
+
{
|
| 22418 |
+
"epoch": 0.5929060049130475,
|
| 22419 |
+
"grad_norm": 35.09375,
|
| 22420 |
+
"learning_rate": 9.907358463594486e-06,
|
| 22421 |
+
"loss": 19.6743,
|
| 22422 |
+
"step": 31950
|
| 22423 |
+
},
|
| 22424 |
+
{
|
| 22425 |
+
"epoch": 0.5930915779975272,
|
| 22426 |
+
"grad_norm": 35.09375,
|
| 22427 |
+
"learning_rate": 9.907329467808445e-06,
|
| 22428 |
+
"loss": 20.0119,
|
| 22429 |
+
"step": 31960
|
| 22430 |
+
},
|
| 22431 |
+
{
|
| 22432 |
+
"epoch": 0.5932771510820071,
|
| 22433 |
+
"grad_norm": 36.375,
|
| 22434 |
+
"learning_rate": 9.907300472022402e-06,
|
| 22435 |
+
"loss": 20.3043,
|
| 22436 |
+
"step": 31970
|
| 22437 |
+
},
|
| 22438 |
+
{
|
| 22439 |
+
"epoch": 0.5934627241664869,
|
| 22440 |
+
"grad_norm": 34.84375,
|
| 22441 |
+
"learning_rate": 9.90727147623636e-06,
|
| 22442 |
+
"loss": 20.0462,
|
| 22443 |
+
"step": 31980
|
| 22444 |
+
},
|
| 22445 |
+
{
|
| 22446 |
+
"epoch": 0.5936482972509667,
|
| 22447 |
+
"grad_norm": 35.75,
|
| 22448 |
+
"learning_rate": 9.907242480450317e-06,
|
| 22449 |
+
"loss": 19.8187,
|
| 22450 |
+
"step": 31990
|
| 22451 |
+
},
|
| 22452 |
+
{
|
| 22453 |
+
"epoch": 0.5938338703354465,
|
| 22454 |
+
"grad_norm": 34.875,
|
| 22455 |
+
"learning_rate": 9.907213484664275e-06,
|
| 22456 |
+
"loss": 20.3801,
|
| 22457 |
+
"step": 32000
|
| 22458 |
+
},
|
| 22459 |
+
{
|
| 22460 |
+
"epoch": 0.5940194434199264,
|
| 22461 |
+
"grad_norm": 35.15625,
|
| 22462 |
+
"learning_rate": 9.907184488878232e-06,
|
| 22463 |
+
"loss": 20.3156,
|
| 22464 |
+
"step": 32010
|
| 22465 |
+
},
|
| 22466 |
+
{
|
| 22467 |
+
"epoch": 0.5942050165044062,
|
| 22468 |
+
"grad_norm": 36.3125,
|
| 22469 |
+
"learning_rate": 9.90715549309219e-06,
|
| 22470 |
+
"loss": 20.2171,
|
| 22471 |
+
"step": 32020
|
| 22472 |
+
},
|
| 22473 |
+
{
|
| 22474 |
+
"epoch": 0.594390589588886,
|
| 22475 |
+
"grad_norm": 35.84375,
|
| 22476 |
+
"learning_rate": 9.907126497306147e-06,
|
| 22477 |
+
"loss": 20.237,
|
| 22478 |
+
"step": 32030
|
| 22479 |
+
},
|
| 22480 |
+
{
|
| 22481 |
+
"epoch": 0.5945761626733659,
|
| 22482 |
+
"grad_norm": 35.5625,
|
| 22483 |
+
"learning_rate": 9.907097501520106e-06,
|
| 22484 |
+
"loss": 20.6729,
|
| 22485 |
+
"step": 32040
|
| 22486 |
+
},
|
| 22487 |
+
{
|
| 22488 |
+
"epoch": 0.5947617357578456,
|
| 22489 |
+
"grad_norm": 35.375,
|
| 22490 |
+
"learning_rate": 9.907068505734062e-06,
|
| 22491 |
+
"loss": 20.135,
|
| 22492 |
+
"step": 32050
|
| 22493 |
+
},
|
| 22494 |
+
{
|
| 22495 |
+
"epoch": 0.5949473088423255,
|
| 22496 |
+
"grad_norm": 36.75,
|
| 22497 |
+
"learning_rate": 9.90703950994802e-06,
|
| 22498 |
+
"loss": 20.1497,
|
| 22499 |
+
"step": 32060
|
| 22500 |
+
},
|
| 22501 |
+
{
|
| 22502 |
+
"epoch": 0.5951328819268054,
|
| 22503 |
+
"grad_norm": 34.8125,
|
| 22504 |
+
"learning_rate": 9.907010514161978e-06,
|
| 22505 |
+
"loss": 19.918,
|
| 22506 |
+
"step": 32070
|
| 22507 |
+
},
|
| 22508 |
+
{
|
| 22509 |
+
"epoch": 0.5953184550112851,
|
| 22510 |
+
"grad_norm": 33.53125,
|
| 22511 |
+
"learning_rate": 9.906981518375936e-06,
|
| 22512 |
+
"loss": 20.1092,
|
| 22513 |
+
"step": 32080
|
| 22514 |
+
},
|
| 22515 |
+
{
|
| 22516 |
+
"epoch": 0.595504028095765,
|
| 22517 |
+
"grad_norm": 34.8125,
|
| 22518 |
+
"learning_rate": 9.906952522589893e-06,
|
| 22519 |
+
"loss": 20.1522,
|
| 22520 |
+
"step": 32090
|
| 22521 |
+
},
|
| 22522 |
+
{
|
| 22523 |
+
"epoch": 0.5956896011802448,
|
| 22524 |
+
"grad_norm": 35.5,
|
| 22525 |
+
"learning_rate": 9.90692352680385e-06,
|
| 22526 |
+
"loss": 20.0647,
|
| 22527 |
+
"step": 32100
|
| 22528 |
+
},
|
| 22529 |
+
{
|
| 22530 |
+
"epoch": 0.5958751742647247,
|
| 22531 |
+
"grad_norm": 35.375,
|
| 22532 |
+
"learning_rate": 9.906894531017808e-06,
|
| 22533 |
+
"loss": 20.1672,
|
| 22534 |
+
"step": 32110
|
| 22535 |
+
},
|
| 22536 |
+
{
|
| 22537 |
+
"epoch": 0.5960607473492044,
|
| 22538 |
+
"grad_norm": 32.96875,
|
| 22539 |
+
"learning_rate": 9.906865535231765e-06,
|
| 22540 |
+
"loss": 19.8313,
|
| 22541 |
+
"step": 32120
|
| 22542 |
+
},
|
| 22543 |
+
{
|
| 22544 |
+
"epoch": 0.5962463204336843,
|
| 22545 |
+
"grad_norm": 35.34375,
|
| 22546 |
+
"learning_rate": 9.906836539445723e-06,
|
| 22547 |
+
"loss": 20.6778,
|
| 22548 |
+
"step": 32130
|
| 22549 |
+
},
|
| 22550 |
+
{
|
| 22551 |
+
"epoch": 0.5964318935181642,
|
| 22552 |
+
"grad_norm": 33.6875,
|
| 22553 |
+
"learning_rate": 9.906807543659682e-06,
|
| 22554 |
+
"loss": 20.0226,
|
| 22555 |
+
"step": 32140
|
| 22556 |
+
},
|
| 22557 |
+
{
|
| 22558 |
+
"epoch": 0.5966174666026439,
|
| 22559 |
+
"grad_norm": 36.03125,
|
| 22560 |
+
"learning_rate": 9.906778547873637e-06,
|
| 22561 |
+
"loss": 19.692,
|
| 22562 |
+
"step": 32150
|
| 22563 |
+
},
|
| 22564 |
+
{
|
| 22565 |
+
"epoch": 0.5968030396871238,
|
| 22566 |
+
"grad_norm": 34.375,
|
| 22567 |
+
"learning_rate": 9.906749552087595e-06,
|
| 22568 |
+
"loss": 20.0812,
|
| 22569 |
+
"step": 32160
|
| 22570 |
+
},
|
| 22571 |
+
{
|
| 22572 |
+
"epoch": 0.5969886127716036,
|
| 22573 |
+
"grad_norm": 34.5625,
|
| 22574 |
+
"learning_rate": 9.906720556301554e-06,
|
| 22575 |
+
"loss": 20.4617,
|
| 22576 |
+
"step": 32170
|
| 22577 |
+
},
|
| 22578 |
+
{
|
| 22579 |
+
"epoch": 0.5971741858560834,
|
| 22580 |
+
"grad_norm": 34.625,
|
| 22581 |
+
"learning_rate": 9.906691560515511e-06,
|
| 22582 |
+
"loss": 20.2078,
|
| 22583 |
+
"step": 32180
|
| 22584 |
+
},
|
| 22585 |
+
{
|
| 22586 |
+
"epoch": 0.5973597589405633,
|
| 22587 |
+
"grad_norm": 35.40625,
|
| 22588 |
+
"learning_rate": 9.906662564729469e-06,
|
| 22589 |
+
"loss": 19.5445,
|
| 22590 |
+
"step": 32190
|
| 22591 |
+
},
|
| 22592 |
+
{
|
| 22593 |
+
"epoch": 0.5975453320250431,
|
| 22594 |
+
"grad_norm": 35.3125,
|
| 22595 |
+
"learning_rate": 9.906633568943426e-06,
|
| 22596 |
+
"loss": 20.0572,
|
| 22597 |
+
"step": 32200
|
| 22598 |
+
},
|
| 22599 |
+
{
|
| 22600 |
+
"epoch": 0.5977309051095229,
|
| 22601 |
+
"grad_norm": 35.46875,
|
| 22602 |
+
"learning_rate": 9.906604573157383e-06,
|
| 22603 |
+
"loss": 20.1442,
|
| 22604 |
+
"step": 32210
|
| 22605 |
+
},
|
| 22606 |
+
{
|
| 22607 |
+
"epoch": 0.5979164781940027,
|
| 22608 |
+
"grad_norm": 35.75,
|
| 22609 |
+
"learning_rate": 9.906575577371341e-06,
|
| 22610 |
+
"loss": 20.0047,
|
| 22611 |
+
"step": 32220
|
| 22612 |
+
},
|
| 22613 |
+
{
|
| 22614 |
+
"epoch": 0.5981020512784826,
|
| 22615 |
+
"grad_norm": 36.40625,
|
| 22616 |
+
"learning_rate": 9.906546581585298e-06,
|
| 22617 |
+
"loss": 20.1083,
|
| 22618 |
+
"step": 32230
|
| 22619 |
+
},
|
| 22620 |
+
{
|
| 22621 |
+
"epoch": 0.5982876243629623,
|
| 22622 |
+
"grad_norm": 34.9375,
|
| 22623 |
+
"learning_rate": 9.906517585799257e-06,
|
| 22624 |
+
"loss": 20.0461,
|
| 22625 |
+
"step": 32240
|
| 22626 |
+
},
|
| 22627 |
+
{
|
| 22628 |
+
"epoch": 0.5984731974474422,
|
| 22629 |
+
"grad_norm": 36.0,
|
| 22630 |
+
"learning_rate": 9.906488590013215e-06,
|
| 22631 |
+
"loss": 20.0892,
|
| 22632 |
+
"step": 32250
|
| 22633 |
+
},
|
| 22634 |
+
{
|
| 22635 |
+
"epoch": 0.5986587705319221,
|
| 22636 |
+
"grad_norm": 34.875,
|
| 22637 |
+
"learning_rate": 9.90645959422717e-06,
|
| 22638 |
+
"loss": 19.8061,
|
| 22639 |
+
"step": 32260
|
| 22640 |
+
},
|
| 22641 |
+
{
|
| 22642 |
+
"epoch": 0.5988443436164019,
|
| 22643 |
+
"grad_norm": 36.0,
|
| 22644 |
+
"learning_rate": 9.90643059844113e-06,
|
| 22645 |
+
"loss": 20.1581,
|
| 22646 |
+
"step": 32270
|
| 22647 |
+
},
|
| 22648 |
+
{
|
| 22649 |
+
"epoch": 0.5990299167008817,
|
| 22650 |
+
"grad_norm": 34.90625,
|
| 22651 |
+
"learning_rate": 9.906401602655087e-06,
|
| 22652 |
+
"loss": 19.8351,
|
| 22653 |
+
"step": 32280
|
| 22654 |
+
},
|
| 22655 |
+
{
|
| 22656 |
+
"epoch": 0.5992154897853615,
|
| 22657 |
+
"grad_norm": 37.03125,
|
| 22658 |
+
"learning_rate": 9.906372606869044e-06,
|
| 22659 |
+
"loss": 20.1744,
|
| 22660 |
+
"step": 32290
|
| 22661 |
+
},
|
| 22662 |
+
{
|
| 22663 |
+
"epoch": 0.5994010628698414,
|
| 22664 |
+
"grad_norm": 34.75,
|
| 22665 |
+
"learning_rate": 9.906343611083002e-06,
|
| 22666 |
+
"loss": 19.8549,
|
| 22667 |
+
"step": 32300
|
| 22668 |
+
},
|
| 22669 |
+
{
|
| 22670 |
+
"epoch": 0.5995866359543212,
|
| 22671 |
+
"grad_norm": 35.53125,
|
| 22672 |
+
"learning_rate": 9.906314615296961e-06,
|
| 22673 |
+
"loss": 19.901,
|
| 22674 |
+
"step": 32310
|
| 22675 |
+
},
|
| 22676 |
+
{
|
| 22677 |
+
"epoch": 0.599772209038801,
|
| 22678 |
+
"grad_norm": 35.59375,
|
| 22679 |
+
"learning_rate": 9.906285619510917e-06,
|
| 22680 |
+
"loss": 20.4425,
|
| 22681 |
+
"step": 32320
|
| 22682 |
+
},
|
| 22683 |
+
{
|
| 22684 |
+
"epoch": 0.5999577821232809,
|
| 22685 |
+
"grad_norm": 37.6875,
|
| 22686 |
+
"learning_rate": 9.906256623724874e-06,
|
| 22687 |
+
"loss": 20.1879,
|
| 22688 |
+
"step": 32330
|
| 22689 |
+
},
|
| 22690 |
+
{
|
| 22691 |
+
"epoch": 0.6001433552077606,
|
| 22692 |
+
"grad_norm": 35.25,
|
| 22693 |
+
"learning_rate": 9.906227627938833e-06,
|
| 22694 |
+
"loss": 19.9083,
|
| 22695 |
+
"step": 32340
|
| 22696 |
+
},
|
| 22697 |
+
{
|
| 22698 |
+
"epoch": 0.6003289282922405,
|
| 22699 |
+
"grad_norm": 35.5625,
|
| 22700 |
+
"learning_rate": 9.90619863215279e-06,
|
| 22701 |
+
"loss": 19.769,
|
| 22702 |
+
"step": 32350
|
| 22703 |
+
},
|
| 22704 |
+
{
|
| 22705 |
+
"epoch": 0.6005145013767204,
|
| 22706 |
+
"grad_norm": 36.09375,
|
| 22707 |
+
"learning_rate": 9.906169636366748e-06,
|
| 22708 |
+
"loss": 19.8395,
|
| 22709 |
+
"step": 32360
|
| 22710 |
+
},
|
| 22711 |
+
{
|
| 22712 |
+
"epoch": 0.6007000744612001,
|
| 22713 |
+
"grad_norm": 36.34375,
|
| 22714 |
+
"learning_rate": 9.906140640580705e-06,
|
| 22715 |
+
"loss": 20.1598,
|
| 22716 |
+
"step": 32370
|
| 22717 |
+
},
|
| 22718 |
+
{
|
| 22719 |
+
"epoch": 0.60088564754568,
|
| 22720 |
+
"grad_norm": 36.8125,
|
| 22721 |
+
"learning_rate": 9.906111644794663e-06,
|
| 22722 |
+
"loss": 19.9506,
|
| 22723 |
+
"step": 32380
|
| 22724 |
+
},
|
| 22725 |
+
{
|
| 22726 |
+
"epoch": 0.6010712206301598,
|
| 22727 |
+
"grad_norm": 35.1875,
|
| 22728 |
+
"learning_rate": 9.90608264900862e-06,
|
| 22729 |
+
"loss": 20.0782,
|
| 22730 |
+
"step": 32390
|
| 22731 |
+
},
|
| 22732 |
+
{
|
| 22733 |
+
"epoch": 0.6012567937146396,
|
| 22734 |
+
"grad_norm": 35.09375,
|
| 22735 |
+
"learning_rate": 9.906053653222578e-06,
|
| 22736 |
+
"loss": 19.96,
|
| 22737 |
+
"step": 32400
|
| 22738 |
+
},
|
| 22739 |
+
{
|
| 22740 |
+
"epoch": 0.6014423667991194,
|
| 22741 |
+
"grad_norm": 35.03125,
|
| 22742 |
+
"learning_rate": 9.906024657436537e-06,
|
| 22743 |
+
"loss": 20.2103,
|
| 22744 |
+
"step": 32410
|
| 22745 |
+
},
|
| 22746 |
+
{
|
| 22747 |
+
"epoch": 0.6016279398835993,
|
| 22748 |
+
"grad_norm": 33.46875,
|
| 22749 |
+
"learning_rate": 9.905995661650492e-06,
|
| 22750 |
+
"loss": 20.0204,
|
| 22751 |
+
"step": 32420
|
| 22752 |
+
},
|
| 22753 |
+
{
|
| 22754 |
+
"epoch": 0.6018135129680791,
|
| 22755 |
+
"grad_norm": 35.6875,
|
| 22756 |
+
"learning_rate": 9.90596666586445e-06,
|
| 22757 |
+
"loss": 19.9806,
|
| 22758 |
+
"step": 32430
|
| 22759 |
+
},
|
| 22760 |
+
{
|
| 22761 |
+
"epoch": 0.6019990860525589,
|
| 22762 |
+
"grad_norm": 35.25,
|
| 22763 |
+
"learning_rate": 9.905937670078409e-06,
|
| 22764 |
+
"loss": 19.869,
|
| 22765 |
+
"step": 32440
|
| 22766 |
+
},
|
| 22767 |
+
{
|
| 22768 |
+
"epoch": 0.6021846591370388,
|
| 22769 |
+
"grad_norm": 36.84375,
|
| 22770 |
+
"learning_rate": 9.905908674292366e-06,
|
| 22771 |
+
"loss": 20.2251,
|
| 22772 |
+
"step": 32450
|
| 22773 |
+
},
|
| 22774 |
+
{
|
| 22775 |
+
"epoch": 0.6023702322215186,
|
| 22776 |
+
"grad_norm": 34.25,
|
| 22777 |
+
"learning_rate": 9.905879678506324e-06,
|
| 22778 |
+
"loss": 20.3163,
|
| 22779 |
+
"step": 32460
|
| 22780 |
+
},
|
| 22781 |
+
{
|
| 22782 |
+
"epoch": 0.6025558053059984,
|
| 22783 |
+
"grad_norm": 34.25,
|
| 22784 |
+
"learning_rate": 9.905850682720281e-06,
|
| 22785 |
+
"loss": 19.7672,
|
| 22786 |
+
"step": 32470
|
| 22787 |
+
},
|
| 22788 |
+
{
|
| 22789 |
+
"epoch": 0.6027413783904783,
|
| 22790 |
+
"grad_norm": 34.8125,
|
| 22791 |
+
"learning_rate": 9.905821686934239e-06,
|
| 22792 |
+
"loss": 20.1227,
|
| 22793 |
+
"step": 32480
|
| 22794 |
+
},
|
| 22795 |
+
{
|
| 22796 |
+
"epoch": 0.6029269514749581,
|
| 22797 |
+
"grad_norm": 35.90625,
|
| 22798 |
+
"learning_rate": 9.905792691148196e-06,
|
| 22799 |
+
"loss": 20.2727,
|
| 22800 |
+
"step": 32490
|
| 22801 |
+
},
|
| 22802 |
+
{
|
| 22803 |
+
"epoch": 0.6031125245594379,
|
| 22804 |
+
"grad_norm": 35.5625,
|
| 22805 |
+
"learning_rate": 9.905763695362153e-06,
|
| 22806 |
+
"loss": 19.9203,
|
| 22807 |
+
"step": 32500
|
| 22808 |
+
},
|
| 22809 |
+
{
|
| 22810 |
+
"epoch": 0.6032980976439177,
|
| 22811 |
+
"grad_norm": 32.9375,
|
| 22812 |
+
"learning_rate": 9.905734699576112e-06,
|
| 22813 |
+
"loss": 19.4214,
|
| 22814 |
+
"step": 32510
|
| 22815 |
+
},
|
| 22816 |
+
{
|
| 22817 |
+
"epoch": 0.6034836707283976,
|
| 22818 |
+
"grad_norm": 35.5625,
|
| 22819 |
+
"learning_rate": 9.90570570379007e-06,
|
| 22820 |
+
"loss": 19.8526,
|
| 22821 |
+
"step": 32520
|
| 22822 |
+
},
|
| 22823 |
+
{
|
| 22824 |
+
"epoch": 0.6036692438128773,
|
| 22825 |
+
"grad_norm": 35.28125,
|
| 22826 |
+
"learning_rate": 9.905676708004026e-06,
|
| 22827 |
+
"loss": 19.5245,
|
| 22828 |
+
"step": 32530
|
| 22829 |
+
},
|
| 22830 |
+
{
|
| 22831 |
+
"epoch": 0.6038548168973572,
|
| 22832 |
+
"grad_norm": 38.78125,
|
| 22833 |
+
"learning_rate": 9.905647712217985e-06,
|
| 22834 |
+
"loss": 20.1409,
|
| 22835 |
+
"step": 32540
|
| 22836 |
+
},
|
| 22837 |
+
{
|
| 22838 |
+
"epoch": 0.6040403899818371,
|
| 22839 |
+
"grad_norm": 34.6875,
|
| 22840 |
+
"learning_rate": 9.905618716431942e-06,
|
| 22841 |
+
"loss": 19.7652,
|
| 22842 |
+
"step": 32550
|
| 22843 |
+
},
|
| 22844 |
+
{
|
| 22845 |
+
"epoch": 0.6042259630663168,
|
| 22846 |
+
"grad_norm": 34.46875,
|
| 22847 |
+
"learning_rate": 9.9055897206459e-06,
|
| 22848 |
+
"loss": 20.3414,
|
| 22849 |
+
"step": 32560
|
| 22850 |
+
},
|
| 22851 |
+
{
|
| 22852 |
+
"epoch": 0.6044115361507967,
|
| 22853 |
+
"grad_norm": 36.09375,
|
| 22854 |
+
"learning_rate": 9.905560724859857e-06,
|
| 22855 |
+
"loss": 20.0828,
|
| 22856 |
+
"step": 32570
|
| 22857 |
+
},
|
| 22858 |
+
{
|
| 22859 |
+
"epoch": 0.6045971092352765,
|
| 22860 |
+
"grad_norm": 35.0,
|
| 22861 |
+
"learning_rate": 9.905531729073814e-06,
|
| 22862 |
+
"loss": 20.2113,
|
| 22863 |
+
"step": 32580
|
| 22864 |
+
},
|
| 22865 |
+
{
|
| 22866 |
+
"epoch": 0.6047826823197563,
|
| 22867 |
+
"grad_norm": 35.03125,
|
| 22868 |
+
"learning_rate": 9.905502733287772e-06,
|
| 22869 |
+
"loss": 19.9929,
|
| 22870 |
+
"step": 32590
|
| 22871 |
+
},
|
| 22872 |
+
{
|
| 22873 |
+
"epoch": 0.6049682554042362,
|
| 22874 |
+
"grad_norm": 33.90625,
|
| 22875 |
+
"learning_rate": 9.90547373750173e-06,
|
| 22876 |
+
"loss": 19.9798,
|
| 22877 |
+
"step": 32600
|
| 22878 |
+
},
|
| 22879 |
+
{
|
| 22880 |
+
"epoch": 0.605153828488716,
|
| 22881 |
+
"grad_norm": 35.0625,
|
| 22882 |
+
"learning_rate": 9.905444741715687e-06,
|
| 22883 |
+
"loss": 20.134,
|
| 22884 |
+
"step": 32610
|
| 22885 |
+
},
|
| 22886 |
+
{
|
| 22887 |
+
"epoch": 0.6053394015731959,
|
| 22888 |
+
"grad_norm": 35.21875,
|
| 22889 |
+
"learning_rate": 9.905415745929646e-06,
|
| 22890 |
+
"loss": 20.5119,
|
| 22891 |
+
"step": 32620
|
| 22892 |
+
},
|
| 22893 |
+
{
|
| 22894 |
+
"epoch": 0.6055249746576756,
|
| 22895 |
+
"grad_norm": 36.15625,
|
| 22896 |
+
"learning_rate": 9.905386750143603e-06,
|
| 22897 |
+
"loss": 20.1409,
|
| 22898 |
+
"step": 32630
|
| 22899 |
+
},
|
| 22900 |
+
{
|
| 22901 |
+
"epoch": 0.6057105477421555,
|
| 22902 |
+
"grad_norm": 34.75,
|
| 22903 |
+
"learning_rate": 9.90535775435756e-06,
|
| 22904 |
+
"loss": 19.9287,
|
| 22905 |
+
"step": 32640
|
| 22906 |
+
},
|
| 22907 |
+
{
|
| 22908 |
+
"epoch": 0.6058961208266354,
|
| 22909 |
+
"grad_norm": 34.625,
|
| 22910 |
+
"learning_rate": 9.905328758571518e-06,
|
| 22911 |
+
"loss": 20.036,
|
| 22912 |
+
"step": 32650
|
| 22913 |
+
},
|
| 22914 |
+
{
|
| 22915 |
+
"epoch": 0.6060816939111151,
|
| 22916 |
+
"grad_norm": 34.53125,
|
| 22917 |
+
"learning_rate": 9.905299762785475e-06,
|
| 22918 |
+
"loss": 19.8514,
|
| 22919 |
+
"step": 32660
|
| 22920 |
+
},
|
| 22921 |
+
{
|
| 22922 |
+
"epoch": 0.606267266995595,
|
| 22923 |
+
"grad_norm": 34.78125,
|
| 22924 |
+
"learning_rate": 9.905270766999433e-06,
|
| 22925 |
+
"loss": 19.7428,
|
| 22926 |
+
"step": 32670
|
| 22927 |
+
},
|
| 22928 |
+
{
|
| 22929 |
+
"epoch": 0.6064528400800748,
|
| 22930 |
+
"grad_norm": 32.53125,
|
| 22931 |
+
"learning_rate": 9.90524177121339e-06,
|
| 22932 |
+
"loss": 19.5254,
|
| 22933 |
+
"step": 32680
|
| 22934 |
+
},
|
| 22935 |
+
{
|
| 22936 |
+
"epoch": 0.6066384131645546,
|
| 22937 |
+
"grad_norm": 35.9375,
|
| 22938 |
+
"learning_rate": 9.905212775427348e-06,
|
| 22939 |
+
"loss": 19.9658,
|
| 22940 |
+
"step": 32690
|
| 22941 |
+
},
|
| 22942 |
+
{
|
| 22943 |
+
"epoch": 0.6068239862490344,
|
| 22944 |
+
"grad_norm": 36.375,
|
| 22945 |
+
"learning_rate": 9.905183779641305e-06,
|
| 22946 |
+
"loss": 19.4217,
|
| 22947 |
+
"step": 32700
|
| 22948 |
+
},
|
| 22949 |
+
{
|
| 22950 |
+
"epoch": 0.6070095593335143,
|
| 22951 |
+
"grad_norm": 35.3125,
|
| 22952 |
+
"learning_rate": 9.905154783855262e-06,
|
| 22953 |
+
"loss": 19.6262,
|
| 22954 |
+
"step": 32710
|
| 22955 |
+
},
|
| 22956 |
+
{
|
| 22957 |
+
"epoch": 0.607195132417994,
|
| 22958 |
+
"grad_norm": 35.5625,
|
| 22959 |
+
"learning_rate": 9.905125788069221e-06,
|
| 22960 |
+
"loss": 19.8789,
|
| 22961 |
+
"step": 32720
|
| 22962 |
+
},
|
| 22963 |
+
{
|
| 22964 |
+
"epoch": 0.6073807055024739,
|
| 22965 |
+
"grad_norm": 36.3125,
|
| 22966 |
+
"learning_rate": 9.905096792283179e-06,
|
| 22967 |
+
"loss": 19.6746,
|
| 22968 |
+
"step": 32730
|
| 22969 |
+
},
|
| 22970 |
+
{
|
| 22971 |
+
"epoch": 0.6075662785869538,
|
| 22972 |
+
"grad_norm": 37.15625,
|
| 22973 |
+
"learning_rate": 9.905067796497135e-06,
|
| 22974 |
+
"loss": 19.791,
|
| 22975 |
+
"step": 32740
|
| 22976 |
+
},
|
| 22977 |
+
{
|
| 22978 |
+
"epoch": 0.6077518516714335,
|
| 22979 |
+
"grad_norm": 33.1875,
|
| 22980 |
+
"learning_rate": 9.905038800711094e-06,
|
| 22981 |
+
"loss": 19.8378,
|
| 22982 |
+
"step": 32750
|
| 22983 |
+
},
|
| 22984 |
+
{
|
| 22985 |
+
"epoch": 0.6079374247559134,
|
| 22986 |
+
"grad_norm": 36.8125,
|
| 22987 |
+
"learning_rate": 9.905009804925051e-06,
|
| 22988 |
+
"loss": 20.3618,
|
| 22989 |
+
"step": 32760
|
| 22990 |
+
},
|
| 22991 |
+
{
|
| 22992 |
+
"epoch": 0.6081229978403933,
|
| 22993 |
+
"grad_norm": 36.65625,
|
| 22994 |
+
"learning_rate": 9.904980809139008e-06,
|
| 22995 |
+
"loss": 19.9257,
|
| 22996 |
+
"step": 32770
|
| 22997 |
+
},
|
| 22998 |
+
{
|
| 22999 |
+
"epoch": 0.608308570924873,
|
| 23000 |
+
"grad_norm": 35.03125,
|
| 23001 |
+
"learning_rate": 9.904951813352966e-06,
|
| 23002 |
+
"loss": 20.2374,
|
| 23003 |
+
"step": 32780
|
| 23004 |
+
},
|
| 23005 |
+
{
|
| 23006 |
+
"epoch": 0.6084941440093529,
|
| 23007 |
+
"grad_norm": 34.0,
|
| 23008 |
+
"learning_rate": 9.904922817566925e-06,
|
| 23009 |
+
"loss": 19.7495,
|
| 23010 |
+
"step": 32790
|
| 23011 |
+
},
|
| 23012 |
+
{
|
| 23013 |
+
"epoch": 0.6086797170938327,
|
| 23014 |
+
"grad_norm": 34.75,
|
| 23015 |
+
"learning_rate": 9.90489382178088e-06,
|
| 23016 |
+
"loss": 19.7513,
|
| 23017 |
+
"step": 32800
|
| 23018 |
+
},
|
| 23019 |
+
{
|
| 23020 |
+
"epoch": 0.6088652901783126,
|
| 23021 |
+
"grad_norm": 35.5,
|
| 23022 |
+
"learning_rate": 9.904864825994838e-06,
|
| 23023 |
+
"loss": 20.0577,
|
| 23024 |
+
"step": 32810
|
| 23025 |
+
},
|
| 23026 |
+
{
|
| 23027 |
+
"epoch": 0.6090508632627923,
|
| 23028 |
+
"grad_norm": 33.53125,
|
| 23029 |
+
"learning_rate": 9.904835830208797e-06,
|
| 23030 |
+
"loss": 19.5465,
|
| 23031 |
+
"step": 32820
|
| 23032 |
+
},
|
| 23033 |
+
{
|
| 23034 |
+
"epoch": 0.6092364363472722,
|
| 23035 |
+
"grad_norm": 34.78125,
|
| 23036 |
+
"learning_rate": 9.904806834422755e-06,
|
| 23037 |
+
"loss": 20.4513,
|
| 23038 |
+
"step": 32830
|
| 23039 |
+
},
|
| 23040 |
+
{
|
| 23041 |
+
"epoch": 0.6094220094317521,
|
| 23042 |
+
"grad_norm": 35.5,
|
| 23043 |
+
"learning_rate": 9.904777838636712e-06,
|
| 23044 |
+
"loss": 20.2129,
|
| 23045 |
+
"step": 32840
|
| 23046 |
+
},
|
| 23047 |
+
{
|
| 23048 |
+
"epoch": 0.6096075825162318,
|
| 23049 |
+
"grad_norm": 35.03125,
|
| 23050 |
+
"learning_rate": 9.90474884285067e-06,
|
| 23051 |
+
"loss": 19.8204,
|
| 23052 |
+
"step": 32850
|
| 23053 |
+
},
|
| 23054 |
+
{
|
| 23055 |
+
"epoch": 0.6097931556007117,
|
| 23056 |
+
"grad_norm": 34.75,
|
| 23057 |
+
"learning_rate": 9.904719847064627e-06,
|
| 23058 |
+
"loss": 19.8698,
|
| 23059 |
+
"step": 32860
|
| 23060 |
+
},
|
| 23061 |
+
{
|
| 23062 |
+
"epoch": 0.6099787286851915,
|
| 23063 |
+
"grad_norm": 36.0,
|
| 23064 |
+
"learning_rate": 9.904690851278584e-06,
|
| 23065 |
+
"loss": 20.123,
|
| 23066 |
+
"step": 32870
|
| 23067 |
+
},
|
| 23068 |
+
{
|
| 23069 |
+
"epoch": 0.6101643017696713,
|
| 23070 |
+
"grad_norm": 35.375,
|
| 23071 |
+
"learning_rate": 9.904661855492542e-06,
|
| 23072 |
+
"loss": 20.2864,
|
| 23073 |
+
"step": 32880
|
| 23074 |
+
},
|
| 23075 |
+
{
|
| 23076 |
+
"epoch": 0.6103498748541512,
|
| 23077 |
+
"grad_norm": 36.6875,
|
| 23078 |
+
"learning_rate": 9.9046328597065e-06,
|
| 23079 |
+
"loss": 20.3273,
|
| 23080 |
+
"step": 32890
|
| 23081 |
+
},
|
| 23082 |
+
{
|
| 23083 |
+
"epoch": 0.610535447938631,
|
| 23084 |
+
"grad_norm": 34.78125,
|
| 23085 |
+
"learning_rate": 9.904603863920456e-06,
|
| 23086 |
+
"loss": 20.335,
|
| 23087 |
+
"step": 32900
|
| 23088 |
+
},
|
| 23089 |
+
{
|
| 23090 |
+
"epoch": 0.6107210210231108,
|
| 23091 |
+
"grad_norm": 33.125,
|
| 23092 |
+
"learning_rate": 9.904574868134414e-06,
|
| 23093 |
+
"loss": 19.5401,
|
| 23094 |
+
"step": 32910
|
| 23095 |
+
},
|
| 23096 |
+
{
|
| 23097 |
+
"epoch": 0.6109065941075906,
|
| 23098 |
+
"grad_norm": 35.25,
|
| 23099 |
+
"learning_rate": 9.904545872348373e-06,
|
| 23100 |
+
"loss": 20.044,
|
| 23101 |
+
"step": 32920
|
| 23102 |
+
},
|
| 23103 |
+
{
|
| 23104 |
+
"epoch": 0.6110921671920705,
|
| 23105 |
+
"grad_norm": 35.875,
|
| 23106 |
+
"learning_rate": 9.90451687656233e-06,
|
| 23107 |
+
"loss": 19.6458,
|
| 23108 |
+
"step": 32930
|
| 23109 |
+
},
|
| 23110 |
+
{
|
| 23111 |
+
"epoch": 0.6112777402765502,
|
| 23112 |
+
"grad_norm": 34.6875,
|
| 23113 |
+
"learning_rate": 9.904487880776288e-06,
|
| 23114 |
+
"loss": 19.9171,
|
| 23115 |
+
"step": 32940
|
| 23116 |
+
},
|
| 23117 |
+
{
|
| 23118 |
+
"epoch": 0.6114633133610301,
|
| 23119 |
+
"grad_norm": 35.4375,
|
| 23120 |
+
"learning_rate": 9.904458884990245e-06,
|
| 23121 |
+
"loss": 19.9779,
|
| 23122 |
+
"step": 32950
|
| 23123 |
+
},
|
| 23124 |
+
{
|
| 23125 |
+
"epoch": 0.61164888644551,
|
| 23126 |
+
"grad_norm": 33.09375,
|
| 23127 |
+
"learning_rate": 9.904429889204203e-06,
|
| 23128 |
+
"loss": 19.7718,
|
| 23129 |
+
"step": 32960
|
| 23130 |
+
},
|
| 23131 |
+
{
|
| 23132 |
+
"epoch": 0.6118344595299897,
|
| 23133 |
+
"grad_norm": 36.40625,
|
| 23134 |
+
"learning_rate": 9.90440089341816e-06,
|
| 23135 |
+
"loss": 19.9427,
|
| 23136 |
+
"step": 32970
|
| 23137 |
+
},
|
| 23138 |
+
{
|
| 23139 |
+
"epoch": 0.6120200326144696,
|
| 23140 |
+
"grad_norm": 33.1875,
|
| 23141 |
+
"learning_rate": 9.904371897632117e-06,
|
| 23142 |
+
"loss": 19.7202,
|
| 23143 |
+
"step": 32980
|
| 23144 |
+
},
|
| 23145 |
+
{
|
| 23146 |
+
"epoch": 0.6122056056989494,
|
| 23147 |
+
"grad_norm": 36.0625,
|
| 23148 |
+
"learning_rate": 9.904342901846077e-06,
|
| 23149 |
+
"loss": 19.4229,
|
| 23150 |
+
"step": 32990
|
| 23151 |
+
},
|
| 23152 |
+
{
|
| 23153 |
+
"epoch": 0.6123911787834293,
|
| 23154 |
+
"grad_norm": 34.375,
|
| 23155 |
+
"learning_rate": 9.904313906060034e-06,
|
| 23156 |
+
"loss": 20.1885,
|
| 23157 |
+
"step": 33000
|
| 23158 |
+
},
|
| 23159 |
+
{
|
| 23160 |
+
"epoch": 0.612576751867909,
|
| 23161 |
+
"grad_norm": 38.125,
|
| 23162 |
+
"learning_rate": 9.90428491027399e-06,
|
| 23163 |
+
"loss": 20.1329,
|
| 23164 |
+
"step": 33010
|
| 23165 |
+
},
|
| 23166 |
+
{
|
| 23167 |
+
"epoch": 0.6127623249523889,
|
| 23168 |
+
"grad_norm": 35.0625,
|
| 23169 |
+
"learning_rate": 9.904255914487949e-06,
|
| 23170 |
+
"loss": 19.4893,
|
| 23171 |
+
"step": 33020
|
| 23172 |
+
},
|
| 23173 |
+
{
|
| 23174 |
+
"epoch": 0.6129478980368688,
|
| 23175 |
+
"grad_norm": 35.375,
|
| 23176 |
+
"learning_rate": 9.904226918701906e-06,
|
| 23177 |
+
"loss": 19.8205,
|
| 23178 |
+
"step": 33030
|
| 23179 |
+
},
|
| 23180 |
+
{
|
| 23181 |
+
"epoch": 0.6131334711213485,
|
| 23182 |
+
"grad_norm": 33.34375,
|
| 23183 |
+
"learning_rate": 9.904197922915864e-06,
|
| 23184 |
+
"loss": 20.1173,
|
| 23185 |
+
"step": 33040
|
| 23186 |
+
},
|
| 23187 |
+
{
|
| 23188 |
+
"epoch": 0.6133190442058284,
|
| 23189 |
+
"grad_norm": 34.8125,
|
| 23190 |
+
"learning_rate": 9.904168927129821e-06,
|
| 23191 |
+
"loss": 20.4497,
|
| 23192 |
+
"step": 33050
|
| 23193 |
+
},
|
| 23194 |
+
{
|
| 23195 |
+
"epoch": 0.6135046172903083,
|
| 23196 |
+
"grad_norm": 34.65625,
|
| 23197 |
+
"learning_rate": 9.904139931343778e-06,
|
| 23198 |
+
"loss": 20.0398,
|
| 23199 |
+
"step": 33060
|
| 23200 |
+
},
|
| 23201 |
+
{
|
| 23202 |
+
"epoch": 0.613690190374788,
|
| 23203 |
+
"grad_norm": 34.46875,
|
| 23204 |
+
"learning_rate": 9.904110935557736e-06,
|
| 23205 |
+
"loss": 20.0827,
|
| 23206 |
+
"step": 33070
|
| 23207 |
+
},
|
| 23208 |
+
{
|
| 23209 |
+
"epoch": 0.6138757634592679,
|
| 23210 |
+
"grad_norm": 33.0,
|
| 23211 |
+
"learning_rate": 9.904081939771693e-06,
|
| 23212 |
+
"loss": 19.8621,
|
| 23213 |
+
"step": 33080
|
| 23214 |
+
},
|
| 23215 |
+
{
|
| 23216 |
+
"epoch": 0.6140613365437477,
|
| 23217 |
+
"grad_norm": 35.34375,
|
| 23218 |
+
"learning_rate": 9.904052943985652e-06,
|
| 23219 |
+
"loss": 20.097,
|
| 23220 |
+
"step": 33090
|
| 23221 |
+
},
|
| 23222 |
+
{
|
| 23223 |
+
"epoch": 0.6142469096282275,
|
| 23224 |
+
"grad_norm": 34.96875,
|
| 23225 |
+
"learning_rate": 9.90402394819961e-06,
|
| 23226 |
+
"loss": 19.9632,
|
| 23227 |
+
"step": 33100
|
| 23228 |
+
},
|
| 23229 |
+
{
|
| 23230 |
+
"epoch": 0.6144324827127073,
|
| 23231 |
+
"grad_norm": 36.40625,
|
| 23232 |
+
"learning_rate": 9.903994952413567e-06,
|
| 23233 |
+
"loss": 19.9311,
|
| 23234 |
+
"step": 33110
|
| 23235 |
+
},
|
| 23236 |
+
{
|
| 23237 |
+
"epoch": 0.6146180557971872,
|
| 23238 |
+
"grad_norm": 35.40625,
|
| 23239 |
+
"learning_rate": 9.903965956627524e-06,
|
| 23240 |
+
"loss": 19.8179,
|
| 23241 |
+
"step": 33120
|
| 23242 |
+
},
|
| 23243 |
+
{
|
| 23244 |
+
"epoch": 0.614803628881667,
|
| 23245 |
+
"grad_norm": 35.15625,
|
| 23246 |
+
"learning_rate": 9.903936960841482e-06,
|
| 23247 |
+
"loss": 19.5918,
|
| 23248 |
+
"step": 33130
|
| 23249 |
+
},
|
| 23250 |
+
{
|
| 23251 |
+
"epoch": 0.6149892019661468,
|
| 23252 |
+
"grad_norm": 32.78125,
|
| 23253 |
+
"learning_rate": 9.90390796505544e-06,
|
| 23254 |
+
"loss": 19.6736,
|
| 23255 |
+
"step": 33140
|
| 23256 |
+
},
|
| 23257 |
+
{
|
| 23258 |
+
"epoch": 0.6151747750506267,
|
| 23259 |
+
"grad_norm": 34.0,
|
| 23260 |
+
"learning_rate": 9.903878969269397e-06,
|
| 23261 |
+
"loss": 19.9292,
|
| 23262 |
+
"step": 33150
|
| 23263 |
+
},
|
| 23264 |
+
{
|
| 23265 |
+
"epoch": 0.6153603481351065,
|
| 23266 |
+
"grad_norm": 33.84375,
|
| 23267 |
+
"learning_rate": 9.903849973483354e-06,
|
| 23268 |
+
"loss": 20.5943,
|
| 23269 |
+
"step": 33160
|
| 23270 |
+
},
|
| 23271 |
+
{
|
| 23272 |
+
"epoch": 0.6155459212195863,
|
| 23273 |
+
"grad_norm": 35.96875,
|
| 23274 |
+
"learning_rate": 9.903820977697312e-06,
|
| 23275 |
+
"loss": 20.0126,
|
| 23276 |
+
"step": 33170
|
| 23277 |
+
},
|
| 23278 |
+
{
|
| 23279 |
+
"epoch": 0.6157314943040662,
|
| 23280 |
+
"grad_norm": 35.09375,
|
| 23281 |
+
"learning_rate": 9.903791981911269e-06,
|
| 23282 |
+
"loss": 20.3576,
|
| 23283 |
+
"step": 33180
|
| 23284 |
+
},
|
| 23285 |
+
{
|
| 23286 |
+
"epoch": 0.615917067388546,
|
| 23287 |
+
"grad_norm": 34.9375,
|
| 23288 |
+
"learning_rate": 9.903762986125226e-06,
|
| 23289 |
+
"loss": 19.9106,
|
| 23290 |
+
"step": 33190
|
| 23291 |
+
},
|
| 23292 |
+
{
|
| 23293 |
+
"epoch": 0.6161026404730258,
|
| 23294 |
+
"grad_norm": 33.25,
|
| 23295 |
+
"learning_rate": 9.903733990339185e-06,
|
| 23296 |
+
"loss": 19.8425,
|
| 23297 |
+
"step": 33200
|
| 23298 |
+
},
|
| 23299 |
+
{
|
| 23300 |
+
"epoch": 0.6162882135575056,
|
| 23301 |
+
"grad_norm": 35.78125,
|
| 23302 |
+
"learning_rate": 9.903704994553143e-06,
|
| 23303 |
+
"loss": 19.8126,
|
| 23304 |
+
"step": 33210
|
| 23305 |
+
},
|
| 23306 |
+
{
|
| 23307 |
+
"epoch": 0.6164737866419855,
|
| 23308 |
+
"grad_norm": 34.0,
|
| 23309 |
+
"learning_rate": 9.9036759987671e-06,
|
| 23310 |
+
"loss": 19.5984,
|
| 23311 |
+
"step": 33220
|
| 23312 |
+
},
|
| 23313 |
+
{
|
| 23314 |
+
"epoch": 0.6166593597264652,
|
| 23315 |
+
"grad_norm": 34.5625,
|
| 23316 |
+
"learning_rate": 9.903647002981058e-06,
|
| 23317 |
+
"loss": 20.4081,
|
| 23318 |
+
"step": 33230
|
| 23319 |
+
},
|
| 23320 |
+
{
|
| 23321 |
+
"epoch": 0.6168449328109451,
|
| 23322 |
+
"grad_norm": 34.21875,
|
| 23323 |
+
"learning_rate": 9.903618007195015e-06,
|
| 23324 |
+
"loss": 20.1874,
|
| 23325 |
+
"step": 33240
|
| 23326 |
+
},
|
| 23327 |
+
{
|
| 23328 |
+
"epoch": 0.617030505895425,
|
| 23329 |
+
"grad_norm": 34.59375,
|
| 23330 |
+
"learning_rate": 9.903589011408972e-06,
|
| 23331 |
+
"loss": 19.9073,
|
| 23332 |
+
"step": 33250
|
| 23333 |
+
},
|
| 23334 |
+
{
|
| 23335 |
+
"epoch": 0.6172160789799047,
|
| 23336 |
+
"grad_norm": 37.0625,
|
| 23337 |
+
"learning_rate": 9.90356001562293e-06,
|
| 23338 |
+
"loss": 19.5716,
|
| 23339 |
+
"step": 33260
|
| 23340 |
+
},
|
| 23341 |
+
{
|
| 23342 |
+
"epoch": 0.6174016520643846,
|
| 23343 |
+
"grad_norm": 33.8125,
|
| 23344 |
+
"learning_rate": 9.903531019836889e-06,
|
| 23345 |
+
"loss": 19.334,
|
| 23346 |
+
"step": 33270
|
| 23347 |
+
},
|
| 23348 |
+
{
|
| 23349 |
+
"epoch": 0.6175872251488644,
|
| 23350 |
+
"grad_norm": 33.125,
|
| 23351 |
+
"learning_rate": 9.903502024050845e-06,
|
| 23352 |
+
"loss": 20.0157,
|
| 23353 |
+
"step": 33280
|
| 23354 |
+
},
|
| 23355 |
+
{
|
| 23356 |
+
"epoch": 0.6177727982333442,
|
| 23357 |
+
"grad_norm": 33.28125,
|
| 23358 |
+
"learning_rate": 9.903473028264802e-06,
|
| 23359 |
+
"loss": 19.8102,
|
| 23360 |
+
"step": 33290
|
| 23361 |
+
},
|
| 23362 |
+
{
|
| 23363 |
+
"epoch": 0.617958371317824,
|
| 23364 |
+
"grad_norm": 33.65625,
|
| 23365 |
+
"learning_rate": 9.903444032478761e-06,
|
| 23366 |
+
"loss": 19.8039,
|
| 23367 |
+
"step": 33300
|
| 23368 |
+
},
|
| 23369 |
+
{
|
| 23370 |
+
"epoch": 0.6181439444023039,
|
| 23371 |
+
"grad_norm": 34.78125,
|
| 23372 |
+
"learning_rate": 9.903415036692719e-06,
|
| 23373 |
+
"loss": 19.945,
|
| 23374 |
+
"step": 33310
|
| 23375 |
+
},
|
| 23376 |
+
{
|
| 23377 |
+
"epoch": 0.6183295174867837,
|
| 23378 |
+
"grad_norm": 35.21875,
|
| 23379 |
+
"learning_rate": 9.903386040906676e-06,
|
| 23380 |
+
"loss": 20.1298,
|
| 23381 |
+
"step": 33320
|
| 23382 |
+
},
|
| 23383 |
+
{
|
| 23384 |
+
"epoch": 0.6185150905712635,
|
| 23385 |
+
"grad_norm": 35.65625,
|
| 23386 |
+
"learning_rate": 9.903357045120633e-06,
|
| 23387 |
+
"loss": 20.0132,
|
| 23388 |
+
"step": 33330
|
| 23389 |
+
},
|
| 23390 |
+
{
|
| 23391 |
+
"epoch": 0.6187006636557434,
|
| 23392 |
+
"grad_norm": 36.4375,
|
| 23393 |
+
"learning_rate": 9.90332804933459e-06,
|
| 23394 |
+
"loss": 19.7294,
|
| 23395 |
+
"step": 33340
|
| 23396 |
+
},
|
| 23397 |
+
{
|
| 23398 |
+
"epoch": 0.6188862367402233,
|
| 23399 |
+
"grad_norm": 34.59375,
|
| 23400 |
+
"learning_rate": 9.903299053548548e-06,
|
| 23401 |
+
"loss": 20.4257,
|
| 23402 |
+
"step": 33350
|
| 23403 |
+
},
|
| 23404 |
+
{
|
| 23405 |
+
"epoch": 0.619071809824703,
|
| 23406 |
+
"grad_norm": 36.46875,
|
| 23407 |
+
"learning_rate": 9.903270057762506e-06,
|
| 23408 |
+
"loss": 19.9613,
|
| 23409 |
+
"step": 33360
|
| 23410 |
+
},
|
| 23411 |
+
{
|
| 23412 |
+
"epoch": 0.6192573829091829,
|
| 23413 |
+
"grad_norm": 33.65625,
|
| 23414 |
+
"learning_rate": 9.903241061976465e-06,
|
| 23415 |
+
"loss": 19.5663,
|
| 23416 |
+
"step": 33370
|
| 23417 |
+
},
|
| 23418 |
+
{
|
| 23419 |
+
"epoch": 0.6194429559936627,
|
| 23420 |
+
"grad_norm": 36.78125,
|
| 23421 |
+
"learning_rate": 9.903212066190422e-06,
|
| 23422 |
+
"loss": 19.7608,
|
| 23423 |
+
"step": 33380
|
| 23424 |
+
},
|
| 23425 |
+
{
|
| 23426 |
+
"epoch": 0.6196285290781425,
|
| 23427 |
+
"grad_norm": 35.375,
|
| 23428 |
+
"learning_rate": 9.903183070404378e-06,
|
| 23429 |
+
"loss": 19.912,
|
| 23430 |
+
"step": 33390
|
| 23431 |
+
},
|
| 23432 |
+
{
|
| 23433 |
+
"epoch": 0.6198141021626223,
|
| 23434 |
+
"grad_norm": 34.34375,
|
| 23435 |
+
"learning_rate": 9.903154074618337e-06,
|
| 23436 |
+
"loss": 19.6677,
|
| 23437 |
+
"step": 33400
|
| 23438 |
+
},
|
| 23439 |
+
{
|
| 23440 |
+
"epoch": 0.6199996752471022,
|
| 23441 |
+
"grad_norm": 35.46875,
|
| 23442 |
+
"learning_rate": 9.903125078832294e-06,
|
| 23443 |
+
"loss": 19.9394,
|
| 23444 |
+
"step": 33410
|
| 23445 |
+
},
|
| 23446 |
+
{
|
| 23447 |
+
"epoch": 0.620185248331582,
|
| 23448 |
+
"grad_norm": 36.4375,
|
| 23449 |
+
"learning_rate": 9.903096083046252e-06,
|
| 23450 |
+
"loss": 19.5774,
|
| 23451 |
+
"step": 33420
|
| 23452 |
+
},
|
| 23453 |
+
{
|
| 23454 |
+
"epoch": 0.6203708214160618,
|
| 23455 |
+
"grad_norm": 36.28125,
|
| 23456 |
+
"learning_rate": 9.90306708726021e-06,
|
| 23457 |
+
"loss": 19.8794,
|
| 23458 |
+
"step": 33430
|
| 23459 |
+
},
|
| 23460 |
+
{
|
| 23461 |
+
"epoch": 0.6205563945005417,
|
| 23462 |
+
"grad_norm": 37.65625,
|
| 23463 |
+
"learning_rate": 9.903038091474167e-06,
|
| 23464 |
+
"loss": 20.2317,
|
| 23465 |
+
"step": 33440
|
| 23466 |
+
},
|
| 23467 |
+
{
|
| 23468 |
+
"epoch": 0.6207419675850214,
|
| 23469 |
+
"grad_norm": 35.75,
|
| 23470 |
+
"learning_rate": 9.903009095688124e-06,
|
| 23471 |
+
"loss": 20.0775,
|
| 23472 |
+
"step": 33450
|
| 23473 |
+
},
|
| 23474 |
+
{
|
| 23475 |
+
"epoch": 0.6209275406695013,
|
| 23476 |
+
"grad_norm": 35.0,
|
| 23477 |
+
"learning_rate": 9.902980099902081e-06,
|
| 23478 |
+
"loss": 20.0078,
|
| 23479 |
+
"step": 33460
|
| 23480 |
+
},
|
| 23481 |
+
{
|
| 23482 |
+
"epoch": 0.6211131137539811,
|
| 23483 |
+
"grad_norm": 35.71875,
|
| 23484 |
+
"learning_rate": 9.90295110411604e-06,
|
| 23485 |
+
"loss": 19.8658,
|
| 23486 |
+
"step": 33470
|
| 23487 |
+
},
|
| 23488 |
+
{
|
| 23489 |
+
"epoch": 0.6212986868384609,
|
| 23490 |
+
"grad_norm": 34.71875,
|
| 23491 |
+
"learning_rate": 9.902922108329998e-06,
|
| 23492 |
+
"loss": 19.6372,
|
| 23493 |
+
"step": 33480
|
| 23494 |
+
},
|
| 23495 |
+
{
|
| 23496 |
+
"epoch": 0.6214842599229408,
|
| 23497 |
+
"grad_norm": 36.0625,
|
| 23498 |
+
"learning_rate": 9.902893112543954e-06,
|
| 23499 |
+
"loss": 19.5675,
|
| 23500 |
+
"step": 33490
|
| 23501 |
+
},
|
| 23502 |
+
{
|
| 23503 |
+
"epoch": 0.6216698330074206,
|
| 23504 |
+
"grad_norm": 35.25,
|
| 23505 |
+
"learning_rate": 9.902864116757913e-06,
|
| 23506 |
+
"loss": 19.9335,
|
| 23507 |
+
"step": 33500
|
| 23508 |
+
},
|
| 23509 |
+
{
|
| 23510 |
+
"epoch": 0.6218554060919004,
|
| 23511 |
+
"grad_norm": 34.28125,
|
| 23512 |
+
"learning_rate": 9.90283512097187e-06,
|
| 23513 |
+
"loss": 19.6987,
|
| 23514 |
+
"step": 33510
|
| 23515 |
+
},
|
| 23516 |
+
{
|
| 23517 |
+
"epoch": 0.6220409791763802,
|
| 23518 |
+
"grad_norm": 32.0625,
|
| 23519 |
+
"learning_rate": 9.902806125185828e-06,
|
| 23520 |
+
"loss": 19.9386,
|
| 23521 |
+
"step": 33520
|
| 23522 |
+
},
|
| 23523 |
+
{
|
| 23524 |
+
"epoch": 0.6222265522608601,
|
| 23525 |
+
"grad_norm": 36.5625,
|
| 23526 |
+
"learning_rate": 9.902777129399785e-06,
|
| 23527 |
+
"loss": 19.802,
|
| 23528 |
+
"step": 33530
|
| 23529 |
+
},
|
| 23530 |
+
{
|
| 23531 |
+
"epoch": 0.62241212534534,
|
| 23532 |
+
"grad_norm": 34.71875,
|
| 23533 |
+
"learning_rate": 9.902748133613742e-06,
|
| 23534 |
+
"loss": 19.8391,
|
| 23535 |
+
"step": 33540
|
| 23536 |
+
},
|
| 23537 |
+
{
|
| 23538 |
+
"epoch": 0.6225976984298197,
|
| 23539 |
+
"grad_norm": 35.875,
|
| 23540 |
+
"learning_rate": 9.9027191378277e-06,
|
| 23541 |
+
"loss": 19.5903,
|
| 23542 |
+
"step": 33550
|
| 23543 |
+
},
|
| 23544 |
+
{
|
| 23545 |
+
"epoch": 0.6227832715142996,
|
| 23546 |
+
"grad_norm": 34.375,
|
| 23547 |
+
"learning_rate": 9.902690142041657e-06,
|
| 23548 |
+
"loss": 19.9132,
|
| 23549 |
+
"step": 33560
|
| 23550 |
+
},
|
| 23551 |
+
{
|
| 23552 |
+
"epoch": 0.6229688445987794,
|
| 23553 |
+
"grad_norm": 36.96875,
|
| 23554 |
+
"learning_rate": 9.902661146255616e-06,
|
| 23555 |
+
"loss": 19.7901,
|
| 23556 |
+
"step": 33570
|
| 23557 |
+
},
|
| 23558 |
+
{
|
| 23559 |
+
"epoch": 0.6231544176832592,
|
| 23560 |
+
"grad_norm": 33.25,
|
| 23561 |
+
"learning_rate": 9.902632150469574e-06,
|
| 23562 |
+
"loss": 19.8296,
|
| 23563 |
+
"step": 33580
|
| 23564 |
+
},
|
| 23565 |
+
{
|
| 23566 |
+
"epoch": 0.623339990767739,
|
| 23567 |
+
"grad_norm": 36.0625,
|
| 23568 |
+
"learning_rate": 9.902603154683531e-06,
|
| 23569 |
+
"loss": 19.664,
|
| 23570 |
+
"step": 33590
|
| 23571 |
+
},
|
| 23572 |
+
{
|
| 23573 |
+
"epoch": 0.6235255638522189,
|
| 23574 |
+
"grad_norm": 35.84375,
|
| 23575 |
+
"learning_rate": 9.902574158897489e-06,
|
| 23576 |
+
"loss": 19.6831,
|
| 23577 |
+
"step": 33600
|
| 23578 |
+
},
|
| 23579 |
+
{
|
| 23580 |
+
"epoch": 0.6237111369366987,
|
| 23581 |
+
"grad_norm": 35.3125,
|
| 23582 |
+
"learning_rate": 9.902545163111446e-06,
|
| 23583 |
+
"loss": 19.5117,
|
| 23584 |
+
"step": 33610
|
| 23585 |
+
},
|
| 23586 |
+
{
|
| 23587 |
+
"epoch": 0.6238967100211785,
|
| 23588 |
+
"grad_norm": 36.40625,
|
| 23589 |
+
"learning_rate": 9.902516167325403e-06,
|
| 23590 |
+
"loss": 19.7426,
|
| 23591 |
+
"step": 33620
|
| 23592 |
+
},
|
| 23593 |
+
{
|
| 23594 |
+
"epoch": 0.6240822831056584,
|
| 23595 |
+
"grad_norm": 34.96875,
|
| 23596 |
+
"learning_rate": 9.90248717153936e-06,
|
| 23597 |
+
"loss": 19.4962,
|
| 23598 |
+
"step": 33630
|
| 23599 |
+
},
|
| 23600 |
+
{
|
| 23601 |
+
"epoch": 0.6242678561901381,
|
| 23602 |
+
"grad_norm": 35.28125,
|
| 23603 |
+
"learning_rate": 9.902458175753318e-06,
|
| 23604 |
+
"loss": 19.9088,
|
| 23605 |
+
"step": 33640
|
| 23606 |
+
},
|
| 23607 |
+
{
|
| 23608 |
+
"epoch": 0.624453429274618,
|
| 23609 |
+
"grad_norm": 34.15625,
|
| 23610 |
+
"learning_rate": 9.902429179967277e-06,
|
| 23611 |
+
"loss": 19.5959,
|
| 23612 |
+
"step": 33650
|
| 23613 |
+
},
|
| 23614 |
+
{
|
| 23615 |
+
"epoch": 0.6246390023590979,
|
| 23616 |
+
"grad_norm": 35.40625,
|
| 23617 |
+
"learning_rate": 9.902400184181233e-06,
|
| 23618 |
+
"loss": 19.8292,
|
| 23619 |
+
"step": 33660
|
| 23620 |
+
},
|
| 23621 |
+
{
|
| 23622 |
+
"epoch": 0.6248245754435776,
|
| 23623 |
+
"grad_norm": 35.46875,
|
| 23624 |
+
"learning_rate": 9.90237118839519e-06,
|
| 23625 |
+
"loss": 19.9731,
|
| 23626 |
+
"step": 33670
|
| 23627 |
+
},
|
| 23628 |
+
{
|
| 23629 |
+
"epoch": 0.6250101485280575,
|
| 23630 |
+
"grad_norm": 33.78125,
|
| 23631 |
+
"learning_rate": 9.90234219260915e-06,
|
| 23632 |
+
"loss": 19.4511,
|
| 23633 |
+
"step": 33680
|
| 23634 |
+
},
|
| 23635 |
+
{
|
| 23636 |
+
"epoch": 0.6251957216125373,
|
| 23637 |
+
"grad_norm": 35.1875,
|
| 23638 |
+
"learning_rate": 9.902313196823107e-06,
|
| 23639 |
+
"loss": 19.9273,
|
| 23640 |
+
"step": 33690
|
| 23641 |
+
},
|
| 23642 |
+
{
|
| 23643 |
+
"epoch": 0.6253812946970172,
|
| 23644 |
+
"grad_norm": 34.34375,
|
| 23645 |
+
"learning_rate": 9.902284201037064e-06,
|
| 23646 |
+
"loss": 20.1118,
|
| 23647 |
+
"step": 33700
|
| 23648 |
+
},
|
| 23649 |
+
{
|
| 23650 |
+
"epoch": 0.625566867781497,
|
| 23651 |
+
"grad_norm": 33.1875,
|
| 23652 |
+
"learning_rate": 9.902255205251022e-06,
|
| 23653 |
+
"loss": 19.8202,
|
| 23654 |
+
"step": 33710
|
| 23655 |
+
},
|
| 23656 |
+
{
|
| 23657 |
+
"epoch": 0.6257524408659768,
|
| 23658 |
+
"grad_norm": 37.34375,
|
| 23659 |
+
"learning_rate": 9.902226209464979e-06,
|
| 23660 |
+
"loss": 20.2005,
|
| 23661 |
+
"step": 33720
|
| 23662 |
+
},
|
| 23663 |
+
{
|
| 23664 |
+
"epoch": 0.6259380139504567,
|
| 23665 |
+
"grad_norm": 34.4375,
|
| 23666 |
+
"learning_rate": 9.902197213678936e-06,
|
| 23667 |
+
"loss": 20.3347,
|
| 23668 |
+
"step": 33730
|
| 23669 |
+
},
|
| 23670 |
+
{
|
| 23671 |
+
"epoch": 0.6261235870349364,
|
| 23672 |
+
"grad_norm": 35.78125,
|
| 23673 |
+
"learning_rate": 9.902168217892894e-06,
|
| 23674 |
+
"loss": 20.3401,
|
| 23675 |
+
"step": 33740
|
| 23676 |
+
},
|
| 23677 |
+
{
|
| 23678 |
+
"epoch": 0.6263091601194163,
|
| 23679 |
+
"grad_norm": 36.0625,
|
| 23680 |
+
"learning_rate": 9.902139222106853e-06,
|
| 23681 |
+
"loss": 19.5948,
|
| 23682 |
+
"step": 33750
|
| 23683 |
+
},
|
| 23684 |
+
{
|
| 23685 |
+
"epoch": 0.6264947332038961,
|
| 23686 |
+
"grad_norm": 34.75,
|
| 23687 |
+
"learning_rate": 9.902110226320809e-06,
|
| 23688 |
+
"loss": 20.1637,
|
| 23689 |
+
"step": 33760
|
| 23690 |
+
},
|
| 23691 |
+
{
|
| 23692 |
+
"epoch": 0.6266803062883759,
|
| 23693 |
+
"grad_norm": 34.0,
|
| 23694 |
+
"learning_rate": 9.902081230534766e-06,
|
| 23695 |
+
"loss": 19.9732,
|
| 23696 |
+
"step": 33770
|
| 23697 |
+
},
|
| 23698 |
+
{
|
| 23699 |
+
"epoch": 0.6268658793728558,
|
| 23700 |
+
"grad_norm": 33.65625,
|
| 23701 |
+
"learning_rate": 9.902052234748725e-06,
|
| 23702 |
+
"loss": 19.7471,
|
| 23703 |
+
"step": 33780
|
| 23704 |
+
},
|
| 23705 |
+
{
|
| 23706 |
+
"epoch": 0.6270514524573356,
|
| 23707 |
+
"grad_norm": 33.875,
|
| 23708 |
+
"learning_rate": 9.902023238962683e-06,
|
| 23709 |
+
"loss": 19.665,
|
| 23710 |
+
"step": 33790
|
| 23711 |
+
},
|
| 23712 |
+
{
|
| 23713 |
+
"epoch": 0.6272370255418154,
|
| 23714 |
+
"grad_norm": 34.5,
|
| 23715 |
+
"learning_rate": 9.90199424317664e-06,
|
| 23716 |
+
"loss": 19.9919,
|
| 23717 |
+
"step": 33800
|
| 23718 |
+
},
|
| 23719 |
+
{
|
| 23720 |
+
"epoch": 0.6274225986262952,
|
| 23721 |
+
"grad_norm": 34.34375,
|
| 23722 |
+
"learning_rate": 9.901965247390597e-06,
|
| 23723 |
+
"loss": 19.9491,
|
| 23724 |
+
"step": 33810
|
| 23725 |
+
},
|
| 23726 |
+
{
|
| 23727 |
+
"epoch": 0.6276081717107751,
|
| 23728 |
+
"grad_norm": 35.0,
|
| 23729 |
+
"learning_rate": 9.901936251604555e-06,
|
| 23730 |
+
"loss": 19.8758,
|
| 23731 |
+
"step": 33820
|
| 23732 |
+
},
|
| 23733 |
+
{
|
| 23734 |
+
"epoch": 0.6277937447952548,
|
| 23735 |
+
"grad_norm": 37.84375,
|
| 23736 |
+
"learning_rate": 9.901907255818512e-06,
|
| 23737 |
+
"loss": 19.9295,
|
| 23738 |
+
"step": 33830
|
| 23739 |
+
},
|
| 23740 |
+
{
|
| 23741 |
+
"epoch": 0.6279793178797347,
|
| 23742 |
+
"grad_norm": 35.28125,
|
| 23743 |
+
"learning_rate": 9.90187826003247e-06,
|
| 23744 |
+
"loss": 19.8433,
|
| 23745 |
+
"step": 33840
|
| 23746 |
+
},
|
| 23747 |
+
{
|
| 23748 |
+
"epoch": 0.6281648909642146,
|
| 23749 |
+
"grad_norm": 36.28125,
|
| 23750 |
+
"learning_rate": 9.901849264246429e-06,
|
| 23751 |
+
"loss": 19.9194,
|
| 23752 |
+
"step": 33850
|
| 23753 |
+
},
|
| 23754 |
+
{
|
| 23755 |
+
"epoch": 0.6283504640486943,
|
| 23756 |
+
"grad_norm": 35.78125,
|
| 23757 |
+
"learning_rate": 9.901820268460386e-06,
|
| 23758 |
+
"loss": 19.8389,
|
| 23759 |
+
"step": 33860
|
| 23760 |
+
},
|
| 23761 |
+
{
|
| 23762 |
+
"epoch": 0.6285360371331742,
|
| 23763 |
+
"grad_norm": 35.3125,
|
| 23764 |
+
"learning_rate": 9.901791272674342e-06,
|
| 23765 |
+
"loss": 19.8355,
|
| 23766 |
+
"step": 33870
|
| 23767 |
+
},
|
| 23768 |
+
{
|
| 23769 |
+
"epoch": 0.628721610217654,
|
| 23770 |
+
"grad_norm": 36.125,
|
| 23771 |
+
"learning_rate": 9.901762276888301e-06,
|
| 23772 |
+
"loss": 19.8938,
|
| 23773 |
+
"step": 33880
|
| 23774 |
+
},
|
| 23775 |
+
{
|
| 23776 |
+
"epoch": 0.6289071833021339,
|
| 23777 |
+
"grad_norm": 36.65625,
|
| 23778 |
+
"learning_rate": 9.901733281102258e-06,
|
| 23779 |
+
"loss": 20.1254,
|
| 23780 |
+
"step": 33890
|
| 23781 |
+
},
|
| 23782 |
+
{
|
| 23783 |
+
"epoch": 0.6290927563866137,
|
| 23784 |
+
"grad_norm": 35.0625,
|
| 23785 |
+
"learning_rate": 9.901704285316216e-06,
|
| 23786 |
+
"loss": 19.8177,
|
| 23787 |
+
"step": 33900
|
| 23788 |
+
},
|
| 23789 |
+
{
|
| 23790 |
+
"epoch": 0.6292783294710935,
|
| 23791 |
+
"grad_norm": 35.84375,
|
| 23792 |
+
"learning_rate": 9.901675289530173e-06,
|
| 23793 |
+
"loss": 19.5455,
|
| 23794 |
+
"step": 33910
|
| 23795 |
+
},
|
| 23796 |
+
{
|
| 23797 |
+
"epoch": 0.6294639025555734,
|
| 23798 |
+
"grad_norm": 35.65625,
|
| 23799 |
+
"learning_rate": 9.90164629374413e-06,
|
| 23800 |
+
"loss": 19.5187,
|
| 23801 |
+
"step": 33920
|
| 23802 |
+
},
|
| 23803 |
+
{
|
| 23804 |
+
"epoch": 0.6296494756400531,
|
| 23805 |
+
"grad_norm": 36.65625,
|
| 23806 |
+
"learning_rate": 9.901617297958088e-06,
|
| 23807 |
+
"loss": 19.8171,
|
| 23808 |
+
"step": 33930
|
| 23809 |
+
},
|
| 23810 |
+
{
|
| 23811 |
+
"epoch": 0.629835048724533,
|
| 23812 |
+
"grad_norm": 35.875,
|
| 23813 |
+
"learning_rate": 9.901588302172045e-06,
|
| 23814 |
+
"loss": 20.0522,
|
| 23815 |
+
"step": 33940
|
| 23816 |
+
},
|
| 23817 |
+
{
|
| 23818 |
+
"epoch": 0.6300206218090129,
|
| 23819 |
+
"grad_norm": 34.375,
|
| 23820 |
+
"learning_rate": 9.901559306386005e-06,
|
| 23821 |
+
"loss": 19.8402,
|
| 23822 |
+
"step": 33950
|
| 23823 |
+
},
|
| 23824 |
+
{
|
| 23825 |
+
"epoch": 0.6302061948934926,
|
| 23826 |
+
"grad_norm": 34.84375,
|
| 23827 |
+
"learning_rate": 9.901530310599962e-06,
|
| 23828 |
+
"loss": 19.9172,
|
| 23829 |
+
"step": 33960
|
| 23830 |
+
},
|
| 23831 |
+
{
|
| 23832 |
+
"epoch": 0.6303917679779725,
|
| 23833 |
+
"grad_norm": 36.8125,
|
| 23834 |
+
"learning_rate": 9.90150131481392e-06,
|
| 23835 |
+
"loss": 20.0115,
|
| 23836 |
+
"step": 33970
|
| 23837 |
+
},
|
| 23838 |
+
{
|
| 23839 |
+
"epoch": 0.6305773410624523,
|
| 23840 |
+
"grad_norm": 34.9375,
|
| 23841 |
+
"learning_rate": 9.901472319027877e-06,
|
| 23842 |
+
"loss": 19.6994,
|
| 23843 |
+
"step": 33980
|
| 23844 |
+
},
|
| 23845 |
+
{
|
| 23846 |
+
"epoch": 0.6307629141469321,
|
| 23847 |
+
"grad_norm": 34.90625,
|
| 23848 |
+
"learning_rate": 9.901443323241834e-06,
|
| 23849 |
+
"loss": 19.816,
|
| 23850 |
+
"step": 33990
|
| 23851 |
+
},
|
| 23852 |
+
{
|
| 23853 |
+
"epoch": 0.630948487231412,
|
| 23854 |
+
"grad_norm": 34.8125,
|
| 23855 |
+
"learning_rate": 9.901414327455792e-06,
|
| 23856 |
+
"loss": 19.6213,
|
| 23857 |
+
"step": 34000
|
| 23858 |
+
},
|
| 23859 |
+
{
|
| 23860 |
+
"epoch": 0.6311340603158918,
|
| 23861 |
+
"grad_norm": 34.9375,
|
| 23862 |
+
"learning_rate": 9.901385331669749e-06,
|
| 23863 |
+
"loss": 20.15,
|
| 23864 |
+
"step": 34010
|
| 23865 |
+
},
|
| 23866 |
+
{
|
| 23867 |
+
"epoch": 0.6313196334003716,
|
| 23868 |
+
"grad_norm": 35.03125,
|
| 23869 |
+
"learning_rate": 9.901356335883708e-06,
|
| 23870 |
+
"loss": 19.3955,
|
| 23871 |
+
"step": 34020
|
| 23872 |
+
},
|
| 23873 |
+
{
|
| 23874 |
+
"epoch": 0.6315052064848514,
|
| 23875 |
+
"grad_norm": 36.3125,
|
| 23876 |
+
"learning_rate": 9.901327340097664e-06,
|
| 23877 |
+
"loss": 19.6105,
|
| 23878 |
+
"step": 34030
|
| 23879 |
+
},
|
| 23880 |
+
{
|
| 23881 |
+
"epoch": 0.6316907795693313,
|
| 23882 |
+
"grad_norm": 34.875,
|
| 23883 |
+
"learning_rate": 9.901298344311621e-06,
|
| 23884 |
+
"loss": 19.9898,
|
| 23885 |
+
"step": 34040
|
| 23886 |
+
},
|
| 23887 |
+
{
|
| 23888 |
+
"epoch": 0.6318763526538111,
|
| 23889 |
+
"grad_norm": 35.75,
|
| 23890 |
+
"learning_rate": 9.90126934852558e-06,
|
| 23891 |
+
"loss": 19.8148,
|
| 23892 |
+
"step": 34050
|
| 23893 |
+
},
|
| 23894 |
+
{
|
| 23895 |
+
"epoch": 0.6320619257382909,
|
| 23896 |
+
"grad_norm": 36.5,
|
| 23897 |
+
"learning_rate": 9.901240352739538e-06,
|
| 23898 |
+
"loss": 19.6133,
|
| 23899 |
+
"step": 34060
|
| 23900 |
+
},
|
| 23901 |
+
{
|
| 23902 |
+
"epoch": 0.6322474988227708,
|
| 23903 |
+
"grad_norm": 31.59375,
|
| 23904 |
+
"learning_rate": 9.901211356953495e-06,
|
| 23905 |
+
"loss": 20.1236,
|
| 23906 |
+
"step": 34070
|
| 23907 |
+
},
|
| 23908 |
+
{
|
| 23909 |
+
"epoch": 0.6324330719072506,
|
| 23910 |
+
"grad_norm": 35.15625,
|
| 23911 |
+
"learning_rate": 9.901182361167453e-06,
|
| 23912 |
+
"loss": 20.4988,
|
| 23913 |
+
"step": 34080
|
| 23914 |
+
},
|
| 23915 |
+
{
|
| 23916 |
+
"epoch": 0.6326186449917304,
|
| 23917 |
+
"grad_norm": 35.625,
|
| 23918 |
+
"learning_rate": 9.90115336538141e-06,
|
| 23919 |
+
"loss": 19.7418,
|
| 23920 |
+
"step": 34090
|
| 23921 |
+
},
|
| 23922 |
+
{
|
| 23923 |
+
"epoch": 0.6328042180762102,
|
| 23924 |
+
"grad_norm": 34.8125,
|
| 23925 |
+
"learning_rate": 9.901124369595367e-06,
|
| 23926 |
+
"loss": 19.2006,
|
| 23927 |
+
"step": 34100
|
| 23928 |
+
},
|
| 23929 |
+
{
|
| 23930 |
+
"epoch": 0.6329897911606901,
|
| 23931 |
+
"grad_norm": 35.40625,
|
| 23932 |
+
"learning_rate": 9.901095373809325e-06,
|
| 23933 |
+
"loss": 19.9027,
|
| 23934 |
+
"step": 34110
|
| 23935 |
+
},
|
| 23936 |
+
{
|
| 23937 |
+
"epoch": 0.6331753642451698,
|
| 23938 |
+
"grad_norm": 34.71875,
|
| 23939 |
+
"learning_rate": 9.901066378023282e-06,
|
| 23940 |
+
"loss": 19.8453,
|
| 23941 |
+
"step": 34120
|
| 23942 |
+
},
|
| 23943 |
+
{
|
| 23944 |
+
"epoch": 0.6333609373296497,
|
| 23945 |
+
"grad_norm": 36.40625,
|
| 23946 |
+
"learning_rate": 9.901037382237241e-06,
|
| 23947 |
+
"loss": 19.7594,
|
| 23948 |
+
"step": 34130
|
| 23949 |
+
},
|
| 23950 |
+
{
|
| 23951 |
+
"epoch": 0.6335465104141296,
|
| 23952 |
+
"grad_norm": 35.15625,
|
| 23953 |
+
"learning_rate": 9.901008386451197e-06,
|
| 23954 |
+
"loss": 19.8507,
|
| 23955 |
+
"step": 34140
|
| 23956 |
+
},
|
| 23957 |
+
{
|
| 23958 |
+
"epoch": 0.6337320834986093,
|
| 23959 |
+
"grad_norm": 33.40625,
|
| 23960 |
+
"learning_rate": 9.900979390665156e-06,
|
| 23961 |
+
"loss": 19.9106,
|
| 23962 |
+
"step": 34150
|
| 23963 |
+
},
|
| 23964 |
+
{
|
| 23965 |
+
"epoch": 0.6339176565830892,
|
| 23966 |
+
"grad_norm": 35.6875,
|
| 23967 |
+
"learning_rate": 9.900950394879113e-06,
|
| 23968 |
+
"loss": 19.5232,
|
| 23969 |
+
"step": 34160
|
| 23970 |
+
},
|
| 23971 |
+
{
|
| 23972 |
+
"epoch": 0.634103229667569,
|
| 23973 |
+
"grad_norm": 35.9375,
|
| 23974 |
+
"learning_rate": 9.900921399093071e-06,
|
| 23975 |
+
"loss": 20.0958,
|
| 23976 |
+
"step": 34170
|
| 23977 |
+
},
|
| 23978 |
+
{
|
| 23979 |
+
"epoch": 0.6342888027520488,
|
| 23980 |
+
"grad_norm": 35.53125,
|
| 23981 |
+
"learning_rate": 9.900892403307028e-06,
|
| 23982 |
+
"loss": 19.7891,
|
| 23983 |
+
"step": 34180
|
| 23984 |
+
},
|
| 23985 |
+
{
|
| 23986 |
+
"epoch": 0.6344743758365287,
|
| 23987 |
+
"grad_norm": 35.5,
|
| 23988 |
+
"learning_rate": 9.900863407520986e-06,
|
| 23989 |
+
"loss": 19.612,
|
| 23990 |
+
"step": 34190
|
| 23991 |
+
},
|
| 23992 |
+
{
|
| 23993 |
+
"epoch": 0.6346599489210085,
|
| 23994 |
+
"grad_norm": 36.3125,
|
| 23995 |
+
"learning_rate": 9.900834411734943e-06,
|
| 23996 |
+
"loss": 19.3468,
|
| 23997 |
+
"step": 34200
|
| 23998 |
+
},
|
| 23999 |
+
{
|
| 24000 |
+
"epoch": 0.6348455220054883,
|
| 24001 |
+
"grad_norm": 34.71875,
|
| 24002 |
+
"learning_rate": 9.9008054159489e-06,
|
| 24003 |
+
"loss": 19.8482,
|
| 24004 |
+
"step": 34210
|
| 24005 |
+
},
|
| 24006 |
+
{
|
| 24007 |
+
"epoch": 0.6350310950899681,
|
| 24008 |
+
"grad_norm": 36.78125,
|
| 24009 |
+
"learning_rate": 9.900776420162858e-06,
|
| 24010 |
+
"loss": 19.6807,
|
| 24011 |
+
"step": 34220
|
| 24012 |
+
},
|
| 24013 |
+
{
|
| 24014 |
+
"epoch": 0.635216668174448,
|
| 24015 |
+
"grad_norm": 37.4375,
|
| 24016 |
+
"learning_rate": 9.900747424376817e-06,
|
| 24017 |
+
"loss": 19.9276,
|
| 24018 |
+
"step": 34230
|
| 24019 |
+
},
|
| 24020 |
+
{
|
| 24021 |
+
"epoch": 0.6354022412589279,
|
| 24022 |
+
"grad_norm": 34.21875,
|
| 24023 |
+
"learning_rate": 9.900718428590774e-06,
|
| 24024 |
+
"loss": 19.8411,
|
| 24025 |
+
"step": 34240
|
| 24026 |
+
},
|
| 24027 |
+
{
|
| 24028 |
+
"epoch": 0.6355878143434076,
|
| 24029 |
+
"grad_norm": 36.1875,
|
| 24030 |
+
"learning_rate": 9.90068943280473e-06,
|
| 24031 |
+
"loss": 19.7345,
|
| 24032 |
+
"step": 34250
|
| 24033 |
+
},
|
| 24034 |
+
{
|
| 24035 |
+
"epoch": 0.6357733874278875,
|
| 24036 |
+
"grad_norm": 32.90625,
|
| 24037 |
+
"learning_rate": 9.90066043701869e-06,
|
| 24038 |
+
"loss": 19.9992,
|
| 24039 |
+
"step": 34260
|
| 24040 |
+
},
|
| 24041 |
+
{
|
| 24042 |
+
"epoch": 0.6359589605123673,
|
| 24043 |
+
"grad_norm": 35.25,
|
| 24044 |
+
"learning_rate": 9.900631441232647e-06,
|
| 24045 |
+
"loss": 19.8676,
|
| 24046 |
+
"step": 34270
|
| 24047 |
+
},
|
| 24048 |
+
{
|
| 24049 |
+
"epoch": 0.6361445335968471,
|
| 24050 |
+
"grad_norm": 37.65625,
|
| 24051 |
+
"learning_rate": 9.900602445446604e-06,
|
| 24052 |
+
"loss": 20.3433,
|
| 24053 |
+
"step": 34280
|
| 24054 |
+
},
|
| 24055 |
+
{
|
| 24056 |
+
"epoch": 0.636330106681327,
|
| 24057 |
+
"grad_norm": 34.875,
|
| 24058 |
+
"learning_rate": 9.900573449660561e-06,
|
| 24059 |
+
"loss": 19.8522,
|
| 24060 |
+
"step": 34290
|
| 24061 |
+
},
|
| 24062 |
+
{
|
| 24063 |
+
"epoch": 0.6365156797658068,
|
| 24064 |
+
"grad_norm": 35.46875,
|
| 24065 |
+
"learning_rate": 9.900544453874519e-06,
|
| 24066 |
+
"loss": 20.1288,
|
| 24067 |
+
"step": 34300
|
| 24068 |
+
},
|
| 24069 |
+
{
|
| 24070 |
+
"epoch": 0.6367012528502866,
|
| 24071 |
+
"grad_norm": 34.5625,
|
| 24072 |
+
"learning_rate": 9.900515458088476e-06,
|
| 24073 |
+
"loss": 19.6961,
|
| 24074 |
+
"step": 34310
|
| 24075 |
+
},
|
| 24076 |
+
{
|
| 24077 |
+
"epoch": 0.6368868259347664,
|
| 24078 |
+
"grad_norm": 34.6875,
|
| 24079 |
+
"learning_rate": 9.900486462302434e-06,
|
| 24080 |
+
"loss": 19.2526,
|
| 24081 |
+
"step": 34320
|
| 24082 |
+
},
|
| 24083 |
+
{
|
| 24084 |
+
"epoch": 0.6370723990192463,
|
| 24085 |
+
"grad_norm": 35.0,
|
| 24086 |
+
"learning_rate": 9.900457466516393e-06,
|
| 24087 |
+
"loss": 19.4355,
|
| 24088 |
+
"step": 34330
|
| 24089 |
+
},
|
| 24090 |
+
{
|
| 24091 |
+
"epoch": 0.637257972103726,
|
| 24092 |
+
"grad_norm": 34.75,
|
| 24093 |
+
"learning_rate": 9.90042847073035e-06,
|
| 24094 |
+
"loss": 19.768,
|
| 24095 |
+
"step": 34340
|
| 24096 |
+
},
|
| 24097 |
+
{
|
| 24098 |
+
"epoch": 0.6374435451882059,
|
| 24099 |
+
"grad_norm": 36.21875,
|
| 24100 |
+
"learning_rate": 9.900399474944306e-06,
|
| 24101 |
+
"loss": 19.6199,
|
| 24102 |
+
"step": 34350
|
| 24103 |
+
},
|
| 24104 |
+
{
|
| 24105 |
+
"epoch": 0.6376291182726858,
|
| 24106 |
+
"grad_norm": 33.34375,
|
| 24107 |
+
"learning_rate": 9.900370479158265e-06,
|
| 24108 |
+
"loss": 19.721,
|
| 24109 |
+
"step": 34360
|
| 24110 |
+
},
|
| 24111 |
+
{
|
| 24112 |
+
"epoch": 0.6378146913571655,
|
| 24113 |
+
"grad_norm": 35.0625,
|
| 24114 |
+
"learning_rate": 9.900341483372222e-06,
|
| 24115 |
+
"loss": 19.5648,
|
| 24116 |
+
"step": 34370
|
| 24117 |
+
},
|
| 24118 |
+
{
|
| 24119 |
+
"epoch": 0.6380002644416454,
|
| 24120 |
+
"grad_norm": 36.5,
|
| 24121 |
+
"learning_rate": 9.90031248758618e-06,
|
| 24122 |
+
"loss": 19.8864,
|
| 24123 |
+
"step": 34380
|
| 24124 |
+
},
|
| 24125 |
+
{
|
| 24126 |
+
"epoch": 0.6381858375261252,
|
| 24127 |
+
"grad_norm": 36.53125,
|
| 24128 |
+
"learning_rate": 9.900283491800137e-06,
|
| 24129 |
+
"loss": 19.8828,
|
| 24130 |
+
"step": 34390
|
| 24131 |
+
},
|
| 24132 |
+
{
|
| 24133 |
+
"epoch": 0.638371410610605,
|
| 24134 |
+
"grad_norm": 33.9375,
|
| 24135 |
+
"learning_rate": 9.900254496014096e-06,
|
| 24136 |
+
"loss": 19.6419,
|
| 24137 |
+
"step": 34400
|
| 24138 |
+
},
|
| 24139 |
+
{
|
| 24140 |
+
"epoch": 0.6385569836950848,
|
| 24141 |
+
"grad_norm": 36.625,
|
| 24142 |
+
"learning_rate": 9.900225500228052e-06,
|
| 24143 |
+
"loss": 19.6846,
|
| 24144 |
+
"step": 34410
|
| 24145 |
+
},
|
| 24146 |
+
{
|
| 24147 |
+
"epoch": 0.6387425567795647,
|
| 24148 |
+
"grad_norm": 34.78125,
|
| 24149 |
+
"learning_rate": 9.90019650444201e-06,
|
| 24150 |
+
"loss": 20.1099,
|
| 24151 |
+
"step": 34420
|
| 24152 |
+
},
|
| 24153 |
+
{
|
| 24154 |
+
"epoch": 0.6389281298640446,
|
| 24155 |
+
"grad_norm": 37.34375,
|
| 24156 |
+
"learning_rate": 9.900167508655969e-06,
|
| 24157 |
+
"loss": 19.9376,
|
| 24158 |
+
"step": 34430
|
| 24159 |
+
},
|
| 24160 |
+
{
|
| 24161 |
+
"epoch": 0.6391137029485243,
|
| 24162 |
+
"grad_norm": 36.65625,
|
| 24163 |
+
"learning_rate": 9.900138512869926e-06,
|
| 24164 |
+
"loss": 19.7929,
|
| 24165 |
+
"step": 34440
|
| 24166 |
+
},
|
| 24167 |
+
{
|
| 24168 |
+
"epoch": 0.6392992760330042,
|
| 24169 |
+
"grad_norm": 35.5,
|
| 24170 |
+
"learning_rate": 9.900109517083883e-06,
|
| 24171 |
+
"loss": 20.0175,
|
| 24172 |
+
"step": 34450
|
| 24173 |
+
},
|
| 24174 |
+
{
|
| 24175 |
+
"epoch": 0.639484849117484,
|
| 24176 |
+
"grad_norm": 34.65625,
|
| 24177 |
+
"learning_rate": 9.90008052129784e-06,
|
| 24178 |
+
"loss": 19.6058,
|
| 24179 |
+
"step": 34460
|
| 24180 |
+
},
|
| 24181 |
+
{
|
| 24182 |
+
"epoch": 0.6396704222019638,
|
| 24183 |
+
"grad_norm": 35.375,
|
| 24184 |
+
"learning_rate": 9.900051525511798e-06,
|
| 24185 |
+
"loss": 19.6707,
|
| 24186 |
+
"step": 34470
|
| 24187 |
+
},
|
| 24188 |
+
{
|
| 24189 |
+
"epoch": 0.6398559952864437,
|
| 24190 |
+
"grad_norm": 35.5625,
|
| 24191 |
+
"learning_rate": 9.900022529725756e-06,
|
| 24192 |
+
"loss": 20.0595,
|
| 24193 |
+
"step": 34480
|
| 24194 |
+
},
|
| 24195 |
+
{
|
| 24196 |
+
"epoch": 0.6400415683709235,
|
| 24197 |
+
"grad_norm": 34.875,
|
| 24198 |
+
"learning_rate": 9.899993533939713e-06,
|
| 24199 |
+
"loss": 20.3709,
|
| 24200 |
+
"step": 34490
|
| 24201 |
+
},
|
| 24202 |
+
{
|
| 24203 |
+
"epoch": 0.6402271414554033,
|
| 24204 |
+
"grad_norm": 34.53125,
|
| 24205 |
+
"learning_rate": 9.899964538153672e-06,
|
| 24206 |
+
"loss": 20.1387,
|
| 24207 |
+
"step": 34500
|
| 24208 |
+
},
|
| 24209 |
+
{
|
| 24210 |
+
"epoch": 0.6404127145398831,
|
| 24211 |
+
"grad_norm": 34.9375,
|
| 24212 |
+
"learning_rate": 9.899935542367628e-06,
|
| 24213 |
+
"loss": 19.8495,
|
| 24214 |
+
"step": 34510
|
| 24215 |
+
},
|
| 24216 |
+
{
|
| 24217 |
+
"epoch": 0.640598287624363,
|
| 24218 |
+
"grad_norm": 33.9375,
|
| 24219 |
+
"learning_rate": 9.899906546581585e-06,
|
| 24220 |
+
"loss": 19.439,
|
| 24221 |
+
"step": 34520
|
| 24222 |
+
},
|
| 24223 |
+
{
|
| 24224 |
+
"epoch": 0.6407838607088427,
|
| 24225 |
+
"grad_norm": 36.15625,
|
| 24226 |
+
"learning_rate": 9.899877550795544e-06,
|
| 24227 |
+
"loss": 19.9088,
|
| 24228 |
+
"step": 34530
|
| 24229 |
+
},
|
| 24230 |
+
{
|
| 24231 |
+
"epoch": 0.6409694337933226,
|
| 24232 |
+
"grad_norm": 36.03125,
|
| 24233 |
+
"learning_rate": 9.899848555009502e-06,
|
| 24234 |
+
"loss": 20.363,
|
| 24235 |
+
"step": 34540
|
| 24236 |
+
},
|
| 24237 |
+
{
|
| 24238 |
+
"epoch": 0.6411550068778025,
|
| 24239 |
+
"grad_norm": 35.84375,
|
| 24240 |
+
"learning_rate": 9.899819559223459e-06,
|
| 24241 |
+
"loss": 19.5979,
|
| 24242 |
+
"step": 34550
|
| 24243 |
+
},
|
| 24244 |
+
{
|
| 24245 |
+
"epoch": 0.6413405799622822,
|
| 24246 |
+
"grad_norm": 35.5,
|
| 24247 |
+
"learning_rate": 9.899790563437417e-06,
|
| 24248 |
+
"loss": 19.6288,
|
| 24249 |
+
"step": 34560
|
| 24250 |
+
},
|
| 24251 |
+
{
|
| 24252 |
+
"epoch": 0.6415261530467621,
|
| 24253 |
+
"grad_norm": 32.9375,
|
| 24254 |
+
"learning_rate": 9.899761567651374e-06,
|
| 24255 |
+
"loss": 19.7712,
|
| 24256 |
+
"step": 34570
|
| 24257 |
+
},
|
| 24258 |
+
{
|
| 24259 |
+
"epoch": 0.6417117261312419,
|
| 24260 |
+
"grad_norm": 34.78125,
|
| 24261 |
+
"learning_rate": 9.899732571865331e-06,
|
| 24262 |
+
"loss": 19.8826,
|
| 24263 |
+
"step": 34580
|
| 24264 |
+
},
|
| 24265 |
+
{
|
| 24266 |
+
"epoch": 0.6418972992157218,
|
| 24267 |
+
"grad_norm": 35.53125,
|
| 24268 |
+
"learning_rate": 9.899703576079289e-06,
|
| 24269 |
+
"loss": 19.6118,
|
| 24270 |
+
"step": 34590
|
| 24271 |
+
},
|
| 24272 |
+
{
|
| 24273 |
+
"epoch": 0.6420828723002016,
|
| 24274 |
+
"grad_norm": 35.28125,
|
| 24275 |
+
"learning_rate": 9.899674580293248e-06,
|
| 24276 |
+
"loss": 20.0948,
|
| 24277 |
+
"step": 34600
|
| 24278 |
+
},
|
| 24279 |
+
{
|
| 24280 |
+
"epoch": 0.6422684453846814,
|
| 24281 |
+
"grad_norm": 34.90625,
|
| 24282 |
+
"learning_rate": 9.899645584507205e-06,
|
| 24283 |
+
"loss": 19.9192,
|
| 24284 |
+
"step": 34610
|
| 24285 |
+
},
|
| 24286 |
+
{
|
| 24287 |
+
"epoch": 0.6424540184691613,
|
| 24288 |
+
"grad_norm": 35.21875,
|
| 24289 |
+
"learning_rate": 9.899616588721161e-06,
|
| 24290 |
+
"loss": 20.5888,
|
| 24291 |
+
"step": 34620
|
| 24292 |
+
},
|
| 24293 |
+
{
|
| 24294 |
+
"epoch": 0.642639591553641,
|
| 24295 |
+
"grad_norm": 34.59375,
|
| 24296 |
+
"learning_rate": 9.89958759293512e-06,
|
| 24297 |
+
"loss": 19.441,
|
| 24298 |
+
"step": 34630
|
| 24299 |
+
},
|
| 24300 |
+
{
|
| 24301 |
+
"epoch": 0.6428251646381209,
|
| 24302 |
+
"grad_norm": 35.21875,
|
| 24303 |
+
"learning_rate": 9.899558597149077e-06,
|
| 24304 |
+
"loss": 19.7351,
|
| 24305 |
+
"step": 34640
|
| 24306 |
+
},
|
| 24307 |
+
{
|
| 24308 |
+
"epoch": 0.6430107377226008,
|
| 24309 |
+
"grad_norm": 35.625,
|
| 24310 |
+
"learning_rate": 9.899529601363035e-06,
|
| 24311 |
+
"loss": 19.9079,
|
| 24312 |
+
"step": 34650
|
| 24313 |
+
},
|
| 24314 |
+
{
|
| 24315 |
+
"epoch": 0.6431963108070805,
|
| 24316 |
+
"grad_norm": 36.4375,
|
| 24317 |
+
"learning_rate": 9.899500605576992e-06,
|
| 24318 |
+
"loss": 19.9245,
|
| 24319 |
+
"step": 34660
|
| 24320 |
+
},
|
| 24321 |
+
{
|
| 24322 |
+
"epoch": 0.6433818838915604,
|
| 24323 |
+
"grad_norm": 35.75,
|
| 24324 |
+
"learning_rate": 9.89947160979095e-06,
|
| 24325 |
+
"loss": 19.9472,
|
| 24326 |
+
"step": 34670
|
| 24327 |
+
},
|
| 24328 |
+
{
|
| 24329 |
+
"epoch": 0.6435674569760402,
|
| 24330 |
+
"grad_norm": 35.09375,
|
| 24331 |
+
"learning_rate": 9.899442614004907e-06,
|
| 24332 |
+
"loss": 19.8009,
|
| 24333 |
+
"step": 34680
|
| 24334 |
+
},
|
| 24335 |
+
{
|
| 24336 |
+
"epoch": 0.64375303006052,
|
| 24337 |
+
"grad_norm": 35.15625,
|
| 24338 |
+
"learning_rate": 9.899413618218865e-06,
|
| 24339 |
+
"loss": 20.0998,
|
| 24340 |
+
"step": 34690
|
| 24341 |
+
},
|
| 24342 |
+
{
|
| 24343 |
+
"epoch": 0.6439386031449998,
|
| 24344 |
+
"grad_norm": 35.96875,
|
| 24345 |
+
"learning_rate": 9.899384622432822e-06,
|
| 24346 |
+
"loss": 19.661,
|
| 24347 |
+
"step": 34700
|
| 24348 |
+
},
|
| 24349 |
+
{
|
| 24350 |
+
"epoch": 0.6441241762294797,
|
| 24351 |
+
"grad_norm": 35.28125,
|
| 24352 |
+
"learning_rate": 9.899355626646781e-06,
|
| 24353 |
+
"loss": 20.1212,
|
| 24354 |
+
"step": 34710
|
| 24355 |
+
},
|
| 24356 |
+
{
|
| 24357 |
+
"epoch": 0.6443097493139595,
|
| 24358 |
+
"grad_norm": 36.4375,
|
| 24359 |
+
"learning_rate": 9.899326630860738e-06,
|
| 24360 |
+
"loss": 19.703,
|
| 24361 |
+
"step": 34720
|
| 24362 |
+
},
|
| 24363 |
+
{
|
| 24364 |
+
"epoch": 0.6444953223984393,
|
| 24365 |
+
"grad_norm": 36.09375,
|
| 24366 |
+
"learning_rate": 9.899297635074694e-06,
|
| 24367 |
+
"loss": 19.6669,
|
| 24368 |
+
"step": 34730
|
| 24369 |
+
},
|
| 24370 |
+
{
|
| 24371 |
+
"epoch": 0.6446808954829192,
|
| 24372 |
+
"grad_norm": 36.34375,
|
| 24373 |
+
"learning_rate": 9.899268639288653e-06,
|
| 24374 |
+
"loss": 20.0713,
|
| 24375 |
+
"step": 34740
|
| 24376 |
+
},
|
| 24377 |
+
{
|
| 24378 |
+
"epoch": 0.6448664685673989,
|
| 24379 |
+
"grad_norm": 34.8125,
|
| 24380 |
+
"learning_rate": 9.89923964350261e-06,
|
| 24381 |
+
"loss": 19.7988,
|
| 24382 |
+
"step": 34750
|
| 24383 |
+
},
|
| 24384 |
+
{
|
| 24385 |
+
"epoch": 0.6450520416518788,
|
| 24386 |
+
"grad_norm": 34.875,
|
| 24387 |
+
"learning_rate": 9.899210647716568e-06,
|
| 24388 |
+
"loss": 19.8849,
|
| 24389 |
+
"step": 34760
|
| 24390 |
+
},
|
| 24391 |
+
{
|
| 24392 |
+
"epoch": 0.6452376147363587,
|
| 24393 |
+
"grad_norm": 34.34375,
|
| 24394 |
+
"learning_rate": 9.899181651930525e-06,
|
| 24395 |
+
"loss": 19.7736,
|
| 24396 |
+
"step": 34770
|
| 24397 |
+
},
|
| 24398 |
+
{
|
| 24399 |
+
"epoch": 0.6454231878208385,
|
| 24400 |
+
"grad_norm": 36.3125,
|
| 24401 |
+
"learning_rate": 9.899152656144483e-06,
|
| 24402 |
+
"loss": 20.3017,
|
| 24403 |
+
"step": 34780
|
| 24404 |
+
},
|
| 24405 |
+
{
|
| 24406 |
+
"epoch": 0.6456087609053183,
|
| 24407 |
+
"grad_norm": 33.65625,
|
| 24408 |
+
"learning_rate": 9.89912366035844e-06,
|
| 24409 |
+
"loss": 20.0971,
|
| 24410 |
+
"step": 34790
|
| 24411 |
+
},
|
| 24412 |
+
{
|
| 24413 |
+
"epoch": 0.6457943339897981,
|
| 24414 |
+
"grad_norm": 36.03125,
|
| 24415 |
+
"learning_rate": 9.899094664572398e-06,
|
| 24416 |
+
"loss": 19.5588,
|
| 24417 |
+
"step": 34800
|
| 24418 |
+
},
|
| 24419 |
+
{
|
| 24420 |
+
"epoch": 0.645979907074278,
|
| 24421 |
+
"grad_norm": 33.84375,
|
| 24422 |
+
"learning_rate": 9.899065668786357e-06,
|
| 24423 |
+
"loss": 19.7038,
|
| 24424 |
+
"step": 34810
|
| 24425 |
+
},
|
| 24426 |
+
{
|
| 24427 |
+
"epoch": 0.6461654801587577,
|
| 24428 |
+
"grad_norm": 35.78125,
|
| 24429 |
+
"learning_rate": 9.899036673000314e-06,
|
| 24430 |
+
"loss": 20.0109,
|
| 24431 |
+
"step": 34820
|
| 24432 |
+
},
|
| 24433 |
+
{
|
| 24434 |
+
"epoch": 0.6463510532432376,
|
| 24435 |
+
"grad_norm": 35.03125,
|
| 24436 |
+
"learning_rate": 9.899007677214272e-06,
|
| 24437 |
+
"loss": 19.5806,
|
| 24438 |
+
"step": 34830
|
| 24439 |
+
},
|
| 24440 |
+
{
|
| 24441 |
+
"epoch": 0.6465366263277175,
|
| 24442 |
+
"grad_norm": 34.125,
|
| 24443 |
+
"learning_rate": 9.898978681428229e-06,
|
| 24444 |
+
"loss": 19.8743,
|
| 24445 |
+
"step": 34840
|
| 24446 |
+
},
|
| 24447 |
+
{
|
| 24448 |
+
"epoch": 0.6467221994121972,
|
| 24449 |
+
"grad_norm": 36.375,
|
| 24450 |
+
"learning_rate": 9.898949685642186e-06,
|
| 24451 |
+
"loss": 19.4995,
|
| 24452 |
+
"step": 34850
|
| 24453 |
+
},
|
| 24454 |
+
{
|
| 24455 |
+
"epoch": 0.6469077724966771,
|
| 24456 |
+
"grad_norm": 35.46875,
|
| 24457 |
+
"learning_rate": 9.898920689856144e-06,
|
| 24458 |
+
"loss": 19.5762,
|
| 24459 |
+
"step": 34860
|
| 24460 |
+
},
|
| 24461 |
+
{
|
| 24462 |
+
"epoch": 0.6470933455811569,
|
| 24463 |
+
"grad_norm": 35.375,
|
| 24464 |
+
"learning_rate": 9.898891694070101e-06,
|
| 24465 |
+
"loss": 19.7716,
|
| 24466 |
+
"step": 34870
|
| 24467 |
+
},
|
| 24468 |
+
{
|
| 24469 |
+
"epoch": 0.6472789186656367,
|
| 24470 |
+
"grad_norm": 34.5,
|
| 24471 |
+
"learning_rate": 9.89886269828406e-06,
|
| 24472 |
+
"loss": 19.8697,
|
| 24473 |
+
"step": 34880
|
| 24474 |
+
},
|
| 24475 |
+
{
|
| 24476 |
+
"epoch": 0.6474644917501166,
|
| 24477 |
+
"grad_norm": 36.03125,
|
| 24478 |
+
"learning_rate": 9.898833702498016e-06,
|
| 24479 |
+
"loss": 19.7747,
|
| 24480 |
+
"step": 34890
|
| 24481 |
+
},
|
| 24482 |
+
{
|
| 24483 |
+
"epoch": 0.6476500648345964,
|
| 24484 |
+
"grad_norm": 35.15625,
|
| 24485 |
+
"learning_rate": 9.898804706711973e-06,
|
| 24486 |
+
"loss": 19.6159,
|
| 24487 |
+
"step": 34900
|
| 24488 |
+
},
|
| 24489 |
+
{
|
| 24490 |
+
"epoch": 0.6478356379190762,
|
| 24491 |
+
"grad_norm": 35.53125,
|
| 24492 |
+
"learning_rate": 9.898775710925933e-06,
|
| 24493 |
+
"loss": 20.2549,
|
| 24494 |
+
"step": 34910
|
| 24495 |
+
},
|
| 24496 |
+
{
|
| 24497 |
+
"epoch": 0.648021211003556,
|
| 24498 |
+
"grad_norm": 35.09375,
|
| 24499 |
+
"learning_rate": 9.89874671513989e-06,
|
| 24500 |
+
"loss": 20.0461,
|
| 24501 |
+
"step": 34920
|
| 24502 |
+
},
|
| 24503 |
+
{
|
| 24504 |
+
"epoch": 0.6482067840880359,
|
| 24505 |
+
"grad_norm": 34.125,
|
| 24506 |
+
"learning_rate": 9.898717719353847e-06,
|
| 24507 |
+
"loss": 19.3693,
|
| 24508 |
+
"step": 34930
|
| 24509 |
+
},
|
| 24510 |
+
{
|
| 24511 |
+
"epoch": 0.6483923571725156,
|
| 24512 |
+
"grad_norm": 34.625,
|
| 24513 |
+
"learning_rate": 9.898688723567805e-06,
|
| 24514 |
+
"loss": 19.9014,
|
| 24515 |
+
"step": 34940
|
| 24516 |
+
},
|
| 24517 |
+
{
|
| 24518 |
+
"epoch": 0.6485779302569955,
|
| 24519 |
+
"grad_norm": 37.65625,
|
| 24520 |
+
"learning_rate": 9.898659727781762e-06,
|
| 24521 |
+
"loss": 20.2016,
|
| 24522 |
+
"step": 34950
|
| 24523 |
+
},
|
| 24524 |
+
{
|
| 24525 |
+
"epoch": 0.6487635033414754,
|
| 24526 |
+
"grad_norm": 35.53125,
|
| 24527 |
+
"learning_rate": 9.89863073199572e-06,
|
| 24528 |
+
"loss": 20.0671,
|
| 24529 |
+
"step": 34960
|
| 24530 |
+
},
|
| 24531 |
+
{
|
| 24532 |
+
"epoch": 0.6489490764259552,
|
| 24533 |
+
"grad_norm": 34.5,
|
| 24534 |
+
"learning_rate": 9.898601736209677e-06,
|
| 24535 |
+
"loss": 19.811,
|
| 24536 |
+
"step": 34970
|
| 24537 |
+
},
|
| 24538 |
+
{
|
| 24539 |
+
"epoch": 0.649134649510435,
|
| 24540 |
+
"grad_norm": 35.40625,
|
| 24541 |
+
"learning_rate": 9.898572740423636e-06,
|
| 24542 |
+
"loss": 19.5194,
|
| 24543 |
+
"step": 34980
|
| 24544 |
+
},
|
| 24545 |
+
{
|
| 24546 |
+
"epoch": 0.6493202225949148,
|
| 24547 |
+
"grad_norm": 38.28125,
|
| 24548 |
+
"learning_rate": 9.898543744637594e-06,
|
| 24549 |
+
"loss": 19.9472,
|
| 24550 |
+
"step": 34990
|
| 24551 |
+
},
|
| 24552 |
+
{
|
| 24553 |
+
"epoch": 0.6495057956793947,
|
| 24554 |
+
"grad_norm": 33.96875,
|
| 24555 |
+
"learning_rate": 9.89851474885155e-06,
|
| 24556 |
+
"loss": 19.7037,
|
| 24557 |
+
"step": 35000
|
| 24558 |
+
},
|
| 24559 |
+
{
|
| 24560 |
+
"epoch": 0.6495057956793947,
|
| 24561 |
+
"eval_loss": 2.4709317684173584,
|
| 24562 |
+
"eval_runtime": 455.6259,
|
| 24563 |
+
"eval_samples_per_second": 3187.082,
|
| 24564 |
+
"eval_steps_per_second": 49.8,
|
| 24565 |
+
"step": 35000
|
| 24566 |
}
|
| 24567 |
],
|
| 24568 |
"logging_steps": 10,
|
|
|
|
| 24582 |
"attributes": {}
|
| 24583 |
}
|
| 24584 |
},
|
| 24585 |
+
"total_flos": 6.109329656643584e+18,
|
| 24586 |
"train_batch_size": 8,
|
| 24587 |
"trial_name": null,
|
| 24588 |
"trial_params": null
|