Training in progress, step 45000, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3511 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 306619286
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8898863e03111dc0f9067fbf61d7c9148ac8efc7f3547cbf592b621a60724e4b
|
| 3 |
size 306619286
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 919972410
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b81fcbe3c87675fdccd5b2d56c7d720479008ef5d12e3659d6f5288de1f4369
|
| 3 |
size 919972410
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ca51d4b33edcedf9568d5202767b896d828b5aeca18f2cdd82617688464b784
|
| 3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10e6ea705ea5a1704cd5773090c827a2013c8caab967a116ff24a5f57ce3ce90
|
| 3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6ff0b4da12dd0cdcb6e90b04160e41685d9ccc1fa1cc74bb7949edf700200d4
|
| 3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:322470b09ac4f5d9443d55c37c8b8e7d0e8a1702208c81e52e3a58a8de515b5b
|
| 3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ababe8505205ca2bcb959a2abbe2fbc8b6ad677bd43b1f2ee9055b3cb400b061
|
| 3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed1c3ba656fdb40a72824e366a08e148a30e1089a6ecd019eaf28fa4a17859fa
|
| 3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:042c62210b9e1c9ed394e0a3362b1c773c07591d94f2716a8e928676134742b7
|
| 3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb54f0278c663494261026658652f845bae43245e75ccc213c6897de179f542a
|
| 3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6e8fc146b9f79d0e4ed1e4480536314f0ec3a00f8460a905aee0c66bb636dfe
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 5000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -28071,6 +28071,3514 @@
|
|
| 28071 |
"eval_samples_per_second": 3197.12,
|
| 28072 |
"eval_steps_per_second": 49.956,
|
| 28073 |
"step": 40000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28074 |
}
|
| 28075 |
],
|
| 28076 |
"logging_steps": 10,
|
|
@@ -28090,7 +31598,7 @@
|
|
| 28090 |
"attributes": {}
|
| 28091 |
}
|
| 28092 |
},
|
| 28093 |
-
"total_flos":
|
| 28094 |
"train_batch_size": 8,
|
| 28095 |
"trial_name": null,
|
| 28096 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.8350788801592217,
|
| 5 |
"eval_steps": 5000,
|
| 6 |
+
"global_step": 45000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 28071 |
"eval_samples_per_second": 3197.12,
|
| 28072 |
"eval_steps_per_second": 49.956,
|
| 28073 |
"step": 40000
|
| 28074 |
+
},
|
| 28075 |
+
{
|
| 28076 |
+
"epoch": 0.742477911003788,
|
| 28077 |
+
"grad_norm": 35.1875,
|
| 28078 |
+
"learning_rate": 9.8839878600443e-06,
|
| 28079 |
+
"loss": 19.1801,
|
| 28080 |
+
"step": 40010
|
| 28081 |
+
},
|
| 28082 |
+
{
|
| 28083 |
+
"epoch": 0.7426634840882679,
|
| 28084 |
+
"grad_norm": 35.59375,
|
| 28085 |
+
"learning_rate": 9.883958864258259e-06,
|
| 28086 |
+
"loss": 19.2543,
|
| 28087 |
+
"step": 40020
|
| 28088 |
+
},
|
| 28089 |
+
{
|
| 28090 |
+
"epoch": 0.7428490571727476,
|
| 28091 |
+
"grad_norm": 35.34375,
|
| 28092 |
+
"learning_rate": 9.883929868472216e-06,
|
| 28093 |
+
"loss": 19.4882,
|
| 28094 |
+
"step": 40030
|
| 28095 |
+
},
|
| 28096 |
+
{
|
| 28097 |
+
"epoch": 0.7430346302572275,
|
| 28098 |
+
"grad_norm": 37.21875,
|
| 28099 |
+
"learning_rate": 9.883900872686172e-06,
|
| 28100 |
+
"loss": 19.6511,
|
| 28101 |
+
"step": 40040
|
| 28102 |
+
},
|
| 28103 |
+
{
|
| 28104 |
+
"epoch": 0.7432202033417074,
|
| 28105 |
+
"grad_norm": 37.5,
|
| 28106 |
+
"learning_rate": 9.883871876900131e-06,
|
| 28107 |
+
"loss": 19.5237,
|
| 28108 |
+
"step": 40050
|
| 28109 |
+
},
|
| 28110 |
+
{
|
| 28111 |
+
"epoch": 0.7434057764261871,
|
| 28112 |
+
"grad_norm": 37.0,
|
| 28113 |
+
"learning_rate": 9.883842881114089e-06,
|
| 28114 |
+
"loss": 19.3757,
|
| 28115 |
+
"step": 40060
|
| 28116 |
+
},
|
| 28117 |
+
{
|
| 28118 |
+
"epoch": 0.743591349510667,
|
| 28119 |
+
"grad_norm": 36.03125,
|
| 28120 |
+
"learning_rate": 9.883813885328046e-06,
|
| 28121 |
+
"loss": 19.4206,
|
| 28122 |
+
"step": 40070
|
| 28123 |
+
},
|
| 28124 |
+
{
|
| 28125 |
+
"epoch": 0.7437769225951468,
|
| 28126 |
+
"grad_norm": 34.375,
|
| 28127 |
+
"learning_rate": 9.883784889542004e-06,
|
| 28128 |
+
"loss": 19.3413,
|
| 28129 |
+
"step": 40080
|
| 28130 |
+
},
|
| 28131 |
+
{
|
| 28132 |
+
"epoch": 0.7439624956796266,
|
| 28133 |
+
"grad_norm": 35.25,
|
| 28134 |
+
"learning_rate": 9.883755893755961e-06,
|
| 28135 |
+
"loss": 19.2141,
|
| 28136 |
+
"step": 40090
|
| 28137 |
+
},
|
| 28138 |
+
{
|
| 28139 |
+
"epoch": 0.7441480687641064,
|
| 28140 |
+
"grad_norm": 36.21875,
|
| 28141 |
+
"learning_rate": 9.883726897969918e-06,
|
| 28142 |
+
"loss": 19.7938,
|
| 28143 |
+
"step": 40100
|
| 28144 |
+
},
|
| 28145 |
+
{
|
| 28146 |
+
"epoch": 0.7443336418485863,
|
| 28147 |
+
"grad_norm": 35.875,
|
| 28148 |
+
"learning_rate": 9.883697902183876e-06,
|
| 28149 |
+
"loss": 19.4773,
|
| 28150 |
+
"step": 40110
|
| 28151 |
+
},
|
| 28152 |
+
{
|
| 28153 |
+
"epoch": 0.744519214933066,
|
| 28154 |
+
"grad_norm": 34.75,
|
| 28155 |
+
"learning_rate": 9.883668906397835e-06,
|
| 28156 |
+
"loss": 19.4294,
|
| 28157 |
+
"step": 40120
|
| 28158 |
+
},
|
| 28159 |
+
{
|
| 28160 |
+
"epoch": 0.7447047880175459,
|
| 28161 |
+
"grad_norm": 36.375,
|
| 28162 |
+
"learning_rate": 9.883639910611792e-06,
|
| 28163 |
+
"loss": 19.7319,
|
| 28164 |
+
"step": 40130
|
| 28165 |
+
},
|
| 28166 |
+
{
|
| 28167 |
+
"epoch": 0.7448903611020258,
|
| 28168 |
+
"grad_norm": 37.96875,
|
| 28169 |
+
"learning_rate": 9.883610914825748e-06,
|
| 28170 |
+
"loss": 19.207,
|
| 28171 |
+
"step": 40140
|
| 28172 |
+
},
|
| 28173 |
+
{
|
| 28174 |
+
"epoch": 0.7450759341865056,
|
| 28175 |
+
"grad_norm": 36.65625,
|
| 28176 |
+
"learning_rate": 9.883581919039707e-06,
|
| 28177 |
+
"loss": 19.2734,
|
| 28178 |
+
"step": 40150
|
| 28179 |
+
},
|
| 28180 |
+
{
|
| 28181 |
+
"epoch": 0.7452615072709854,
|
| 28182 |
+
"grad_norm": 36.125,
|
| 28183 |
+
"learning_rate": 9.883552923253664e-06,
|
| 28184 |
+
"loss": 19.6617,
|
| 28185 |
+
"step": 40160
|
| 28186 |
+
},
|
| 28187 |
+
{
|
| 28188 |
+
"epoch": 0.7454470803554653,
|
| 28189 |
+
"grad_norm": 37.25,
|
| 28190 |
+
"learning_rate": 9.883523927467622e-06,
|
| 28191 |
+
"loss": 19.5435,
|
| 28192 |
+
"step": 40170
|
| 28193 |
+
},
|
| 28194 |
+
{
|
| 28195 |
+
"epoch": 0.7456326534399451,
|
| 28196 |
+
"grad_norm": 34.3125,
|
| 28197 |
+
"learning_rate": 9.88349493168158e-06,
|
| 28198 |
+
"loss": 19.359,
|
| 28199 |
+
"step": 40180
|
| 28200 |
+
},
|
| 28201 |
+
{
|
| 28202 |
+
"epoch": 0.7458182265244249,
|
| 28203 |
+
"grad_norm": 37.34375,
|
| 28204 |
+
"learning_rate": 9.883465935895538e-06,
|
| 28205 |
+
"loss": 19.4984,
|
| 28206 |
+
"step": 40190
|
| 28207 |
+
},
|
| 28208 |
+
{
|
| 28209 |
+
"epoch": 0.7460037996089047,
|
| 28210 |
+
"grad_norm": 35.3125,
|
| 28211 |
+
"learning_rate": 9.883436940109494e-06,
|
| 28212 |
+
"loss": 19.529,
|
| 28213 |
+
"step": 40200
|
| 28214 |
+
},
|
| 28215 |
+
{
|
| 28216 |
+
"epoch": 0.7461893726933846,
|
| 28217 |
+
"grad_norm": 37.34375,
|
| 28218 |
+
"learning_rate": 9.883407944323452e-06,
|
| 28219 |
+
"loss": 19.4569,
|
| 28220 |
+
"step": 40210
|
| 28221 |
+
},
|
| 28222 |
+
{
|
| 28223 |
+
"epoch": 0.7463749457778643,
|
| 28224 |
+
"grad_norm": 35.28125,
|
| 28225 |
+
"learning_rate": 9.88337894853741e-06,
|
| 28226 |
+
"loss": 19.7786,
|
| 28227 |
+
"step": 40220
|
| 28228 |
+
},
|
| 28229 |
+
{
|
| 28230 |
+
"epoch": 0.7465605188623442,
|
| 28231 |
+
"grad_norm": 37.03125,
|
| 28232 |
+
"learning_rate": 9.883349952751368e-06,
|
| 28233 |
+
"loss": 19.3416,
|
| 28234 |
+
"step": 40230
|
| 28235 |
+
},
|
| 28236 |
+
{
|
| 28237 |
+
"epoch": 0.7467460919468241,
|
| 28238 |
+
"grad_norm": 34.96875,
|
| 28239 |
+
"learning_rate": 9.883320956965325e-06,
|
| 28240 |
+
"loss": 19.2966,
|
| 28241 |
+
"step": 40240
|
| 28242 |
+
},
|
| 28243 |
+
{
|
| 28244 |
+
"epoch": 0.7469316650313038,
|
| 28245 |
+
"grad_norm": 34.59375,
|
| 28246 |
+
"learning_rate": 9.883291961179283e-06,
|
| 28247 |
+
"loss": 19.4214,
|
| 28248 |
+
"step": 40250
|
| 28249 |
+
},
|
| 28250 |
+
{
|
| 28251 |
+
"epoch": 0.7471172381157837,
|
| 28252 |
+
"grad_norm": 36.59375,
|
| 28253 |
+
"learning_rate": 9.88326296539324e-06,
|
| 28254 |
+
"loss": 19.2123,
|
| 28255 |
+
"step": 40260
|
| 28256 |
+
},
|
| 28257 |
+
{
|
| 28258 |
+
"epoch": 0.7473028112002635,
|
| 28259 |
+
"grad_norm": 35.375,
|
| 28260 |
+
"learning_rate": 9.883233969607198e-06,
|
| 28261 |
+
"loss": 19.0819,
|
| 28262 |
+
"step": 40270
|
| 28263 |
+
},
|
| 28264 |
+
{
|
| 28265 |
+
"epoch": 0.7474883842847433,
|
| 28266 |
+
"grad_norm": 35.125,
|
| 28267 |
+
"learning_rate": 9.883204973821155e-06,
|
| 28268 |
+
"loss": 19.7437,
|
| 28269 |
+
"step": 40280
|
| 28270 |
+
},
|
| 28271 |
+
{
|
| 28272 |
+
"epoch": 0.7476739573692232,
|
| 28273 |
+
"grad_norm": 36.6875,
|
| 28274 |
+
"learning_rate": 9.883175978035112e-06,
|
| 28275 |
+
"loss": 19.0334,
|
| 28276 |
+
"step": 40290
|
| 28277 |
+
},
|
| 28278 |
+
{
|
| 28279 |
+
"epoch": 0.747859530453703,
|
| 28280 |
+
"grad_norm": 34.3125,
|
| 28281 |
+
"learning_rate": 9.883146982249072e-06,
|
| 28282 |
+
"loss": 19.398,
|
| 28283 |
+
"step": 40300
|
| 28284 |
+
},
|
| 28285 |
+
{
|
| 28286 |
+
"epoch": 0.7480451035381829,
|
| 28287 |
+
"grad_norm": 36.6875,
|
| 28288 |
+
"learning_rate": 9.883117986463027e-06,
|
| 28289 |
+
"loss": 19.3823,
|
| 28290 |
+
"step": 40310
|
| 28291 |
+
},
|
| 28292 |
+
{
|
| 28293 |
+
"epoch": 0.7482306766226626,
|
| 28294 |
+
"grad_norm": 36.53125,
|
| 28295 |
+
"learning_rate": 9.883088990676986e-06,
|
| 28296 |
+
"loss": 19.3756,
|
| 28297 |
+
"step": 40320
|
| 28298 |
+
},
|
| 28299 |
+
{
|
| 28300 |
+
"epoch": 0.7484162497071425,
|
| 28301 |
+
"grad_norm": 38.0,
|
| 28302 |
+
"learning_rate": 9.883059994890944e-06,
|
| 28303 |
+
"loss": 18.9045,
|
| 28304 |
+
"step": 40330
|
| 28305 |
+
},
|
| 28306 |
+
{
|
| 28307 |
+
"epoch": 0.7486018227916224,
|
| 28308 |
+
"grad_norm": 36.96875,
|
| 28309 |
+
"learning_rate": 9.883030999104901e-06,
|
| 28310 |
+
"loss": 19.2252,
|
| 28311 |
+
"step": 40340
|
| 28312 |
+
},
|
| 28313 |
+
{
|
| 28314 |
+
"epoch": 0.7487873958761021,
|
| 28315 |
+
"grad_norm": 37.1875,
|
| 28316 |
+
"learning_rate": 9.883002003318859e-06,
|
| 28317 |
+
"loss": 19.2014,
|
| 28318 |
+
"step": 40350
|
| 28319 |
+
},
|
| 28320 |
+
{
|
| 28321 |
+
"epoch": 0.748972968960582,
|
| 28322 |
+
"grad_norm": 33.34375,
|
| 28323 |
+
"learning_rate": 9.882973007532816e-06,
|
| 28324 |
+
"loss": 19.202,
|
| 28325 |
+
"step": 40360
|
| 28326 |
+
},
|
| 28327 |
+
{
|
| 28328 |
+
"epoch": 0.7491585420450618,
|
| 28329 |
+
"grad_norm": 33.0625,
|
| 28330 |
+
"learning_rate": 9.882944011746773e-06,
|
| 28331 |
+
"loss": 19.8227,
|
| 28332 |
+
"step": 40370
|
| 28333 |
+
},
|
| 28334 |
+
{
|
| 28335 |
+
"epoch": 0.7493441151295416,
|
| 28336 |
+
"grad_norm": 33.875,
|
| 28337 |
+
"learning_rate": 9.88291501596073e-06,
|
| 28338 |
+
"loss": 19.4171,
|
| 28339 |
+
"step": 40380
|
| 28340 |
+
},
|
| 28341 |
+
{
|
| 28342 |
+
"epoch": 0.7495296882140214,
|
| 28343 |
+
"grad_norm": 36.21875,
|
| 28344 |
+
"learning_rate": 9.882886020174688e-06,
|
| 28345 |
+
"loss": 19.1841,
|
| 28346 |
+
"step": 40390
|
| 28347 |
+
},
|
| 28348 |
+
{
|
| 28349 |
+
"epoch": 0.7497152612985013,
|
| 28350 |
+
"grad_norm": 38.03125,
|
| 28351 |
+
"learning_rate": 9.882857024388647e-06,
|
| 28352 |
+
"loss": 19.3367,
|
| 28353 |
+
"step": 40400
|
| 28354 |
+
},
|
| 28355 |
+
{
|
| 28356 |
+
"epoch": 0.749900834382981,
|
| 28357 |
+
"grad_norm": 36.15625,
|
| 28358 |
+
"learning_rate": 9.882828028602603e-06,
|
| 28359 |
+
"loss": 19.5167,
|
| 28360 |
+
"step": 40410
|
| 28361 |
+
},
|
| 28362 |
+
{
|
| 28363 |
+
"epoch": 0.7500864074674609,
|
| 28364 |
+
"grad_norm": 36.0625,
|
| 28365 |
+
"learning_rate": 9.88279903281656e-06,
|
| 28366 |
+
"loss": 19.3782,
|
| 28367 |
+
"step": 40420
|
| 28368 |
+
},
|
| 28369 |
+
{
|
| 28370 |
+
"epoch": 0.7502719805519408,
|
| 28371 |
+
"grad_norm": 37.125,
|
| 28372 |
+
"learning_rate": 9.88277003703052e-06,
|
| 28373 |
+
"loss": 19.5295,
|
| 28374 |
+
"step": 40430
|
| 28375 |
+
},
|
| 28376 |
+
{
|
| 28377 |
+
"epoch": 0.7504575536364205,
|
| 28378 |
+
"grad_norm": 34.46875,
|
| 28379 |
+
"learning_rate": 9.882741041244477e-06,
|
| 28380 |
+
"loss": 19.3648,
|
| 28381 |
+
"step": 40440
|
| 28382 |
+
},
|
| 28383 |
+
{
|
| 28384 |
+
"epoch": 0.7506431267209004,
|
| 28385 |
+
"grad_norm": 35.40625,
|
| 28386 |
+
"learning_rate": 9.882712045458434e-06,
|
| 28387 |
+
"loss": 19.4462,
|
| 28388 |
+
"step": 40450
|
| 28389 |
+
},
|
| 28390 |
+
{
|
| 28391 |
+
"epoch": 0.7508286998053803,
|
| 28392 |
+
"grad_norm": 37.78125,
|
| 28393 |
+
"learning_rate": 9.882683049672392e-06,
|
| 28394 |
+
"loss": 19.5866,
|
| 28395 |
+
"step": 40460
|
| 28396 |
+
},
|
| 28397 |
+
{
|
| 28398 |
+
"epoch": 0.75101427288986,
|
| 28399 |
+
"grad_norm": 36.46875,
|
| 28400 |
+
"learning_rate": 9.88265405388635e-06,
|
| 28401 |
+
"loss": 19.1955,
|
| 28402 |
+
"step": 40470
|
| 28403 |
+
},
|
| 28404 |
+
{
|
| 28405 |
+
"epoch": 0.7511998459743399,
|
| 28406 |
+
"grad_norm": 36.59375,
|
| 28407 |
+
"learning_rate": 9.882625058100307e-06,
|
| 28408 |
+
"loss": 19.3081,
|
| 28409 |
+
"step": 40480
|
| 28410 |
+
},
|
| 28411 |
+
{
|
| 28412 |
+
"epoch": 0.7513854190588197,
|
| 28413 |
+
"grad_norm": 35.5625,
|
| 28414 |
+
"learning_rate": 9.882596062314264e-06,
|
| 28415 |
+
"loss": 19.5015,
|
| 28416 |
+
"step": 40490
|
| 28417 |
+
},
|
| 28418 |
+
{
|
| 28419 |
+
"epoch": 0.7515709921432996,
|
| 28420 |
+
"grad_norm": 36.90625,
|
| 28421 |
+
"learning_rate": 9.882567066528223e-06,
|
| 28422 |
+
"loss": 19.6425,
|
| 28423 |
+
"step": 40500
|
| 28424 |
+
},
|
| 28425 |
+
{
|
| 28426 |
+
"epoch": 0.7517565652277793,
|
| 28427 |
+
"grad_norm": 37.1875,
|
| 28428 |
+
"learning_rate": 9.88253807074218e-06,
|
| 28429 |
+
"loss": 19.2909,
|
| 28430 |
+
"step": 40510
|
| 28431 |
+
},
|
| 28432 |
+
{
|
| 28433 |
+
"epoch": 0.7519421383122592,
|
| 28434 |
+
"grad_norm": 33.59375,
|
| 28435 |
+
"learning_rate": 9.882509074956136e-06,
|
| 28436 |
+
"loss": 19.621,
|
| 28437 |
+
"step": 40520
|
| 28438 |
+
},
|
| 28439 |
+
{
|
| 28440 |
+
"epoch": 0.7521277113967391,
|
| 28441 |
+
"grad_norm": 36.34375,
|
| 28442 |
+
"learning_rate": 9.882480079170095e-06,
|
| 28443 |
+
"loss": 19.4051,
|
| 28444 |
+
"step": 40530
|
| 28445 |
+
},
|
| 28446 |
+
{
|
| 28447 |
+
"epoch": 0.7523132844812188,
|
| 28448 |
+
"grad_norm": 36.125,
|
| 28449 |
+
"learning_rate": 9.882451083384053e-06,
|
| 28450 |
+
"loss": 19.5807,
|
| 28451 |
+
"step": 40540
|
| 28452 |
+
},
|
| 28453 |
+
{
|
| 28454 |
+
"epoch": 0.7524988575656987,
|
| 28455 |
+
"grad_norm": 36.25,
|
| 28456 |
+
"learning_rate": 9.88242208759801e-06,
|
| 28457 |
+
"loss": 19.2389,
|
| 28458 |
+
"step": 40550
|
| 28459 |
+
},
|
| 28460 |
+
{
|
| 28461 |
+
"epoch": 0.7526844306501785,
|
| 28462 |
+
"grad_norm": 33.71875,
|
| 28463 |
+
"learning_rate": 9.882393091811968e-06,
|
| 28464 |
+
"loss": 19.5497,
|
| 28465 |
+
"step": 40560
|
| 28466 |
+
},
|
| 28467 |
+
{
|
| 28468 |
+
"epoch": 0.7528700037346583,
|
| 28469 |
+
"grad_norm": 36.6875,
|
| 28470 |
+
"learning_rate": 9.882364096025925e-06,
|
| 28471 |
+
"loss": 19.7689,
|
| 28472 |
+
"step": 40570
|
| 28473 |
+
},
|
| 28474 |
+
{
|
| 28475 |
+
"epoch": 0.7530555768191382,
|
| 28476 |
+
"grad_norm": 37.34375,
|
| 28477 |
+
"learning_rate": 9.882335100239882e-06,
|
| 28478 |
+
"loss": 19.1836,
|
| 28479 |
+
"step": 40580
|
| 28480 |
+
},
|
| 28481 |
+
{
|
| 28482 |
+
"epoch": 0.753241149903618,
|
| 28483 |
+
"grad_norm": 34.21875,
|
| 28484 |
+
"learning_rate": 9.88230610445384e-06,
|
| 28485 |
+
"loss": 18.9019,
|
| 28486 |
+
"step": 40590
|
| 28487 |
+
},
|
| 28488 |
+
{
|
| 28489 |
+
"epoch": 0.7534267229880978,
|
| 28490 |
+
"grad_norm": 35.53125,
|
| 28491 |
+
"learning_rate": 9.882277108667799e-06,
|
| 28492 |
+
"loss": 19.0727,
|
| 28493 |
+
"step": 40600
|
| 28494 |
+
},
|
| 28495 |
+
{
|
| 28496 |
+
"epoch": 0.7536122960725776,
|
| 28497 |
+
"grad_norm": 34.8125,
|
| 28498 |
+
"learning_rate": 9.882248112881756e-06,
|
| 28499 |
+
"loss": 19.1506,
|
| 28500 |
+
"step": 40610
|
| 28501 |
+
},
|
| 28502 |
+
{
|
| 28503 |
+
"epoch": 0.7537978691570575,
|
| 28504 |
+
"grad_norm": 35.625,
|
| 28505 |
+
"learning_rate": 9.882219117095714e-06,
|
| 28506 |
+
"loss": 19.1912,
|
| 28507 |
+
"step": 40620
|
| 28508 |
+
},
|
| 28509 |
+
{
|
| 28510 |
+
"epoch": 0.7539834422415372,
|
| 28511 |
+
"grad_norm": 35.84375,
|
| 28512 |
+
"learning_rate": 9.882190121309671e-06,
|
| 28513 |
+
"loss": 19.2547,
|
| 28514 |
+
"step": 40630
|
| 28515 |
+
},
|
| 28516 |
+
{
|
| 28517 |
+
"epoch": 0.7541690153260171,
|
| 28518 |
+
"grad_norm": 36.375,
|
| 28519 |
+
"learning_rate": 9.882161125523628e-06,
|
| 28520 |
+
"loss": 19.6462,
|
| 28521 |
+
"step": 40640
|
| 28522 |
+
},
|
| 28523 |
+
{
|
| 28524 |
+
"epoch": 0.754354588410497,
|
| 28525 |
+
"grad_norm": 37.21875,
|
| 28526 |
+
"learning_rate": 9.882132129737586e-06,
|
| 28527 |
+
"loss": 19.0219,
|
| 28528 |
+
"step": 40650
|
| 28529 |
+
},
|
| 28530 |
+
{
|
| 28531 |
+
"epoch": 0.7545401614949767,
|
| 28532 |
+
"grad_norm": 35.25,
|
| 28533 |
+
"learning_rate": 9.882103133951543e-06,
|
| 28534 |
+
"loss": 19.2065,
|
| 28535 |
+
"step": 40660
|
| 28536 |
+
},
|
| 28537 |
+
{
|
| 28538 |
+
"epoch": 0.7547257345794566,
|
| 28539 |
+
"grad_norm": 35.03125,
|
| 28540 |
+
"learning_rate": 9.882074138165502e-06,
|
| 28541 |
+
"loss": 19.058,
|
| 28542 |
+
"step": 40670
|
| 28543 |
+
},
|
| 28544 |
+
{
|
| 28545 |
+
"epoch": 0.7549113076639364,
|
| 28546 |
+
"grad_norm": 36.21875,
|
| 28547 |
+
"learning_rate": 9.882045142379458e-06,
|
| 28548 |
+
"loss": 19.1856,
|
| 28549 |
+
"step": 40680
|
| 28550 |
+
},
|
| 28551 |
+
{
|
| 28552 |
+
"epoch": 0.7550968807484163,
|
| 28553 |
+
"grad_norm": 35.5625,
|
| 28554 |
+
"learning_rate": 9.882016146593416e-06,
|
| 28555 |
+
"loss": 19.3971,
|
| 28556 |
+
"step": 40690
|
| 28557 |
+
},
|
| 28558 |
+
{
|
| 28559 |
+
"epoch": 0.755282453832896,
|
| 28560 |
+
"grad_norm": 34.59375,
|
| 28561 |
+
"learning_rate": 9.881987150807375e-06,
|
| 28562 |
+
"loss": 19.5486,
|
| 28563 |
+
"step": 40700
|
| 28564 |
+
},
|
| 28565 |
+
{
|
| 28566 |
+
"epoch": 0.7554680269173759,
|
| 28567 |
+
"grad_norm": 35.65625,
|
| 28568 |
+
"learning_rate": 9.881958155021332e-06,
|
| 28569 |
+
"loss": 19.3793,
|
| 28570 |
+
"step": 40710
|
| 28571 |
+
},
|
| 28572 |
+
{
|
| 28573 |
+
"epoch": 0.7556536000018558,
|
| 28574 |
+
"grad_norm": 37.78125,
|
| 28575 |
+
"learning_rate": 9.88192915923529e-06,
|
| 28576 |
+
"loss": 19.3927,
|
| 28577 |
+
"step": 40720
|
| 28578 |
+
},
|
| 28579 |
+
{
|
| 28580 |
+
"epoch": 0.7558391730863355,
|
| 28581 |
+
"grad_norm": 34.34375,
|
| 28582 |
+
"learning_rate": 9.881900163449247e-06,
|
| 28583 |
+
"loss": 19.2427,
|
| 28584 |
+
"step": 40730
|
| 28585 |
+
},
|
| 28586 |
+
{
|
| 28587 |
+
"epoch": 0.7560247461708154,
|
| 28588 |
+
"grad_norm": 35.46875,
|
| 28589 |
+
"learning_rate": 9.881871167663204e-06,
|
| 28590 |
+
"loss": 19.363,
|
| 28591 |
+
"step": 40740
|
| 28592 |
+
},
|
| 28593 |
+
{
|
| 28594 |
+
"epoch": 0.7562103192552952,
|
| 28595 |
+
"grad_norm": 34.28125,
|
| 28596 |
+
"learning_rate": 9.881842171877162e-06,
|
| 28597 |
+
"loss": 19.2003,
|
| 28598 |
+
"step": 40750
|
| 28599 |
+
},
|
| 28600 |
+
{
|
| 28601 |
+
"epoch": 0.756395892339775,
|
| 28602 |
+
"grad_norm": 36.21875,
|
| 28603 |
+
"learning_rate": 9.881813176091119e-06,
|
| 28604 |
+
"loss": 19.225,
|
| 28605 |
+
"step": 40760
|
| 28606 |
+
},
|
| 28607 |
+
{
|
| 28608 |
+
"epoch": 0.7565814654242549,
|
| 28609 |
+
"grad_norm": 35.375,
|
| 28610 |
+
"learning_rate": 9.881784180305078e-06,
|
| 28611 |
+
"loss": 19.8474,
|
| 28612 |
+
"step": 40770
|
| 28613 |
+
},
|
| 28614 |
+
{
|
| 28615 |
+
"epoch": 0.7567670385087347,
|
| 28616 |
+
"grad_norm": 35.46875,
|
| 28617 |
+
"learning_rate": 9.881755184519036e-06,
|
| 28618 |
+
"loss": 19.3724,
|
| 28619 |
+
"step": 40780
|
| 28620 |
+
},
|
| 28621 |
+
{
|
| 28622 |
+
"epoch": 0.7569526115932145,
|
| 28623 |
+
"grad_norm": 36.125,
|
| 28624 |
+
"learning_rate": 9.881726188732991e-06,
|
| 28625 |
+
"loss": 19.03,
|
| 28626 |
+
"step": 40790
|
| 28627 |
+
},
|
| 28628 |
+
{
|
| 28629 |
+
"epoch": 0.7571381846776943,
|
| 28630 |
+
"grad_norm": 36.75,
|
| 28631 |
+
"learning_rate": 9.88169719294695e-06,
|
| 28632 |
+
"loss": 19.4802,
|
| 28633 |
+
"step": 40800
|
| 28634 |
+
},
|
| 28635 |
+
{
|
| 28636 |
+
"epoch": 0.7573237577621742,
|
| 28637 |
+
"grad_norm": 34.03125,
|
| 28638 |
+
"learning_rate": 9.881668197160908e-06,
|
| 28639 |
+
"loss": 19.0697,
|
| 28640 |
+
"step": 40810
|
| 28641 |
+
},
|
| 28642 |
+
{
|
| 28643 |
+
"epoch": 0.757509330846654,
|
| 28644 |
+
"grad_norm": 35.4375,
|
| 28645 |
+
"learning_rate": 9.881639201374865e-06,
|
| 28646 |
+
"loss": 19.6207,
|
| 28647 |
+
"step": 40820
|
| 28648 |
+
},
|
| 28649 |
+
{
|
| 28650 |
+
"epoch": 0.7576949039311338,
|
| 28651 |
+
"grad_norm": 35.09375,
|
| 28652 |
+
"learning_rate": 9.881610205588823e-06,
|
| 28653 |
+
"loss": 19.4589,
|
| 28654 |
+
"step": 40830
|
| 28655 |
+
},
|
| 28656 |
+
{
|
| 28657 |
+
"epoch": 0.7578804770156137,
|
| 28658 |
+
"grad_norm": 34.4375,
|
| 28659 |
+
"learning_rate": 9.88158120980278e-06,
|
| 28660 |
+
"loss": 19.5961,
|
| 28661 |
+
"step": 40840
|
| 28662 |
+
},
|
| 28663 |
+
{
|
| 28664 |
+
"epoch": 0.7580660501000935,
|
| 28665 |
+
"grad_norm": 34.625,
|
| 28666 |
+
"learning_rate": 9.881552214016737e-06,
|
| 28667 |
+
"loss": 19.2363,
|
| 28668 |
+
"step": 40850
|
| 28669 |
+
},
|
| 28670 |
+
{
|
| 28671 |
+
"epoch": 0.7582516231845733,
|
| 28672 |
+
"grad_norm": 37.0,
|
| 28673 |
+
"learning_rate": 9.881523218230695e-06,
|
| 28674 |
+
"loss": 18.9855,
|
| 28675 |
+
"step": 40860
|
| 28676 |
+
},
|
| 28677 |
+
{
|
| 28678 |
+
"epoch": 0.7584371962690531,
|
| 28679 |
+
"grad_norm": 36.3125,
|
| 28680 |
+
"learning_rate": 9.881494222444652e-06,
|
| 28681 |
+
"loss": 19.5753,
|
| 28682 |
+
"step": 40870
|
| 28683 |
+
},
|
| 28684 |
+
{
|
| 28685 |
+
"epoch": 0.758622769353533,
|
| 28686 |
+
"grad_norm": 37.625,
|
| 28687 |
+
"learning_rate": 9.881465226658611e-06,
|
| 28688 |
+
"loss": 19.2938,
|
| 28689 |
+
"step": 40880
|
| 28690 |
+
},
|
| 28691 |
+
{
|
| 28692 |
+
"epoch": 0.7588083424380128,
|
| 28693 |
+
"grad_norm": 34.75,
|
| 28694 |
+
"learning_rate": 9.881436230872569e-06,
|
| 28695 |
+
"loss": 19.4143,
|
| 28696 |
+
"step": 40890
|
| 28697 |
+
},
|
| 28698 |
+
{
|
| 28699 |
+
"epoch": 0.7589939155224926,
|
| 28700 |
+
"grad_norm": 36.03125,
|
| 28701 |
+
"learning_rate": 9.881407235086526e-06,
|
| 28702 |
+
"loss": 19.3378,
|
| 28703 |
+
"step": 40900
|
| 28704 |
+
},
|
| 28705 |
+
{
|
| 28706 |
+
"epoch": 0.7591794886069725,
|
| 28707 |
+
"grad_norm": 34.65625,
|
| 28708 |
+
"learning_rate": 9.881378239300484e-06,
|
| 28709 |
+
"loss": 19.4041,
|
| 28710 |
+
"step": 40910
|
| 28711 |
+
},
|
| 28712 |
+
{
|
| 28713 |
+
"epoch": 0.7593650616914522,
|
| 28714 |
+
"grad_norm": 36.09375,
|
| 28715 |
+
"learning_rate": 9.881349243514441e-06,
|
| 28716 |
+
"loss": 19.7739,
|
| 28717 |
+
"step": 40920
|
| 28718 |
+
},
|
| 28719 |
+
{
|
| 28720 |
+
"epoch": 0.7595506347759321,
|
| 28721 |
+
"grad_norm": 36.59375,
|
| 28722 |
+
"learning_rate": 9.881320247728398e-06,
|
| 28723 |
+
"loss": 19.1821,
|
| 28724 |
+
"step": 40930
|
| 28725 |
+
},
|
| 28726 |
+
{
|
| 28727 |
+
"epoch": 0.759736207860412,
|
| 28728 |
+
"grad_norm": 34.6875,
|
| 28729 |
+
"learning_rate": 9.881291251942356e-06,
|
| 28730 |
+
"loss": 18.7575,
|
| 28731 |
+
"step": 40940
|
| 28732 |
+
},
|
| 28733 |
+
{
|
| 28734 |
+
"epoch": 0.7599217809448917,
|
| 28735 |
+
"grad_norm": 37.6875,
|
| 28736 |
+
"learning_rate": 9.881262256156313e-06,
|
| 28737 |
+
"loss": 19.4397,
|
| 28738 |
+
"step": 40950
|
| 28739 |
+
},
|
| 28740 |
+
{
|
| 28741 |
+
"epoch": 0.7601073540293716,
|
| 28742 |
+
"grad_norm": 36.28125,
|
| 28743 |
+
"learning_rate": 9.88123326037027e-06,
|
| 28744 |
+
"loss": 19.3813,
|
| 28745 |
+
"step": 40960
|
| 28746 |
+
},
|
| 28747 |
+
{
|
| 28748 |
+
"epoch": 0.7602929271138514,
|
| 28749 |
+
"grad_norm": 34.84375,
|
| 28750 |
+
"learning_rate": 9.881204264584228e-06,
|
| 28751 |
+
"loss": 19.342,
|
| 28752 |
+
"step": 40970
|
| 28753 |
+
},
|
| 28754 |
+
{
|
| 28755 |
+
"epoch": 0.7604785001983312,
|
| 28756 |
+
"grad_norm": 35.9375,
|
| 28757 |
+
"learning_rate": 9.881175268798187e-06,
|
| 28758 |
+
"loss": 18.8295,
|
| 28759 |
+
"step": 40980
|
| 28760 |
+
},
|
| 28761 |
+
{
|
| 28762 |
+
"epoch": 0.760664073282811,
|
| 28763 |
+
"grad_norm": 35.0,
|
| 28764 |
+
"learning_rate": 9.881146273012145e-06,
|
| 28765 |
+
"loss": 19.1556,
|
| 28766 |
+
"step": 40990
|
| 28767 |
+
},
|
| 28768 |
+
{
|
| 28769 |
+
"epoch": 0.7608496463672909,
|
| 28770 |
+
"grad_norm": 35.34375,
|
| 28771 |
+
"learning_rate": 9.8811172772261e-06,
|
| 28772 |
+
"loss": 19.3345,
|
| 28773 |
+
"step": 41000
|
| 28774 |
+
},
|
| 28775 |
+
{
|
| 28776 |
+
"epoch": 0.7610352194517707,
|
| 28777 |
+
"grad_norm": 37.96875,
|
| 28778 |
+
"learning_rate": 9.88108828144006e-06,
|
| 28779 |
+
"loss": 19.3571,
|
| 28780 |
+
"step": 41010
|
| 28781 |
+
},
|
| 28782 |
+
{
|
| 28783 |
+
"epoch": 0.7612207925362505,
|
| 28784 |
+
"grad_norm": 35.4375,
|
| 28785 |
+
"learning_rate": 9.881059285654017e-06,
|
| 28786 |
+
"loss": 19.0342,
|
| 28787 |
+
"step": 41020
|
| 28788 |
+
},
|
| 28789 |
+
{
|
| 28790 |
+
"epoch": 0.7614063656207304,
|
| 28791 |
+
"grad_norm": 35.59375,
|
| 28792 |
+
"learning_rate": 9.881030289867974e-06,
|
| 28793 |
+
"loss": 19.2773,
|
| 28794 |
+
"step": 41030
|
| 28795 |
+
},
|
| 28796 |
+
{
|
| 28797 |
+
"epoch": 0.7615919387052102,
|
| 28798 |
+
"grad_norm": 36.96875,
|
| 28799 |
+
"learning_rate": 9.881001294081932e-06,
|
| 28800 |
+
"loss": 19.5343,
|
| 28801 |
+
"step": 41040
|
| 28802 |
+
},
|
| 28803 |
+
{
|
| 28804 |
+
"epoch": 0.76177751178969,
|
| 28805 |
+
"grad_norm": 34.40625,
|
| 28806 |
+
"learning_rate": 9.88097229829589e-06,
|
| 28807 |
+
"loss": 18.9572,
|
| 28808 |
+
"step": 41050
|
| 28809 |
+
},
|
| 28810 |
+
{
|
| 28811 |
+
"epoch": 0.7619630848741699,
|
| 28812 |
+
"grad_norm": 35.65625,
|
| 28813 |
+
"learning_rate": 9.880943302509846e-06,
|
| 28814 |
+
"loss": 19.2055,
|
| 28815 |
+
"step": 41060
|
| 28816 |
+
},
|
| 28817 |
+
{
|
| 28818 |
+
"epoch": 0.7621486579586497,
|
| 28819 |
+
"grad_norm": 34.0,
|
| 28820 |
+
"learning_rate": 9.880914306723804e-06,
|
| 28821 |
+
"loss": 19.3073,
|
| 28822 |
+
"step": 41070
|
| 28823 |
+
},
|
| 28824 |
+
{
|
| 28825 |
+
"epoch": 0.7623342310431295,
|
| 28826 |
+
"grad_norm": 35.40625,
|
| 28827 |
+
"learning_rate": 9.880885310937763e-06,
|
| 28828 |
+
"loss": 19.5048,
|
| 28829 |
+
"step": 41080
|
| 28830 |
+
},
|
| 28831 |
+
{
|
| 28832 |
+
"epoch": 0.7625198041276093,
|
| 28833 |
+
"grad_norm": 36.1875,
|
| 28834 |
+
"learning_rate": 9.88085631515172e-06,
|
| 28835 |
+
"loss": 19.8103,
|
| 28836 |
+
"step": 41090
|
| 28837 |
+
},
|
| 28838 |
+
{
|
| 28839 |
+
"epoch": 0.7627053772120892,
|
| 28840 |
+
"grad_norm": 36.65625,
|
| 28841 |
+
"learning_rate": 9.880827319365678e-06,
|
| 28842 |
+
"loss": 19.5557,
|
| 28843 |
+
"step": 41100
|
| 28844 |
+
},
|
| 28845 |
+
{
|
| 28846 |
+
"epoch": 0.762890950296569,
|
| 28847 |
+
"grad_norm": 34.53125,
|
| 28848 |
+
"learning_rate": 9.880798323579635e-06,
|
| 28849 |
+
"loss": 19.5534,
|
| 28850 |
+
"step": 41110
|
| 28851 |
+
},
|
| 28852 |
+
{
|
| 28853 |
+
"epoch": 0.7630765233810488,
|
| 28854 |
+
"grad_norm": 35.6875,
|
| 28855 |
+
"learning_rate": 9.880769327793593e-06,
|
| 28856 |
+
"loss": 19.303,
|
| 28857 |
+
"step": 41120
|
| 28858 |
+
},
|
| 28859 |
+
{
|
| 28860 |
+
"epoch": 0.7632620964655287,
|
| 28861 |
+
"grad_norm": 34.71875,
|
| 28862 |
+
"learning_rate": 9.88074033200755e-06,
|
| 28863 |
+
"loss": 19.164,
|
| 28864 |
+
"step": 41130
|
| 28865 |
+
},
|
| 28866 |
+
{
|
| 28867 |
+
"epoch": 0.7634476695500084,
|
| 28868 |
+
"grad_norm": 37.0625,
|
| 28869 |
+
"learning_rate": 9.880711336221507e-06,
|
| 28870 |
+
"loss": 19.1969,
|
| 28871 |
+
"step": 41140
|
| 28872 |
+
},
|
| 28873 |
+
{
|
| 28874 |
+
"epoch": 0.7636332426344883,
|
| 28875 |
+
"grad_norm": 34.28125,
|
| 28876 |
+
"learning_rate": 9.880682340435466e-06,
|
| 28877 |
+
"loss": 19.5003,
|
| 28878 |
+
"step": 41150
|
| 28879 |
+
},
|
| 28880 |
+
{
|
| 28881 |
+
"epoch": 0.7638188157189681,
|
| 28882 |
+
"grad_norm": 36.78125,
|
| 28883 |
+
"learning_rate": 9.880653344649422e-06,
|
| 28884 |
+
"loss": 19.3433,
|
| 28885 |
+
"step": 41160
|
| 28886 |
+
},
|
| 28887 |
+
{
|
| 28888 |
+
"epoch": 0.7640043888034479,
|
| 28889 |
+
"grad_norm": 37.75,
|
| 28890 |
+
"learning_rate": 9.88062434886338e-06,
|
| 28891 |
+
"loss": 18.9825,
|
| 28892 |
+
"step": 41170
|
| 28893 |
+
},
|
| 28894 |
+
{
|
| 28895 |
+
"epoch": 0.7641899618879278,
|
| 28896 |
+
"grad_norm": 36.34375,
|
| 28897 |
+
"learning_rate": 9.880595353077339e-06,
|
| 28898 |
+
"loss": 19.0039,
|
| 28899 |
+
"step": 41180
|
| 28900 |
+
},
|
| 28901 |
+
{
|
| 28902 |
+
"epoch": 0.7643755349724076,
|
| 28903 |
+
"grad_norm": 36.8125,
|
| 28904 |
+
"learning_rate": 9.880566357291296e-06,
|
| 28905 |
+
"loss": 19.4506,
|
| 28906 |
+
"step": 41190
|
| 28907 |
+
},
|
| 28908 |
+
{
|
| 28909 |
+
"epoch": 0.7645611080568875,
|
| 28910 |
+
"grad_norm": 33.78125,
|
| 28911 |
+
"learning_rate": 9.880537361505253e-06,
|
| 28912 |
+
"loss": 19.3604,
|
| 28913 |
+
"step": 41200
|
| 28914 |
+
},
|
| 28915 |
+
{
|
| 28916 |
+
"epoch": 0.7647466811413672,
|
| 28917 |
+
"grad_norm": 37.875,
|
| 28918 |
+
"learning_rate": 9.880508365719211e-06,
|
| 28919 |
+
"loss": 19.2318,
|
| 28920 |
+
"step": 41210
|
| 28921 |
+
},
|
| 28922 |
+
{
|
| 28923 |
+
"epoch": 0.7649322542258471,
|
| 28924 |
+
"grad_norm": 35.5625,
|
| 28925 |
+
"learning_rate": 9.880479369933168e-06,
|
| 28926 |
+
"loss": 19.3256,
|
| 28927 |
+
"step": 41220
|
| 28928 |
+
},
|
| 28929 |
+
{
|
| 28930 |
+
"epoch": 0.765117827310327,
|
| 28931 |
+
"grad_norm": 37.40625,
|
| 28932 |
+
"learning_rate": 9.880450374147126e-06,
|
| 28933 |
+
"loss": 19.5014,
|
| 28934 |
+
"step": 41230
|
| 28935 |
+
},
|
| 28936 |
+
{
|
| 28937 |
+
"epoch": 0.7653034003948067,
|
| 28938 |
+
"grad_norm": 36.9375,
|
| 28939 |
+
"learning_rate": 9.880421378361083e-06,
|
| 28940 |
+
"loss": 19.2619,
|
| 28941 |
+
"step": 41240
|
| 28942 |
+
},
|
| 28943 |
+
{
|
| 28944 |
+
"epoch": 0.7654889734792866,
|
| 28945 |
+
"grad_norm": 37.53125,
|
| 28946 |
+
"learning_rate": 9.880392382575042e-06,
|
| 28947 |
+
"loss": 19.1359,
|
| 28948 |
+
"step": 41250
|
| 28949 |
+
},
|
| 28950 |
+
{
|
| 28951 |
+
"epoch": 0.7656745465637664,
|
| 28952 |
+
"grad_norm": 35.375,
|
| 28953 |
+
"learning_rate": 9.880363386789e-06,
|
| 28954 |
+
"loss": 19.3461,
|
| 28955 |
+
"step": 41260
|
| 28956 |
+
},
|
| 28957 |
+
{
|
| 28958 |
+
"epoch": 0.7658601196482462,
|
| 28959 |
+
"grad_norm": 36.71875,
|
| 28960 |
+
"learning_rate": 9.880334391002955e-06,
|
| 28961 |
+
"loss": 18.8452,
|
| 28962 |
+
"step": 41270
|
| 28963 |
+
},
|
| 28964 |
+
{
|
| 28965 |
+
"epoch": 0.766045692732726,
|
| 28966 |
+
"grad_norm": 36.03125,
|
| 28967 |
+
"learning_rate": 9.880305395216914e-06,
|
| 28968 |
+
"loss": 19.2407,
|
| 28969 |
+
"step": 41280
|
| 28970 |
+
},
|
| 28971 |
+
{
|
| 28972 |
+
"epoch": 0.7662312658172059,
|
| 28973 |
+
"grad_norm": 36.78125,
|
| 28974 |
+
"learning_rate": 9.880276399430872e-06,
|
| 28975 |
+
"loss": 19.4143,
|
| 28976 |
+
"step": 41290
|
| 28977 |
+
},
|
| 28978 |
+
{
|
| 28979 |
+
"epoch": 0.7664168389016857,
|
| 28980 |
+
"grad_norm": 35.8125,
|
| 28981 |
+
"learning_rate": 9.88024740364483e-06,
|
| 28982 |
+
"loss": 19.3873,
|
| 28983 |
+
"step": 41300
|
| 28984 |
+
},
|
| 28985 |
+
{
|
| 28986 |
+
"epoch": 0.7666024119861655,
|
| 28987 |
+
"grad_norm": 35.125,
|
| 28988 |
+
"learning_rate": 9.880218407858787e-06,
|
| 28989 |
+
"loss": 19.6352,
|
| 28990 |
+
"step": 41310
|
| 28991 |
+
},
|
| 28992 |
+
{
|
| 28993 |
+
"epoch": 0.7667879850706454,
|
| 28994 |
+
"grad_norm": 36.0625,
|
| 28995 |
+
"learning_rate": 9.880189412072744e-06,
|
| 28996 |
+
"loss": 19.2883,
|
| 28997 |
+
"step": 41320
|
| 28998 |
+
},
|
| 28999 |
+
{
|
| 29000 |
+
"epoch": 0.7669735581551251,
|
| 29001 |
+
"grad_norm": 36.40625,
|
| 29002 |
+
"learning_rate": 9.880160416286701e-06,
|
| 29003 |
+
"loss": 18.7959,
|
| 29004 |
+
"step": 41330
|
| 29005 |
+
},
|
| 29006 |
+
{
|
| 29007 |
+
"epoch": 0.767159131239605,
|
| 29008 |
+
"grad_norm": 34.78125,
|
| 29009 |
+
"learning_rate": 9.880131420500659e-06,
|
| 29010 |
+
"loss": 18.8009,
|
| 29011 |
+
"step": 41340
|
| 29012 |
+
},
|
| 29013 |
+
{
|
| 29014 |
+
"epoch": 0.7673447043240849,
|
| 29015 |
+
"grad_norm": 36.5625,
|
| 29016 |
+
"learning_rate": 9.880102424714618e-06,
|
| 29017 |
+
"loss": 19.1237,
|
| 29018 |
+
"step": 41350
|
| 29019 |
+
},
|
| 29020 |
+
{
|
| 29021 |
+
"epoch": 0.7675302774085646,
|
| 29022 |
+
"grad_norm": 38.78125,
|
| 29023 |
+
"learning_rate": 9.880073428928575e-06,
|
| 29024 |
+
"loss": 18.9787,
|
| 29025 |
+
"step": 41360
|
| 29026 |
+
},
|
| 29027 |
+
{
|
| 29028 |
+
"epoch": 0.7677158504930445,
|
| 29029 |
+
"grad_norm": 32.71875,
|
| 29030 |
+
"learning_rate": 9.880044433142533e-06,
|
| 29031 |
+
"loss": 19.2766,
|
| 29032 |
+
"step": 41370
|
| 29033 |
+
},
|
| 29034 |
+
{
|
| 29035 |
+
"epoch": 0.7679014235775243,
|
| 29036 |
+
"grad_norm": 35.1875,
|
| 29037 |
+
"learning_rate": 9.88001543735649e-06,
|
| 29038 |
+
"loss": 18.937,
|
| 29039 |
+
"step": 41380
|
| 29040 |
+
},
|
| 29041 |
+
{
|
| 29042 |
+
"epoch": 0.7680869966620042,
|
| 29043 |
+
"grad_norm": 35.40625,
|
| 29044 |
+
"learning_rate": 9.879986441570448e-06,
|
| 29045 |
+
"loss": 19.729,
|
| 29046 |
+
"step": 41390
|
| 29047 |
+
},
|
| 29048 |
+
{
|
| 29049 |
+
"epoch": 0.768272569746484,
|
| 29050 |
+
"grad_norm": 34.71875,
|
| 29051 |
+
"learning_rate": 9.879957445784405e-06,
|
| 29052 |
+
"loss": 19.1228,
|
| 29053 |
+
"step": 41400
|
| 29054 |
+
},
|
| 29055 |
+
{
|
| 29056 |
+
"epoch": 0.7684581428309638,
|
| 29057 |
+
"grad_norm": 36.5,
|
| 29058 |
+
"learning_rate": 9.879928449998362e-06,
|
| 29059 |
+
"loss": 19.3379,
|
| 29060 |
+
"step": 41410
|
| 29061 |
+
},
|
| 29062 |
+
{
|
| 29063 |
+
"epoch": 0.7686437159154437,
|
| 29064 |
+
"grad_norm": 36.75,
|
| 29065 |
+
"learning_rate": 9.87989945421232e-06,
|
| 29066 |
+
"loss": 19.4041,
|
| 29067 |
+
"step": 41420
|
| 29068 |
+
},
|
| 29069 |
+
{
|
| 29070 |
+
"epoch": 0.7688292889999234,
|
| 29071 |
+
"grad_norm": 37.03125,
|
| 29072 |
+
"learning_rate": 9.879870458426277e-06,
|
| 29073 |
+
"loss": 19.2628,
|
| 29074 |
+
"step": 41430
|
| 29075 |
+
},
|
| 29076 |
+
{
|
| 29077 |
+
"epoch": 0.7690148620844033,
|
| 29078 |
+
"grad_norm": 34.875,
|
| 29079 |
+
"learning_rate": 9.879841462640235e-06,
|
| 29080 |
+
"loss": 19.4629,
|
| 29081 |
+
"step": 41440
|
| 29082 |
+
},
|
| 29083 |
+
{
|
| 29084 |
+
"epoch": 0.7692004351688831,
|
| 29085 |
+
"grad_norm": 35.96875,
|
| 29086 |
+
"learning_rate": 9.879812466854192e-06,
|
| 29087 |
+
"loss": 19.196,
|
| 29088 |
+
"step": 41450
|
| 29089 |
+
},
|
| 29090 |
+
{
|
| 29091 |
+
"epoch": 0.7693860082533629,
|
| 29092 |
+
"grad_norm": 38.15625,
|
| 29093 |
+
"learning_rate": 9.879783471068151e-06,
|
| 29094 |
+
"loss": 19.752,
|
| 29095 |
+
"step": 41460
|
| 29096 |
+
},
|
| 29097 |
+
{
|
| 29098 |
+
"epoch": 0.7695715813378428,
|
| 29099 |
+
"grad_norm": 38.75,
|
| 29100 |
+
"learning_rate": 9.879754475282109e-06,
|
| 29101 |
+
"loss": 19.2405,
|
| 29102 |
+
"step": 41470
|
| 29103 |
+
},
|
| 29104 |
+
{
|
| 29105 |
+
"epoch": 0.7697571544223226,
|
| 29106 |
+
"grad_norm": 37.71875,
|
| 29107 |
+
"learning_rate": 9.879725479496066e-06,
|
| 29108 |
+
"loss": 19.2407,
|
| 29109 |
+
"step": 41480
|
| 29110 |
+
},
|
| 29111 |
+
{
|
| 29112 |
+
"epoch": 0.7699427275068024,
|
| 29113 |
+
"grad_norm": 36.78125,
|
| 29114 |
+
"learning_rate": 9.879696483710023e-06,
|
| 29115 |
+
"loss": 19.1094,
|
| 29116 |
+
"step": 41490
|
| 29117 |
+
},
|
| 29118 |
+
{
|
| 29119 |
+
"epoch": 0.7701283005912822,
|
| 29120 |
+
"grad_norm": 36.1875,
|
| 29121 |
+
"learning_rate": 9.87966748792398e-06,
|
| 29122 |
+
"loss": 19.3451,
|
| 29123 |
+
"step": 41500
|
| 29124 |
+
},
|
| 29125 |
+
{
|
| 29126 |
+
"epoch": 0.7703138736757621,
|
| 29127 |
+
"grad_norm": 36.53125,
|
| 29128 |
+
"learning_rate": 9.879638492137938e-06,
|
| 29129 |
+
"loss": 18.9383,
|
| 29130 |
+
"step": 41510
|
| 29131 |
+
},
|
| 29132 |
+
{
|
| 29133 |
+
"epoch": 0.7704994467602418,
|
| 29134 |
+
"grad_norm": 34.0,
|
| 29135 |
+
"learning_rate": 9.879609496351896e-06,
|
| 29136 |
+
"loss": 19.115,
|
| 29137 |
+
"step": 41520
|
| 29138 |
+
},
|
| 29139 |
+
{
|
| 29140 |
+
"epoch": 0.7706850198447217,
|
| 29141 |
+
"grad_norm": 35.78125,
|
| 29142 |
+
"learning_rate": 9.879580500565855e-06,
|
| 29143 |
+
"loss": 19.4492,
|
| 29144 |
+
"step": 41530
|
| 29145 |
+
},
|
| 29146 |
+
{
|
| 29147 |
+
"epoch": 0.7708705929292016,
|
| 29148 |
+
"grad_norm": 37.40625,
|
| 29149 |
+
"learning_rate": 9.87955150477981e-06,
|
| 29150 |
+
"loss": 19.3029,
|
| 29151 |
+
"step": 41540
|
| 29152 |
+
},
|
| 29153 |
+
{
|
| 29154 |
+
"epoch": 0.7710561660136813,
|
| 29155 |
+
"grad_norm": 35.15625,
|
| 29156 |
+
"learning_rate": 9.879522508993768e-06,
|
| 29157 |
+
"loss": 18.9063,
|
| 29158 |
+
"step": 41550
|
| 29159 |
+
},
|
| 29160 |
+
{
|
| 29161 |
+
"epoch": 0.7712417390981612,
|
| 29162 |
+
"grad_norm": 35.71875,
|
| 29163 |
+
"learning_rate": 9.879493513207727e-06,
|
| 29164 |
+
"loss": 19.6495,
|
| 29165 |
+
"step": 41560
|
| 29166 |
+
},
|
| 29167 |
+
{
|
| 29168 |
+
"epoch": 0.771427312182641,
|
| 29169 |
+
"grad_norm": 36.1875,
|
| 29170 |
+
"learning_rate": 9.879464517421684e-06,
|
| 29171 |
+
"loss": 19.384,
|
| 29172 |
+
"step": 41570
|
| 29173 |
+
},
|
| 29174 |
+
{
|
| 29175 |
+
"epoch": 0.7716128852671209,
|
| 29176 |
+
"grad_norm": 35.375,
|
| 29177 |
+
"learning_rate": 9.879435521635642e-06,
|
| 29178 |
+
"loss": 19.2114,
|
| 29179 |
+
"step": 41580
|
| 29180 |
+
},
|
| 29181 |
+
{
|
| 29182 |
+
"epoch": 0.7717984583516007,
|
| 29183 |
+
"grad_norm": 34.53125,
|
| 29184 |
+
"learning_rate": 9.879406525849599e-06,
|
| 29185 |
+
"loss": 19.6205,
|
| 29186 |
+
"step": 41590
|
| 29187 |
+
},
|
| 29188 |
+
{
|
| 29189 |
+
"epoch": 0.7719840314360805,
|
| 29190 |
+
"grad_norm": 33.59375,
|
| 29191 |
+
"learning_rate": 9.879377530063557e-06,
|
| 29192 |
+
"loss": 19.7098,
|
| 29193 |
+
"step": 41600
|
| 29194 |
+
},
|
| 29195 |
+
{
|
| 29196 |
+
"epoch": 0.7721696045205604,
|
| 29197 |
+
"grad_norm": 36.25,
|
| 29198 |
+
"learning_rate": 9.879348534277514e-06,
|
| 29199 |
+
"loss": 19.5254,
|
| 29200 |
+
"step": 41610
|
| 29201 |
+
},
|
| 29202 |
+
{
|
| 29203 |
+
"epoch": 0.7723551776050401,
|
| 29204 |
+
"grad_norm": 33.375,
|
| 29205 |
+
"learning_rate": 9.879319538491471e-06,
|
| 29206 |
+
"loss": 19.4273,
|
| 29207 |
+
"step": 41620
|
| 29208 |
+
},
|
| 29209 |
+
{
|
| 29210 |
+
"epoch": 0.77254075068952,
|
| 29211 |
+
"grad_norm": 36.46875,
|
| 29212 |
+
"learning_rate": 9.87929054270543e-06,
|
| 29213 |
+
"loss": 19.7937,
|
| 29214 |
+
"step": 41630
|
| 29215 |
+
},
|
| 29216 |
+
{
|
| 29217 |
+
"epoch": 0.7727263237739999,
|
| 29218 |
+
"grad_norm": 37.375,
|
| 29219 |
+
"learning_rate": 9.879261546919388e-06,
|
| 29220 |
+
"loss": 19.3302,
|
| 29221 |
+
"step": 41640
|
| 29222 |
+
},
|
| 29223 |
+
{
|
| 29224 |
+
"epoch": 0.7729118968584796,
|
| 29225 |
+
"grad_norm": 34.75,
|
| 29226 |
+
"learning_rate": 9.879232551133344e-06,
|
| 29227 |
+
"loss": 19.7385,
|
| 29228 |
+
"step": 41650
|
| 29229 |
+
},
|
| 29230 |
+
{
|
| 29231 |
+
"epoch": 0.7730974699429595,
|
| 29232 |
+
"grad_norm": 35.15625,
|
| 29233 |
+
"learning_rate": 9.879203555347303e-06,
|
| 29234 |
+
"loss": 19.3012,
|
| 29235 |
+
"step": 41660
|
| 29236 |
+
},
|
| 29237 |
+
{
|
| 29238 |
+
"epoch": 0.7732830430274393,
|
| 29239 |
+
"grad_norm": 36.46875,
|
| 29240 |
+
"learning_rate": 9.87917455956126e-06,
|
| 29241 |
+
"loss": 19.6957,
|
| 29242 |
+
"step": 41670
|
| 29243 |
+
},
|
| 29244 |
+
{
|
| 29245 |
+
"epoch": 0.7734686161119191,
|
| 29246 |
+
"grad_norm": 35.5625,
|
| 29247 |
+
"learning_rate": 9.879145563775217e-06,
|
| 29248 |
+
"loss": 19.4753,
|
| 29249 |
+
"step": 41680
|
| 29250 |
+
},
|
| 29251 |
+
{
|
| 29252 |
+
"epoch": 0.773654189196399,
|
| 29253 |
+
"grad_norm": 36.09375,
|
| 29254 |
+
"learning_rate": 9.879116567989175e-06,
|
| 29255 |
+
"loss": 18.989,
|
| 29256 |
+
"step": 41690
|
| 29257 |
+
},
|
| 29258 |
+
{
|
| 29259 |
+
"epoch": 0.7738397622808788,
|
| 29260 |
+
"grad_norm": 35.53125,
|
| 29261 |
+
"learning_rate": 9.879087572203132e-06,
|
| 29262 |
+
"loss": 19.5413,
|
| 29263 |
+
"step": 41700
|
| 29264 |
+
},
|
| 29265 |
+
{
|
| 29266 |
+
"epoch": 0.7740253353653586,
|
| 29267 |
+
"grad_norm": 35.4375,
|
| 29268 |
+
"learning_rate": 9.87905857641709e-06,
|
| 29269 |
+
"loss": 19.1875,
|
| 29270 |
+
"step": 41710
|
| 29271 |
+
},
|
| 29272 |
+
{
|
| 29273 |
+
"epoch": 0.7742109084498384,
|
| 29274 |
+
"grad_norm": 35.875,
|
| 29275 |
+
"learning_rate": 9.879029580631047e-06,
|
| 29276 |
+
"loss": 18.9351,
|
| 29277 |
+
"step": 41720
|
| 29278 |
+
},
|
| 29279 |
+
{
|
| 29280 |
+
"epoch": 0.7743964815343183,
|
| 29281 |
+
"grad_norm": 35.1875,
|
| 29282 |
+
"learning_rate": 9.879000584845006e-06,
|
| 29283 |
+
"loss": 19.5943,
|
| 29284 |
+
"step": 41730
|
| 29285 |
+
},
|
| 29286 |
+
{
|
| 29287 |
+
"epoch": 0.7745820546187981,
|
| 29288 |
+
"grad_norm": 35.15625,
|
| 29289 |
+
"learning_rate": 9.878971589058964e-06,
|
| 29290 |
+
"loss": 19.2853,
|
| 29291 |
+
"step": 41740
|
| 29292 |
+
},
|
| 29293 |
+
{
|
| 29294 |
+
"epoch": 0.7747676277032779,
|
| 29295 |
+
"grad_norm": 36.65625,
|
| 29296 |
+
"learning_rate": 9.87894259327292e-06,
|
| 29297 |
+
"loss": 19.1505,
|
| 29298 |
+
"step": 41750
|
| 29299 |
+
},
|
| 29300 |
+
{
|
| 29301 |
+
"epoch": 0.7749532007877578,
|
| 29302 |
+
"grad_norm": 37.03125,
|
| 29303 |
+
"learning_rate": 9.878913597486878e-06,
|
| 29304 |
+
"loss": 19.5033,
|
| 29305 |
+
"step": 41760
|
| 29306 |
+
},
|
| 29307 |
+
{
|
| 29308 |
+
"epoch": 0.7751387738722376,
|
| 29309 |
+
"grad_norm": 35.78125,
|
| 29310 |
+
"learning_rate": 9.878884601700836e-06,
|
| 29311 |
+
"loss": 19.2482,
|
| 29312 |
+
"step": 41770
|
| 29313 |
+
},
|
| 29314 |
+
{
|
| 29315 |
+
"epoch": 0.7753243469567174,
|
| 29316 |
+
"grad_norm": 36.84375,
|
| 29317 |
+
"learning_rate": 9.878855605914793e-06,
|
| 29318 |
+
"loss": 19.6271,
|
| 29319 |
+
"step": 41780
|
| 29320 |
+
},
|
| 29321 |
+
{
|
| 29322 |
+
"epoch": 0.7755099200411972,
|
| 29323 |
+
"grad_norm": 36.375,
|
| 29324 |
+
"learning_rate": 9.87882661012875e-06,
|
| 29325 |
+
"loss": 19.6722,
|
| 29326 |
+
"step": 41790
|
| 29327 |
+
},
|
| 29328 |
+
{
|
| 29329 |
+
"epoch": 0.7756954931256771,
|
| 29330 |
+
"grad_norm": 36.09375,
|
| 29331 |
+
"learning_rate": 9.87879761434271e-06,
|
| 29332 |
+
"loss": 19.1386,
|
| 29333 |
+
"step": 41800
|
| 29334 |
+
},
|
| 29335 |
+
{
|
| 29336 |
+
"epoch": 0.7758810662101568,
|
| 29337 |
+
"grad_norm": 35.0,
|
| 29338 |
+
"learning_rate": 9.878768618556665e-06,
|
| 29339 |
+
"loss": 19.687,
|
| 29340 |
+
"step": 41810
|
| 29341 |
+
},
|
| 29342 |
+
{
|
| 29343 |
+
"epoch": 0.7760666392946367,
|
| 29344 |
+
"grad_norm": 35.5,
|
| 29345 |
+
"learning_rate": 9.878739622770623e-06,
|
| 29346 |
+
"loss": 19.3746,
|
| 29347 |
+
"step": 41820
|
| 29348 |
+
},
|
| 29349 |
+
{
|
| 29350 |
+
"epoch": 0.7762522123791166,
|
| 29351 |
+
"grad_norm": 36.71875,
|
| 29352 |
+
"learning_rate": 9.878710626984582e-06,
|
| 29353 |
+
"loss": 19.3001,
|
| 29354 |
+
"step": 41830
|
| 29355 |
+
},
|
| 29356 |
+
{
|
| 29357 |
+
"epoch": 0.7764377854635963,
|
| 29358 |
+
"grad_norm": 35.875,
|
| 29359 |
+
"learning_rate": 9.87868163119854e-06,
|
| 29360 |
+
"loss": 19.1276,
|
| 29361 |
+
"step": 41840
|
| 29362 |
+
},
|
| 29363 |
+
{
|
| 29364 |
+
"epoch": 0.7766233585480762,
|
| 29365 |
+
"grad_norm": 36.59375,
|
| 29366 |
+
"learning_rate": 9.878652635412497e-06,
|
| 29367 |
+
"loss": 19.1937,
|
| 29368 |
+
"step": 41850
|
| 29369 |
+
},
|
| 29370 |
+
{
|
| 29371 |
+
"epoch": 0.776808931632556,
|
| 29372 |
+
"grad_norm": 34.5,
|
| 29373 |
+
"learning_rate": 9.878623639626454e-06,
|
| 29374 |
+
"loss": 19.5688,
|
| 29375 |
+
"step": 41860
|
| 29376 |
+
},
|
| 29377 |
+
{
|
| 29378 |
+
"epoch": 0.7769945047170358,
|
| 29379 |
+
"grad_norm": 36.84375,
|
| 29380 |
+
"learning_rate": 9.878594643840412e-06,
|
| 29381 |
+
"loss": 18.9137,
|
| 29382 |
+
"step": 41870
|
| 29383 |
+
},
|
| 29384 |
+
{
|
| 29385 |
+
"epoch": 0.7771800778015157,
|
| 29386 |
+
"grad_norm": 36.625,
|
| 29387 |
+
"learning_rate": 9.878565648054369e-06,
|
| 29388 |
+
"loss": 19.4192,
|
| 29389 |
+
"step": 41880
|
| 29390 |
+
},
|
| 29391 |
+
{
|
| 29392 |
+
"epoch": 0.7773656508859955,
|
| 29393 |
+
"grad_norm": 37.34375,
|
| 29394 |
+
"learning_rate": 9.878536652268326e-06,
|
| 29395 |
+
"loss": 19.2262,
|
| 29396 |
+
"step": 41890
|
| 29397 |
+
},
|
| 29398 |
+
{
|
| 29399 |
+
"epoch": 0.7775512239704753,
|
| 29400 |
+
"grad_norm": 36.09375,
|
| 29401 |
+
"learning_rate": 9.878507656482284e-06,
|
| 29402 |
+
"loss": 18.9775,
|
| 29403 |
+
"step": 41900
|
| 29404 |
+
},
|
| 29405 |
+
{
|
| 29406 |
+
"epoch": 0.7777367970549551,
|
| 29407 |
+
"grad_norm": 35.25,
|
| 29408 |
+
"learning_rate": 9.878478660696241e-06,
|
| 29409 |
+
"loss": 19.2364,
|
| 29410 |
+
"step": 41910
|
| 29411 |
+
},
|
| 29412 |
+
{
|
| 29413 |
+
"epoch": 0.777922370139435,
|
| 29414 |
+
"grad_norm": 35.65625,
|
| 29415 |
+
"learning_rate": 9.878449664910199e-06,
|
| 29416 |
+
"loss": 19.5075,
|
| 29417 |
+
"step": 41920
|
| 29418 |
+
},
|
| 29419 |
+
{
|
| 29420 |
+
"epoch": 0.7781079432239149,
|
| 29421 |
+
"grad_norm": 37.15625,
|
| 29422 |
+
"learning_rate": 9.878420669124156e-06,
|
| 29423 |
+
"loss": 19.3397,
|
| 29424 |
+
"step": 41930
|
| 29425 |
+
},
|
| 29426 |
+
{
|
| 29427 |
+
"epoch": 0.7782935163083946,
|
| 29428 |
+
"grad_norm": 33.5625,
|
| 29429 |
+
"learning_rate": 9.878391673338115e-06,
|
| 29430 |
+
"loss": 19.0782,
|
| 29431 |
+
"step": 41940
|
| 29432 |
+
},
|
| 29433 |
+
{
|
| 29434 |
+
"epoch": 0.7784790893928745,
|
| 29435 |
+
"grad_norm": 35.5,
|
| 29436 |
+
"learning_rate": 9.878362677552073e-06,
|
| 29437 |
+
"loss": 19.4959,
|
| 29438 |
+
"step": 41950
|
| 29439 |
+
},
|
| 29440 |
+
{
|
| 29441 |
+
"epoch": 0.7786646624773543,
|
| 29442 |
+
"grad_norm": 35.0,
|
| 29443 |
+
"learning_rate": 9.87833368176603e-06,
|
| 29444 |
+
"loss": 19.4202,
|
| 29445 |
+
"step": 41960
|
| 29446 |
+
},
|
| 29447 |
+
{
|
| 29448 |
+
"epoch": 0.7788502355618341,
|
| 29449 |
+
"grad_norm": 38.875,
|
| 29450 |
+
"learning_rate": 9.878304685979987e-06,
|
| 29451 |
+
"loss": 19.3642,
|
| 29452 |
+
"step": 41970
|
| 29453 |
+
},
|
| 29454 |
+
{
|
| 29455 |
+
"epoch": 0.7790358086463139,
|
| 29456 |
+
"grad_norm": 35.0,
|
| 29457 |
+
"learning_rate": 9.878275690193945e-06,
|
| 29458 |
+
"loss": 19.0663,
|
| 29459 |
+
"step": 41980
|
| 29460 |
+
},
|
| 29461 |
+
{
|
| 29462 |
+
"epoch": 0.7792213817307938,
|
| 29463 |
+
"grad_norm": 36.8125,
|
| 29464 |
+
"learning_rate": 9.878246694407902e-06,
|
| 29465 |
+
"loss": 19.0382,
|
| 29466 |
+
"step": 41990
|
| 29467 |
+
},
|
| 29468 |
+
{
|
| 29469 |
+
"epoch": 0.7794069548152736,
|
| 29470 |
+
"grad_norm": 36.5625,
|
| 29471 |
+
"learning_rate": 9.87821769862186e-06,
|
| 29472 |
+
"loss": 19.1091,
|
| 29473 |
+
"step": 42000
|
| 29474 |
+
},
|
| 29475 |
+
{
|
| 29476 |
+
"epoch": 0.7795925278997534,
|
| 29477 |
+
"grad_norm": 34.6875,
|
| 29478 |
+
"learning_rate": 9.878188702835819e-06,
|
| 29479 |
+
"loss": 19.1205,
|
| 29480 |
+
"step": 42010
|
| 29481 |
+
},
|
| 29482 |
+
{
|
| 29483 |
+
"epoch": 0.7797781009842333,
|
| 29484 |
+
"grad_norm": 34.6875,
|
| 29485 |
+
"learning_rate": 9.878159707049774e-06,
|
| 29486 |
+
"loss": 19.4904,
|
| 29487 |
+
"step": 42020
|
| 29488 |
+
},
|
| 29489 |
+
{
|
| 29490 |
+
"epoch": 0.779963674068713,
|
| 29491 |
+
"grad_norm": 35.84375,
|
| 29492 |
+
"learning_rate": 9.878130711263732e-06,
|
| 29493 |
+
"loss": 18.6078,
|
| 29494 |
+
"step": 42030
|
| 29495 |
+
},
|
| 29496 |
+
{
|
| 29497 |
+
"epoch": 0.7801492471531929,
|
| 29498 |
+
"grad_norm": 36.84375,
|
| 29499 |
+
"learning_rate": 9.878101715477691e-06,
|
| 29500 |
+
"loss": 18.9939,
|
| 29501 |
+
"step": 42040
|
| 29502 |
+
},
|
| 29503 |
+
{
|
| 29504 |
+
"epoch": 0.7803348202376728,
|
| 29505 |
+
"grad_norm": 35.96875,
|
| 29506 |
+
"learning_rate": 9.878072719691648e-06,
|
| 29507 |
+
"loss": 19.1132,
|
| 29508 |
+
"step": 42050
|
| 29509 |
+
},
|
| 29510 |
+
{
|
| 29511 |
+
"epoch": 0.7805203933221525,
|
| 29512 |
+
"grad_norm": 35.59375,
|
| 29513 |
+
"learning_rate": 9.878043723905606e-06,
|
| 29514 |
+
"loss": 19.2332,
|
| 29515 |
+
"step": 42060
|
| 29516 |
+
},
|
| 29517 |
+
{
|
| 29518 |
+
"epoch": 0.7807059664066324,
|
| 29519 |
+
"grad_norm": 37.875,
|
| 29520 |
+
"learning_rate": 9.878014728119563e-06,
|
| 29521 |
+
"loss": 19.1597,
|
| 29522 |
+
"step": 42070
|
| 29523 |
+
},
|
| 29524 |
+
{
|
| 29525 |
+
"epoch": 0.7808915394911122,
|
| 29526 |
+
"grad_norm": 36.65625,
|
| 29527 |
+
"learning_rate": 9.87798573233352e-06,
|
| 29528 |
+
"loss": 19.1368,
|
| 29529 |
+
"step": 42080
|
| 29530 |
+
},
|
| 29531 |
+
{
|
| 29532 |
+
"epoch": 0.781077112575592,
|
| 29533 |
+
"grad_norm": 36.21875,
|
| 29534 |
+
"learning_rate": 9.877956736547478e-06,
|
| 29535 |
+
"loss": 19.6976,
|
| 29536 |
+
"step": 42090
|
| 29537 |
+
},
|
| 29538 |
+
{
|
| 29539 |
+
"epoch": 0.7812626856600718,
|
| 29540 |
+
"grad_norm": 38.0625,
|
| 29541 |
+
"learning_rate": 9.877927740761435e-06,
|
| 29542 |
+
"loss": 18.8875,
|
| 29543 |
+
"step": 42100
|
| 29544 |
+
},
|
| 29545 |
+
{
|
| 29546 |
+
"epoch": 0.7814482587445517,
|
| 29547 |
+
"grad_norm": 36.8125,
|
| 29548 |
+
"learning_rate": 9.877898744975394e-06,
|
| 29549 |
+
"loss": 19.1315,
|
| 29550 |
+
"step": 42110
|
| 29551 |
+
},
|
| 29552 |
+
{
|
| 29553 |
+
"epoch": 0.7816338318290316,
|
| 29554 |
+
"grad_norm": 36.0,
|
| 29555 |
+
"learning_rate": 9.877869749189352e-06,
|
| 29556 |
+
"loss": 19.1785,
|
| 29557 |
+
"step": 42120
|
| 29558 |
+
},
|
| 29559 |
+
{
|
| 29560 |
+
"epoch": 0.7818194049135113,
|
| 29561 |
+
"grad_norm": 36.65625,
|
| 29562 |
+
"learning_rate": 9.877840753403308e-06,
|
| 29563 |
+
"loss": 19.2687,
|
| 29564 |
+
"step": 42130
|
| 29565 |
+
},
|
| 29566 |
+
{
|
| 29567 |
+
"epoch": 0.7820049779979912,
|
| 29568 |
+
"grad_norm": 36.0,
|
| 29569 |
+
"learning_rate": 9.877811757617267e-06,
|
| 29570 |
+
"loss": 19.1766,
|
| 29571 |
+
"step": 42140
|
| 29572 |
+
},
|
| 29573 |
+
{
|
| 29574 |
+
"epoch": 0.782190551082471,
|
| 29575 |
+
"grad_norm": 35.75,
|
| 29576 |
+
"learning_rate": 9.877782761831224e-06,
|
| 29577 |
+
"loss": 19.1511,
|
| 29578 |
+
"step": 42150
|
| 29579 |
+
},
|
| 29580 |
+
{
|
| 29581 |
+
"epoch": 0.7823761241669508,
|
| 29582 |
+
"grad_norm": 37.3125,
|
| 29583 |
+
"learning_rate": 9.877753766045181e-06,
|
| 29584 |
+
"loss": 19.0198,
|
| 29585 |
+
"step": 42160
|
| 29586 |
+
},
|
| 29587 |
+
{
|
| 29588 |
+
"epoch": 0.7825616972514307,
|
| 29589 |
+
"grad_norm": 34.5,
|
| 29590 |
+
"learning_rate": 9.877724770259139e-06,
|
| 29591 |
+
"loss": 19.6603,
|
| 29592 |
+
"step": 42170
|
| 29593 |
+
},
|
| 29594 |
+
{
|
| 29595 |
+
"epoch": 0.7827472703359105,
|
| 29596 |
+
"grad_norm": 34.65625,
|
| 29597 |
+
"learning_rate": 9.877695774473096e-06,
|
| 29598 |
+
"loss": 19.1918,
|
| 29599 |
+
"step": 42180
|
| 29600 |
+
},
|
| 29601 |
+
{
|
| 29602 |
+
"epoch": 0.7829328434203903,
|
| 29603 |
+
"grad_norm": 35.84375,
|
| 29604 |
+
"learning_rate": 9.877666778687054e-06,
|
| 29605 |
+
"loss": 19.2404,
|
| 29606 |
+
"step": 42190
|
| 29607 |
+
},
|
| 29608 |
+
{
|
| 29609 |
+
"epoch": 0.7831184165048701,
|
| 29610 |
+
"grad_norm": 36.28125,
|
| 29611 |
+
"learning_rate": 9.877637782901011e-06,
|
| 29612 |
+
"loss": 19.3464,
|
| 29613 |
+
"step": 42200
|
| 29614 |
+
},
|
| 29615 |
+
{
|
| 29616 |
+
"epoch": 0.78330398958935,
|
| 29617 |
+
"grad_norm": 36.125,
|
| 29618 |
+
"learning_rate": 9.87760878711497e-06,
|
| 29619 |
+
"loss": 19.2844,
|
| 29620 |
+
"step": 42210
|
| 29621 |
+
},
|
| 29622 |
+
{
|
| 29623 |
+
"epoch": 0.7834895626738297,
|
| 29624 |
+
"grad_norm": 36.4375,
|
| 29625 |
+
"learning_rate": 9.877579791328928e-06,
|
| 29626 |
+
"loss": 18.8615,
|
| 29627 |
+
"step": 42220
|
| 29628 |
+
},
|
| 29629 |
+
{
|
| 29630 |
+
"epoch": 0.7836751357583096,
|
| 29631 |
+
"grad_norm": 34.96875,
|
| 29632 |
+
"learning_rate": 9.877550795542885e-06,
|
| 29633 |
+
"loss": 18.8328,
|
| 29634 |
+
"step": 42230
|
| 29635 |
+
},
|
| 29636 |
+
{
|
| 29637 |
+
"epoch": 0.7838607088427895,
|
| 29638 |
+
"grad_norm": 34.125,
|
| 29639 |
+
"learning_rate": 9.877521799756842e-06,
|
| 29640 |
+
"loss": 19.1479,
|
| 29641 |
+
"step": 42240
|
| 29642 |
+
},
|
| 29643 |
+
{
|
| 29644 |
+
"epoch": 0.7840462819272692,
|
| 29645 |
+
"grad_norm": 34.9375,
|
| 29646 |
+
"learning_rate": 9.8774928039708e-06,
|
| 29647 |
+
"loss": 19.2093,
|
| 29648 |
+
"step": 42250
|
| 29649 |
+
},
|
| 29650 |
+
{
|
| 29651 |
+
"epoch": 0.7842318550117491,
|
| 29652 |
+
"grad_norm": 36.6875,
|
| 29653 |
+
"learning_rate": 9.877463808184757e-06,
|
| 29654 |
+
"loss": 18.9336,
|
| 29655 |
+
"step": 42260
|
| 29656 |
+
},
|
| 29657 |
+
{
|
| 29658 |
+
"epoch": 0.7844174280962289,
|
| 29659 |
+
"grad_norm": 37.125,
|
| 29660 |
+
"learning_rate": 9.877434812398715e-06,
|
| 29661 |
+
"loss": 18.7095,
|
| 29662 |
+
"step": 42270
|
| 29663 |
+
},
|
| 29664 |
+
{
|
| 29665 |
+
"epoch": 0.7846030011807088,
|
| 29666 |
+
"grad_norm": 36.5,
|
| 29667 |
+
"learning_rate": 9.877405816612674e-06,
|
| 29668 |
+
"loss": 19.2101,
|
| 29669 |
+
"step": 42280
|
| 29670 |
+
},
|
| 29671 |
+
{
|
| 29672 |
+
"epoch": 0.7847885742651886,
|
| 29673 |
+
"grad_norm": 35.90625,
|
| 29674 |
+
"learning_rate": 9.87737682082663e-06,
|
| 29675 |
+
"loss": 19.6382,
|
| 29676 |
+
"step": 42290
|
| 29677 |
+
},
|
| 29678 |
+
{
|
| 29679 |
+
"epoch": 0.7849741473496684,
|
| 29680 |
+
"grad_norm": 34.0625,
|
| 29681 |
+
"learning_rate": 9.877347825040587e-06,
|
| 29682 |
+
"loss": 19.6099,
|
| 29683 |
+
"step": 42300
|
| 29684 |
+
},
|
| 29685 |
+
{
|
| 29686 |
+
"epoch": 0.7851597204341483,
|
| 29687 |
+
"grad_norm": 36.8125,
|
| 29688 |
+
"learning_rate": 9.877318829254546e-06,
|
| 29689 |
+
"loss": 19.2404,
|
| 29690 |
+
"step": 42310
|
| 29691 |
+
},
|
| 29692 |
+
{
|
| 29693 |
+
"epoch": 0.785345293518628,
|
| 29694 |
+
"grad_norm": 36.03125,
|
| 29695 |
+
"learning_rate": 9.877289833468503e-06,
|
| 29696 |
+
"loss": 19.0874,
|
| 29697 |
+
"step": 42320
|
| 29698 |
+
},
|
| 29699 |
+
{
|
| 29700 |
+
"epoch": 0.7855308666031079,
|
| 29701 |
+
"grad_norm": 35.46875,
|
| 29702 |
+
"learning_rate": 9.87726083768246e-06,
|
| 29703 |
+
"loss": 19.0245,
|
| 29704 |
+
"step": 42330
|
| 29705 |
+
},
|
| 29706 |
+
{
|
| 29707 |
+
"epoch": 0.7857164396875878,
|
| 29708 |
+
"grad_norm": 36.625,
|
| 29709 |
+
"learning_rate": 9.877231841896418e-06,
|
| 29710 |
+
"loss": 19.3828,
|
| 29711 |
+
"step": 42340
|
| 29712 |
+
},
|
| 29713 |
+
{
|
| 29714 |
+
"epoch": 0.7859020127720675,
|
| 29715 |
+
"grad_norm": 37.625,
|
| 29716 |
+
"learning_rate": 9.877202846110376e-06,
|
| 29717 |
+
"loss": 18.6832,
|
| 29718 |
+
"step": 42350
|
| 29719 |
+
},
|
| 29720 |
+
{
|
| 29721 |
+
"epoch": 0.7860875858565474,
|
| 29722 |
+
"grad_norm": 34.375,
|
| 29723 |
+
"learning_rate": 9.877173850324333e-06,
|
| 29724 |
+
"loss": 19.379,
|
| 29725 |
+
"step": 42360
|
| 29726 |
+
},
|
| 29727 |
+
{
|
| 29728 |
+
"epoch": 0.7862731589410272,
|
| 29729 |
+
"grad_norm": 36.75,
|
| 29730 |
+
"learning_rate": 9.87714485453829e-06,
|
| 29731 |
+
"loss": 19.779,
|
| 29732 |
+
"step": 42370
|
| 29733 |
+
},
|
| 29734 |
+
{
|
| 29735 |
+
"epoch": 0.786458732025507,
|
| 29736 |
+
"grad_norm": 36.75,
|
| 29737 |
+
"learning_rate": 9.877115858752248e-06,
|
| 29738 |
+
"loss": 19.2348,
|
| 29739 |
+
"step": 42380
|
| 29740 |
+
},
|
| 29741 |
+
{
|
| 29742 |
+
"epoch": 0.7866443051099868,
|
| 29743 |
+
"grad_norm": 35.375,
|
| 29744 |
+
"learning_rate": 9.877086862966207e-06,
|
| 29745 |
+
"loss": 19.3421,
|
| 29746 |
+
"step": 42390
|
| 29747 |
+
},
|
| 29748 |
+
{
|
| 29749 |
+
"epoch": 0.7868298781944667,
|
| 29750 |
+
"grad_norm": 35.15625,
|
| 29751 |
+
"learning_rate": 9.877057867180163e-06,
|
| 29752 |
+
"loss": 19.0024,
|
| 29753 |
+
"step": 42400
|
| 29754 |
+
},
|
| 29755 |
+
{
|
| 29756 |
+
"epoch": 0.7870154512789465,
|
| 29757 |
+
"grad_norm": 36.96875,
|
| 29758 |
+
"learning_rate": 9.877028871394122e-06,
|
| 29759 |
+
"loss": 18.825,
|
| 29760 |
+
"step": 42410
|
| 29761 |
+
},
|
| 29762 |
+
{
|
| 29763 |
+
"epoch": 0.7872010243634263,
|
| 29764 |
+
"grad_norm": 36.65625,
|
| 29765 |
+
"learning_rate": 9.876999875608079e-06,
|
| 29766 |
+
"loss": 19.4176,
|
| 29767 |
+
"step": 42420
|
| 29768 |
+
},
|
| 29769 |
+
{
|
| 29770 |
+
"epoch": 0.7873865974479062,
|
| 29771 |
+
"grad_norm": 36.4375,
|
| 29772 |
+
"learning_rate": 9.876970879822037e-06,
|
| 29773 |
+
"loss": 19.0654,
|
| 29774 |
+
"step": 42430
|
| 29775 |
+
},
|
| 29776 |
+
{
|
| 29777 |
+
"epoch": 0.7875721705323859,
|
| 29778 |
+
"grad_norm": 36.625,
|
| 29779 |
+
"learning_rate": 9.876941884035994e-06,
|
| 29780 |
+
"loss": 19.5037,
|
| 29781 |
+
"step": 42440
|
| 29782 |
+
},
|
| 29783 |
+
{
|
| 29784 |
+
"epoch": 0.7877577436168658,
|
| 29785 |
+
"grad_norm": 35.71875,
|
| 29786 |
+
"learning_rate": 9.876912888249951e-06,
|
| 29787 |
+
"loss": 18.6874,
|
| 29788 |
+
"step": 42450
|
| 29789 |
+
},
|
| 29790 |
+
{
|
| 29791 |
+
"epoch": 0.7879433167013457,
|
| 29792 |
+
"grad_norm": 36.40625,
|
| 29793 |
+
"learning_rate": 9.876883892463909e-06,
|
| 29794 |
+
"loss": 19.3074,
|
| 29795 |
+
"step": 42460
|
| 29796 |
+
},
|
| 29797 |
+
{
|
| 29798 |
+
"epoch": 0.7881288897858255,
|
| 29799 |
+
"grad_norm": 34.96875,
|
| 29800 |
+
"learning_rate": 9.876854896677866e-06,
|
| 29801 |
+
"loss": 18.9853,
|
| 29802 |
+
"step": 42470
|
| 29803 |
+
},
|
| 29804 |
+
{
|
| 29805 |
+
"epoch": 0.7883144628703053,
|
| 29806 |
+
"grad_norm": 35.3125,
|
| 29807 |
+
"learning_rate": 9.876825900891824e-06,
|
| 29808 |
+
"loss": 18.9235,
|
| 29809 |
+
"step": 42480
|
| 29810 |
+
},
|
| 29811 |
+
{
|
| 29812 |
+
"epoch": 0.7885000359547851,
|
| 29813 |
+
"grad_norm": 36.90625,
|
| 29814 |
+
"learning_rate": 9.876796905105783e-06,
|
| 29815 |
+
"loss": 19.4066,
|
| 29816 |
+
"step": 42490
|
| 29817 |
+
},
|
| 29818 |
+
{
|
| 29819 |
+
"epoch": 0.788685609039265,
|
| 29820 |
+
"grad_norm": 36.1875,
|
| 29821 |
+
"learning_rate": 9.876767909319738e-06,
|
| 29822 |
+
"loss": 19.2456,
|
| 29823 |
+
"step": 42500
|
| 29824 |
+
},
|
| 29825 |
+
{
|
| 29826 |
+
"epoch": 0.7888711821237447,
|
| 29827 |
+
"grad_norm": 36.59375,
|
| 29828 |
+
"learning_rate": 9.876738913533696e-06,
|
| 29829 |
+
"loss": 18.9103,
|
| 29830 |
+
"step": 42510
|
| 29831 |
+
},
|
| 29832 |
+
{
|
| 29833 |
+
"epoch": 0.7890567552082246,
|
| 29834 |
+
"grad_norm": 34.03125,
|
| 29835 |
+
"learning_rate": 9.876709917747655e-06,
|
| 29836 |
+
"loss": 19.2111,
|
| 29837 |
+
"step": 42520
|
| 29838 |
+
},
|
| 29839 |
+
{
|
| 29840 |
+
"epoch": 0.7892423282927045,
|
| 29841 |
+
"grad_norm": 35.21875,
|
| 29842 |
+
"learning_rate": 9.876680921961612e-06,
|
| 29843 |
+
"loss": 19.4737,
|
| 29844 |
+
"step": 42530
|
| 29845 |
+
},
|
| 29846 |
+
{
|
| 29847 |
+
"epoch": 0.7894279013771842,
|
| 29848 |
+
"grad_norm": 35.65625,
|
| 29849 |
+
"learning_rate": 9.87665192617557e-06,
|
| 29850 |
+
"loss": 19.1734,
|
| 29851 |
+
"step": 42540
|
| 29852 |
+
},
|
| 29853 |
+
{
|
| 29854 |
+
"epoch": 0.7896134744616641,
|
| 29855 |
+
"grad_norm": 38.78125,
|
| 29856 |
+
"learning_rate": 9.876622930389527e-06,
|
| 29857 |
+
"loss": 19.2003,
|
| 29858 |
+
"step": 42550
|
| 29859 |
+
},
|
| 29860 |
+
{
|
| 29861 |
+
"epoch": 0.7897990475461439,
|
| 29862 |
+
"grad_norm": 35.53125,
|
| 29863 |
+
"learning_rate": 9.876593934603485e-06,
|
| 29864 |
+
"loss": 19.1934,
|
| 29865 |
+
"step": 42560
|
| 29866 |
+
},
|
| 29867 |
+
{
|
| 29868 |
+
"epoch": 0.7899846206306237,
|
| 29869 |
+
"grad_norm": 36.0625,
|
| 29870 |
+
"learning_rate": 9.876564938817442e-06,
|
| 29871 |
+
"loss": 19.3247,
|
| 29872 |
+
"step": 42570
|
| 29873 |
+
},
|
| 29874 |
+
{
|
| 29875 |
+
"epoch": 0.7901701937151036,
|
| 29876 |
+
"grad_norm": 36.28125,
|
| 29877 |
+
"learning_rate": 9.8765359430314e-06,
|
| 29878 |
+
"loss": 19.6564,
|
| 29879 |
+
"step": 42580
|
| 29880 |
+
},
|
| 29881 |
+
{
|
| 29882 |
+
"epoch": 0.7903557667995834,
|
| 29883 |
+
"grad_norm": 36.625,
|
| 29884 |
+
"learning_rate": 9.876506947245358e-06,
|
| 29885 |
+
"loss": 19.0949,
|
| 29886 |
+
"step": 42590
|
| 29887 |
+
},
|
| 29888 |
+
{
|
| 29889 |
+
"epoch": 0.7905413398840632,
|
| 29890 |
+
"grad_norm": 34.9375,
|
| 29891 |
+
"learning_rate": 9.876477951459316e-06,
|
| 29892 |
+
"loss": 18.5577,
|
| 29893 |
+
"step": 42600
|
| 29894 |
+
},
|
| 29895 |
+
{
|
| 29896 |
+
"epoch": 0.790726912968543,
|
| 29897 |
+
"grad_norm": 35.9375,
|
| 29898 |
+
"learning_rate": 9.876448955673272e-06,
|
| 29899 |
+
"loss": 18.6629,
|
| 29900 |
+
"step": 42610
|
| 29901 |
+
},
|
| 29902 |
+
{
|
| 29903 |
+
"epoch": 0.7909124860530229,
|
| 29904 |
+
"grad_norm": 35.40625,
|
| 29905 |
+
"learning_rate": 9.87641995988723e-06,
|
| 29906 |
+
"loss": 19.3618,
|
| 29907 |
+
"step": 42620
|
| 29908 |
+
},
|
| 29909 |
+
{
|
| 29910 |
+
"epoch": 0.7910980591375028,
|
| 29911 |
+
"grad_norm": 34.3125,
|
| 29912 |
+
"learning_rate": 9.876390964101188e-06,
|
| 29913 |
+
"loss": 18.985,
|
| 29914 |
+
"step": 42630
|
| 29915 |
+
},
|
| 29916 |
+
{
|
| 29917 |
+
"epoch": 0.7912836322219825,
|
| 29918 |
+
"grad_norm": 37.25,
|
| 29919 |
+
"learning_rate": 9.876361968315145e-06,
|
| 29920 |
+
"loss": 19.3863,
|
| 29921 |
+
"step": 42640
|
| 29922 |
+
},
|
| 29923 |
+
{
|
| 29924 |
+
"epoch": 0.7914692053064624,
|
| 29925 |
+
"grad_norm": 35.84375,
|
| 29926 |
+
"learning_rate": 9.876332972529103e-06,
|
| 29927 |
+
"loss": 19.2617,
|
| 29928 |
+
"step": 42650
|
| 29929 |
+
},
|
| 29930 |
+
{
|
| 29931 |
+
"epoch": 0.7916547783909422,
|
| 29932 |
+
"grad_norm": 35.75,
|
| 29933 |
+
"learning_rate": 9.87630397674306e-06,
|
| 29934 |
+
"loss": 19.6245,
|
| 29935 |
+
"step": 42660
|
| 29936 |
+
},
|
| 29937 |
+
{
|
| 29938 |
+
"epoch": 0.791840351475422,
|
| 29939 |
+
"grad_norm": 37.1875,
|
| 29940 |
+
"learning_rate": 9.876274980957018e-06,
|
| 29941 |
+
"loss": 18.9741,
|
| 29942 |
+
"step": 42670
|
| 29943 |
+
},
|
| 29944 |
+
{
|
| 29945 |
+
"epoch": 0.7920259245599018,
|
| 29946 |
+
"grad_norm": 37.21875,
|
| 29947 |
+
"learning_rate": 9.876245985170975e-06,
|
| 29948 |
+
"loss": 19.222,
|
| 29949 |
+
"step": 42680
|
| 29950 |
+
},
|
| 29951 |
+
{
|
| 29952 |
+
"epoch": 0.7922114976443817,
|
| 29953 |
+
"grad_norm": 33.5625,
|
| 29954 |
+
"learning_rate": 9.876216989384934e-06,
|
| 29955 |
+
"loss": 19.3086,
|
| 29956 |
+
"step": 42690
|
| 29957 |
+
},
|
| 29958 |
+
{
|
| 29959 |
+
"epoch": 0.7923970707288615,
|
| 29960 |
+
"grad_norm": 36.59375,
|
| 29961 |
+
"learning_rate": 9.876187993598892e-06,
|
| 29962 |
+
"loss": 19.1836,
|
| 29963 |
+
"step": 42700
|
| 29964 |
+
},
|
| 29965 |
+
{
|
| 29966 |
+
"epoch": 0.7925826438133413,
|
| 29967 |
+
"grad_norm": 36.625,
|
| 29968 |
+
"learning_rate": 9.876158997812849e-06,
|
| 29969 |
+
"loss": 19.0206,
|
| 29970 |
+
"step": 42710
|
| 29971 |
+
},
|
| 29972 |
+
{
|
| 29973 |
+
"epoch": 0.7927682168978212,
|
| 29974 |
+
"grad_norm": 35.5,
|
| 29975 |
+
"learning_rate": 9.876130002026806e-06,
|
| 29976 |
+
"loss": 19.2777,
|
| 29977 |
+
"step": 42720
|
| 29978 |
+
},
|
| 29979 |
+
{
|
| 29980 |
+
"epoch": 0.7929537899823009,
|
| 29981 |
+
"grad_norm": 38.15625,
|
| 29982 |
+
"learning_rate": 9.876101006240764e-06,
|
| 29983 |
+
"loss": 19.4687,
|
| 29984 |
+
"step": 42730
|
| 29985 |
+
},
|
| 29986 |
+
{
|
| 29987 |
+
"epoch": 0.7931393630667808,
|
| 29988 |
+
"grad_norm": 36.78125,
|
| 29989 |
+
"learning_rate": 9.876072010454721e-06,
|
| 29990 |
+
"loss": 19.2891,
|
| 29991 |
+
"step": 42740
|
| 29992 |
+
},
|
| 29993 |
+
{
|
| 29994 |
+
"epoch": 0.7933249361512607,
|
| 29995 |
+
"grad_norm": 36.3125,
|
| 29996 |
+
"learning_rate": 9.876043014668679e-06,
|
| 29997 |
+
"loss": 19.1073,
|
| 29998 |
+
"step": 42750
|
| 29999 |
+
},
|
| 30000 |
+
{
|
| 30001 |
+
"epoch": 0.7935105092357404,
|
| 30002 |
+
"grad_norm": 37.15625,
|
| 30003 |
+
"learning_rate": 9.876014018882638e-06,
|
| 30004 |
+
"loss": 19.2614,
|
| 30005 |
+
"step": 42760
|
| 30006 |
+
},
|
| 30007 |
+
{
|
| 30008 |
+
"epoch": 0.7936960823202203,
|
| 30009 |
+
"grad_norm": 35.5625,
|
| 30010 |
+
"learning_rate": 9.875985023096593e-06,
|
| 30011 |
+
"loss": 19.5217,
|
| 30012 |
+
"step": 42770
|
| 30013 |
+
},
|
| 30014 |
+
{
|
| 30015 |
+
"epoch": 0.7938816554047001,
|
| 30016 |
+
"grad_norm": 35.5625,
|
| 30017 |
+
"learning_rate": 9.875956027310551e-06,
|
| 30018 |
+
"loss": 18.8875,
|
| 30019 |
+
"step": 42780
|
| 30020 |
+
},
|
| 30021 |
+
{
|
| 30022 |
+
"epoch": 0.7940672284891799,
|
| 30023 |
+
"grad_norm": 34.9375,
|
| 30024 |
+
"learning_rate": 9.87592703152451e-06,
|
| 30025 |
+
"loss": 18.9884,
|
| 30026 |
+
"step": 42790
|
| 30027 |
+
},
|
| 30028 |
+
{
|
| 30029 |
+
"epoch": 0.7942528015736597,
|
| 30030 |
+
"grad_norm": 34.59375,
|
| 30031 |
+
"learning_rate": 9.875898035738467e-06,
|
| 30032 |
+
"loss": 18.7005,
|
| 30033 |
+
"step": 42800
|
| 30034 |
+
},
|
| 30035 |
+
{
|
| 30036 |
+
"epoch": 0.7944383746581396,
|
| 30037 |
+
"grad_norm": 34.78125,
|
| 30038 |
+
"learning_rate": 9.875869039952425e-06,
|
| 30039 |
+
"loss": 19.4181,
|
| 30040 |
+
"step": 42810
|
| 30041 |
+
},
|
| 30042 |
+
{
|
| 30043 |
+
"epoch": 0.7946239477426195,
|
| 30044 |
+
"grad_norm": 35.1875,
|
| 30045 |
+
"learning_rate": 9.875840044166382e-06,
|
| 30046 |
+
"loss": 19.495,
|
| 30047 |
+
"step": 42820
|
| 30048 |
+
},
|
| 30049 |
+
{
|
| 30050 |
+
"epoch": 0.7948095208270992,
|
| 30051 |
+
"grad_norm": 36.21875,
|
| 30052 |
+
"learning_rate": 9.87581104838034e-06,
|
| 30053 |
+
"loss": 19.6189,
|
| 30054 |
+
"step": 42830
|
| 30055 |
+
},
|
| 30056 |
+
{
|
| 30057 |
+
"epoch": 0.7949950939115791,
|
| 30058 |
+
"grad_norm": 34.40625,
|
| 30059 |
+
"learning_rate": 9.875782052594297e-06,
|
| 30060 |
+
"loss": 19.131,
|
| 30061 |
+
"step": 42840
|
| 30062 |
+
},
|
| 30063 |
+
{
|
| 30064 |
+
"epoch": 0.7951806669960589,
|
| 30065 |
+
"grad_norm": 38.84375,
|
| 30066 |
+
"learning_rate": 9.875753056808254e-06,
|
| 30067 |
+
"loss": 19.5435,
|
| 30068 |
+
"step": 42850
|
| 30069 |
+
},
|
| 30070 |
+
{
|
| 30071 |
+
"epoch": 0.7953662400805387,
|
| 30072 |
+
"grad_norm": 36.46875,
|
| 30073 |
+
"learning_rate": 9.875724061022214e-06,
|
| 30074 |
+
"loss": 19.4428,
|
| 30075 |
+
"step": 42860
|
| 30076 |
+
},
|
| 30077 |
+
{
|
| 30078 |
+
"epoch": 0.7955518131650186,
|
| 30079 |
+
"grad_norm": 34.53125,
|
| 30080 |
+
"learning_rate": 9.875695065236171e-06,
|
| 30081 |
+
"loss": 19.3201,
|
| 30082 |
+
"step": 42870
|
| 30083 |
+
},
|
| 30084 |
+
{
|
| 30085 |
+
"epoch": 0.7957373862494984,
|
| 30086 |
+
"grad_norm": 37.0,
|
| 30087 |
+
"learning_rate": 9.875666069450127e-06,
|
| 30088 |
+
"loss": 19.0514,
|
| 30089 |
+
"step": 42880
|
| 30090 |
+
},
|
| 30091 |
+
{
|
| 30092 |
+
"epoch": 0.7959229593339782,
|
| 30093 |
+
"grad_norm": 36.25,
|
| 30094 |
+
"learning_rate": 9.875637073664086e-06,
|
| 30095 |
+
"loss": 18.7184,
|
| 30096 |
+
"step": 42890
|
| 30097 |
+
},
|
| 30098 |
+
{
|
| 30099 |
+
"epoch": 0.796108532418458,
|
| 30100 |
+
"grad_norm": 35.375,
|
| 30101 |
+
"learning_rate": 9.875608077878043e-06,
|
| 30102 |
+
"loss": 19.1007,
|
| 30103 |
+
"step": 42900
|
| 30104 |
+
},
|
| 30105 |
+
{
|
| 30106 |
+
"epoch": 0.7962941055029379,
|
| 30107 |
+
"grad_norm": 33.46875,
|
| 30108 |
+
"learning_rate": 9.875579082092e-06,
|
| 30109 |
+
"loss": 18.8951,
|
| 30110 |
+
"step": 42910
|
| 30111 |
+
},
|
| 30112 |
+
{
|
| 30113 |
+
"epoch": 0.7964796785874176,
|
| 30114 |
+
"grad_norm": 37.375,
|
| 30115 |
+
"learning_rate": 9.875550086305958e-06,
|
| 30116 |
+
"loss": 19.4892,
|
| 30117 |
+
"step": 42920
|
| 30118 |
+
},
|
| 30119 |
+
{
|
| 30120 |
+
"epoch": 0.7966652516718975,
|
| 30121 |
+
"grad_norm": 35.78125,
|
| 30122 |
+
"learning_rate": 9.875521090519915e-06,
|
| 30123 |
+
"loss": 19.6579,
|
| 30124 |
+
"step": 42930
|
| 30125 |
+
},
|
| 30126 |
+
{
|
| 30127 |
+
"epoch": 0.7968508247563774,
|
| 30128 |
+
"grad_norm": 36.375,
|
| 30129 |
+
"learning_rate": 9.875492094733873e-06,
|
| 30130 |
+
"loss": 19.2619,
|
| 30131 |
+
"step": 42940
|
| 30132 |
+
},
|
| 30133 |
+
{
|
| 30134 |
+
"epoch": 0.7970363978408571,
|
| 30135 |
+
"grad_norm": 38.0625,
|
| 30136 |
+
"learning_rate": 9.87546309894783e-06,
|
| 30137 |
+
"loss": 19.5734,
|
| 30138 |
+
"step": 42950
|
| 30139 |
+
},
|
| 30140 |
+
{
|
| 30141 |
+
"epoch": 0.797221970925337,
|
| 30142 |
+
"grad_norm": 36.21875,
|
| 30143 |
+
"learning_rate": 9.875434103161788e-06,
|
| 30144 |
+
"loss": 18.9373,
|
| 30145 |
+
"step": 42960
|
| 30146 |
+
},
|
| 30147 |
+
{
|
| 30148 |
+
"epoch": 0.7974075440098168,
|
| 30149 |
+
"grad_norm": 35.9375,
|
| 30150 |
+
"learning_rate": 9.875405107375747e-06,
|
| 30151 |
+
"loss": 19.2821,
|
| 30152 |
+
"step": 42970
|
| 30153 |
+
},
|
| 30154 |
+
{
|
| 30155 |
+
"epoch": 0.7975931170942966,
|
| 30156 |
+
"grad_norm": 35.4375,
|
| 30157 |
+
"learning_rate": 9.875376111589704e-06,
|
| 30158 |
+
"loss": 19.2418,
|
| 30159 |
+
"step": 42980
|
| 30160 |
+
},
|
| 30161 |
+
{
|
| 30162 |
+
"epoch": 0.7977786901787765,
|
| 30163 |
+
"grad_norm": 36.03125,
|
| 30164 |
+
"learning_rate": 9.875347115803662e-06,
|
| 30165 |
+
"loss": 19.0795,
|
| 30166 |
+
"step": 42990
|
| 30167 |
+
},
|
| 30168 |
+
{
|
| 30169 |
+
"epoch": 0.7979642632632563,
|
| 30170 |
+
"grad_norm": 36.40625,
|
| 30171 |
+
"learning_rate": 9.875318120017619e-06,
|
| 30172 |
+
"loss": 19.5488,
|
| 30173 |
+
"step": 43000
|
| 30174 |
+
},
|
| 30175 |
+
{
|
| 30176 |
+
"epoch": 0.7981498363477362,
|
| 30177 |
+
"grad_norm": 34.9375,
|
| 30178 |
+
"learning_rate": 9.875289124231576e-06,
|
| 30179 |
+
"loss": 19.3044,
|
| 30180 |
+
"step": 43010
|
| 30181 |
+
},
|
| 30182 |
+
{
|
| 30183 |
+
"epoch": 0.7983354094322159,
|
| 30184 |
+
"grad_norm": 36.53125,
|
| 30185 |
+
"learning_rate": 9.875260128445534e-06,
|
| 30186 |
+
"loss": 19.1615,
|
| 30187 |
+
"step": 43020
|
| 30188 |
+
},
|
| 30189 |
+
{
|
| 30190 |
+
"epoch": 0.7985209825166958,
|
| 30191 |
+
"grad_norm": 35.96875,
|
| 30192 |
+
"learning_rate": 9.875231132659491e-06,
|
| 30193 |
+
"loss": 19.5529,
|
| 30194 |
+
"step": 43030
|
| 30195 |
+
},
|
| 30196 |
+
{
|
| 30197 |
+
"epoch": 0.7987065556011756,
|
| 30198 |
+
"grad_norm": 36.59375,
|
| 30199 |
+
"learning_rate": 9.875202136873449e-06,
|
| 30200 |
+
"loss": 18.9387,
|
| 30201 |
+
"step": 43040
|
| 30202 |
+
},
|
| 30203 |
+
{
|
| 30204 |
+
"epoch": 0.7988921286856554,
|
| 30205 |
+
"grad_norm": 35.4375,
|
| 30206 |
+
"learning_rate": 9.875173141087406e-06,
|
| 30207 |
+
"loss": 19.0329,
|
| 30208 |
+
"step": 43050
|
| 30209 |
+
},
|
| 30210 |
+
{
|
| 30211 |
+
"epoch": 0.7990777017701353,
|
| 30212 |
+
"grad_norm": 36.8125,
|
| 30213 |
+
"learning_rate": 9.875144145301363e-06,
|
| 30214 |
+
"loss": 19.6343,
|
| 30215 |
+
"step": 43060
|
| 30216 |
+
},
|
| 30217 |
+
{
|
| 30218 |
+
"epoch": 0.7992632748546151,
|
| 30219 |
+
"grad_norm": 36.40625,
|
| 30220 |
+
"learning_rate": 9.875115149515322e-06,
|
| 30221 |
+
"loss": 19.1546,
|
| 30222 |
+
"step": 43070
|
| 30223 |
+
},
|
| 30224 |
+
{
|
| 30225 |
+
"epoch": 0.7994488479390949,
|
| 30226 |
+
"grad_norm": 35.96875,
|
| 30227 |
+
"learning_rate": 9.87508615372928e-06,
|
| 30228 |
+
"loss": 18.9671,
|
| 30229 |
+
"step": 43080
|
| 30230 |
+
},
|
| 30231 |
+
{
|
| 30232 |
+
"epoch": 0.7996344210235747,
|
| 30233 |
+
"grad_norm": 36.53125,
|
| 30234 |
+
"learning_rate": 9.875057157943236e-06,
|
| 30235 |
+
"loss": 19.1909,
|
| 30236 |
+
"step": 43090
|
| 30237 |
+
},
|
| 30238 |
+
{
|
| 30239 |
+
"epoch": 0.7998199941080546,
|
| 30240 |
+
"grad_norm": 36.28125,
|
| 30241 |
+
"learning_rate": 9.875028162157195e-06,
|
| 30242 |
+
"loss": 19.067,
|
| 30243 |
+
"step": 43100
|
| 30244 |
+
},
|
| 30245 |
+
{
|
| 30246 |
+
"epoch": 0.8000055671925344,
|
| 30247 |
+
"grad_norm": 35.375,
|
| 30248 |
+
"learning_rate": 9.874999166371152e-06,
|
| 30249 |
+
"loss": 18.7279,
|
| 30250 |
+
"step": 43110
|
| 30251 |
+
},
|
| 30252 |
+
{
|
| 30253 |
+
"epoch": 0.8001911402770142,
|
| 30254 |
+
"grad_norm": 36.40625,
|
| 30255 |
+
"learning_rate": 9.87497017058511e-06,
|
| 30256 |
+
"loss": 19.0467,
|
| 30257 |
+
"step": 43120
|
| 30258 |
+
},
|
| 30259 |
+
{
|
| 30260 |
+
"epoch": 0.8003767133614941,
|
| 30261 |
+
"grad_norm": 39.53125,
|
| 30262 |
+
"learning_rate": 9.874941174799067e-06,
|
| 30263 |
+
"loss": 19.5147,
|
| 30264 |
+
"step": 43130
|
| 30265 |
+
},
|
| 30266 |
+
{
|
| 30267 |
+
"epoch": 0.8005622864459738,
|
| 30268 |
+
"grad_norm": 36.625,
|
| 30269 |
+
"learning_rate": 9.874912179013026e-06,
|
| 30270 |
+
"loss": 18.7374,
|
| 30271 |
+
"step": 43140
|
| 30272 |
+
},
|
| 30273 |
+
{
|
| 30274 |
+
"epoch": 0.8007478595304537,
|
| 30275 |
+
"grad_norm": 34.3125,
|
| 30276 |
+
"learning_rate": 9.874883183226982e-06,
|
| 30277 |
+
"loss": 19.4482,
|
| 30278 |
+
"step": 43150
|
| 30279 |
+
},
|
| 30280 |
+
{
|
| 30281 |
+
"epoch": 0.8009334326149335,
|
| 30282 |
+
"grad_norm": 35.8125,
|
| 30283 |
+
"learning_rate": 9.874854187440939e-06,
|
| 30284 |
+
"loss": 19.1165,
|
| 30285 |
+
"step": 43160
|
| 30286 |
+
},
|
| 30287 |
+
{
|
| 30288 |
+
"epoch": 0.8011190056994134,
|
| 30289 |
+
"grad_norm": 35.65625,
|
| 30290 |
+
"learning_rate": 9.874825191654898e-06,
|
| 30291 |
+
"loss": 18.8476,
|
| 30292 |
+
"step": 43170
|
| 30293 |
+
},
|
| 30294 |
+
{
|
| 30295 |
+
"epoch": 0.8013045787838932,
|
| 30296 |
+
"grad_norm": 36.6875,
|
| 30297 |
+
"learning_rate": 9.874796195868856e-06,
|
| 30298 |
+
"loss": 18.6734,
|
| 30299 |
+
"step": 43180
|
| 30300 |
+
},
|
| 30301 |
+
{
|
| 30302 |
+
"epoch": 0.801490151868373,
|
| 30303 |
+
"grad_norm": 36.3125,
|
| 30304 |
+
"learning_rate": 9.874767200082813e-06,
|
| 30305 |
+
"loss": 19.189,
|
| 30306 |
+
"step": 43190
|
| 30307 |
+
},
|
| 30308 |
+
{
|
| 30309 |
+
"epoch": 0.8016757249528529,
|
| 30310 |
+
"grad_norm": 35.8125,
|
| 30311 |
+
"learning_rate": 9.87473820429677e-06,
|
| 30312 |
+
"loss": 19.4344,
|
| 30313 |
+
"step": 43200
|
| 30314 |
+
},
|
| 30315 |
+
{
|
| 30316 |
+
"epoch": 0.8018612980373326,
|
| 30317 |
+
"grad_norm": 38.03125,
|
| 30318 |
+
"learning_rate": 9.874709208510728e-06,
|
| 30319 |
+
"loss": 19.112,
|
| 30320 |
+
"step": 43210
|
| 30321 |
+
},
|
| 30322 |
+
{
|
| 30323 |
+
"epoch": 0.8020468711218125,
|
| 30324 |
+
"grad_norm": 37.0625,
|
| 30325 |
+
"learning_rate": 9.874680212724685e-06,
|
| 30326 |
+
"loss": 19.2927,
|
| 30327 |
+
"step": 43220
|
| 30328 |
+
},
|
| 30329 |
+
{
|
| 30330 |
+
"epoch": 0.8022324442062924,
|
| 30331 |
+
"grad_norm": 35.15625,
|
| 30332 |
+
"learning_rate": 9.874651216938643e-06,
|
| 30333 |
+
"loss": 19.2974,
|
| 30334 |
+
"step": 43230
|
| 30335 |
+
},
|
| 30336 |
+
{
|
| 30337 |
+
"epoch": 0.8024180172907721,
|
| 30338 |
+
"grad_norm": 34.84375,
|
| 30339 |
+
"learning_rate": 9.874622221152602e-06,
|
| 30340 |
+
"loss": 18.9434,
|
| 30341 |
+
"step": 43240
|
| 30342 |
+
},
|
| 30343 |
+
{
|
| 30344 |
+
"epoch": 0.802603590375252,
|
| 30345 |
+
"grad_norm": 37.84375,
|
| 30346 |
+
"learning_rate": 9.874593225366557e-06,
|
| 30347 |
+
"loss": 18.9947,
|
| 30348 |
+
"step": 43250
|
| 30349 |
+
},
|
| 30350 |
+
{
|
| 30351 |
+
"epoch": 0.8027891634597318,
|
| 30352 |
+
"grad_norm": 36.3125,
|
| 30353 |
+
"learning_rate": 9.874564229580515e-06,
|
| 30354 |
+
"loss": 19.3009,
|
| 30355 |
+
"step": 43260
|
| 30356 |
+
},
|
| 30357 |
+
{
|
| 30358 |
+
"epoch": 0.8029747365442116,
|
| 30359 |
+
"grad_norm": 33.3125,
|
| 30360 |
+
"learning_rate": 9.874535233794474e-06,
|
| 30361 |
+
"loss": 18.7793,
|
| 30362 |
+
"step": 43270
|
| 30363 |
+
},
|
| 30364 |
+
{
|
| 30365 |
+
"epoch": 0.8031603096286914,
|
| 30366 |
+
"grad_norm": 36.78125,
|
| 30367 |
+
"learning_rate": 9.874506238008431e-06,
|
| 30368 |
+
"loss": 19.2566,
|
| 30369 |
+
"step": 43280
|
| 30370 |
+
},
|
| 30371 |
+
{
|
| 30372 |
+
"epoch": 0.8033458827131713,
|
| 30373 |
+
"grad_norm": 36.59375,
|
| 30374 |
+
"learning_rate": 9.874477242222389e-06,
|
| 30375 |
+
"loss": 19.238,
|
| 30376 |
+
"step": 43290
|
| 30377 |
+
},
|
| 30378 |
+
{
|
| 30379 |
+
"epoch": 0.8035314557976511,
|
| 30380 |
+
"grad_norm": 36.4375,
|
| 30381 |
+
"learning_rate": 9.874448246436346e-06,
|
| 30382 |
+
"loss": 19.245,
|
| 30383 |
+
"step": 43300
|
| 30384 |
+
},
|
| 30385 |
+
{
|
| 30386 |
+
"epoch": 0.8037170288821309,
|
| 30387 |
+
"grad_norm": 35.25,
|
| 30388 |
+
"learning_rate": 9.874419250650304e-06,
|
| 30389 |
+
"loss": 19.1339,
|
| 30390 |
+
"step": 43310
|
| 30391 |
+
},
|
| 30392 |
+
{
|
| 30393 |
+
"epoch": 0.8039026019666108,
|
| 30394 |
+
"grad_norm": 34.9375,
|
| 30395 |
+
"learning_rate": 9.874390254864261e-06,
|
| 30396 |
+
"loss": 18.6303,
|
| 30397 |
+
"step": 43320
|
| 30398 |
+
},
|
| 30399 |
+
{
|
| 30400 |
+
"epoch": 0.8040881750510905,
|
| 30401 |
+
"grad_norm": 36.3125,
|
| 30402 |
+
"learning_rate": 9.874361259078218e-06,
|
| 30403 |
+
"loss": 18.7144,
|
| 30404 |
+
"step": 43330
|
| 30405 |
+
},
|
| 30406 |
+
{
|
| 30407 |
+
"epoch": 0.8042737481355704,
|
| 30408 |
+
"grad_norm": 37.4375,
|
| 30409 |
+
"learning_rate": 9.874332263292178e-06,
|
| 30410 |
+
"loss": 19.6017,
|
| 30411 |
+
"step": 43340
|
| 30412 |
+
},
|
| 30413 |
+
{
|
| 30414 |
+
"epoch": 0.8044593212200503,
|
| 30415 |
+
"grad_norm": 35.28125,
|
| 30416 |
+
"learning_rate": 9.874303267506135e-06,
|
| 30417 |
+
"loss": 19.0519,
|
| 30418 |
+
"step": 43350
|
| 30419 |
+
},
|
| 30420 |
+
{
|
| 30421 |
+
"epoch": 0.8046448943045301,
|
| 30422 |
+
"grad_norm": 36.53125,
|
| 30423 |
+
"learning_rate": 9.87427427172009e-06,
|
| 30424 |
+
"loss": 19.1844,
|
| 30425 |
+
"step": 43360
|
| 30426 |
+
},
|
| 30427 |
+
{
|
| 30428 |
+
"epoch": 0.8048304673890099,
|
| 30429 |
+
"grad_norm": 36.15625,
|
| 30430 |
+
"learning_rate": 9.87424527593405e-06,
|
| 30431 |
+
"loss": 18.981,
|
| 30432 |
+
"step": 43370
|
| 30433 |
+
},
|
| 30434 |
+
{
|
| 30435 |
+
"epoch": 0.8050160404734897,
|
| 30436 |
+
"grad_norm": 37.0625,
|
| 30437 |
+
"learning_rate": 9.874216280148007e-06,
|
| 30438 |
+
"loss": 19.1258,
|
| 30439 |
+
"step": 43380
|
| 30440 |
+
},
|
| 30441 |
+
{
|
| 30442 |
+
"epoch": 0.8052016135579696,
|
| 30443 |
+
"grad_norm": 35.21875,
|
| 30444 |
+
"learning_rate": 9.874187284361965e-06,
|
| 30445 |
+
"loss": 18.9282,
|
| 30446 |
+
"step": 43390
|
| 30447 |
+
},
|
| 30448 |
+
{
|
| 30449 |
+
"epoch": 0.8053871866424493,
|
| 30450 |
+
"grad_norm": 39.625,
|
| 30451 |
+
"learning_rate": 9.874158288575922e-06,
|
| 30452 |
+
"loss": 18.9325,
|
| 30453 |
+
"step": 43400
|
| 30454 |
+
},
|
| 30455 |
+
{
|
| 30456 |
+
"epoch": 0.8055727597269292,
|
| 30457 |
+
"grad_norm": 35.3125,
|
| 30458 |
+
"learning_rate": 9.87412929278988e-06,
|
| 30459 |
+
"loss": 18.5758,
|
| 30460 |
+
"step": 43410
|
| 30461 |
+
},
|
| 30462 |
+
{
|
| 30463 |
+
"epoch": 0.8057583328114091,
|
| 30464 |
+
"grad_norm": 35.09375,
|
| 30465 |
+
"learning_rate": 9.874100297003837e-06,
|
| 30466 |
+
"loss": 18.8267,
|
| 30467 |
+
"step": 43420
|
| 30468 |
+
},
|
| 30469 |
+
{
|
| 30470 |
+
"epoch": 0.8059439058958888,
|
| 30471 |
+
"grad_norm": 34.21875,
|
| 30472 |
+
"learning_rate": 9.874071301217794e-06,
|
| 30473 |
+
"loss": 19.2754,
|
| 30474 |
+
"step": 43430
|
| 30475 |
+
},
|
| 30476 |
+
{
|
| 30477 |
+
"epoch": 0.8061294789803687,
|
| 30478 |
+
"grad_norm": 36.65625,
|
| 30479 |
+
"learning_rate": 9.874042305431752e-06,
|
| 30480 |
+
"loss": 19.3909,
|
| 30481 |
+
"step": 43440
|
| 30482 |
+
},
|
| 30483 |
+
{
|
| 30484 |
+
"epoch": 0.8063150520648485,
|
| 30485 |
+
"grad_norm": 35.03125,
|
| 30486 |
+
"learning_rate": 9.87401330964571e-06,
|
| 30487 |
+
"loss": 19.1061,
|
| 30488 |
+
"step": 43450
|
| 30489 |
+
},
|
| 30490 |
+
{
|
| 30491 |
+
"epoch": 0.8065006251493283,
|
| 30492 |
+
"grad_norm": 37.875,
|
| 30493 |
+
"learning_rate": 9.873984313859668e-06,
|
| 30494 |
+
"loss": 19.1138,
|
| 30495 |
+
"step": 43460
|
| 30496 |
+
},
|
| 30497 |
+
{
|
| 30498 |
+
"epoch": 0.8066861982338082,
|
| 30499 |
+
"grad_norm": 36.71875,
|
| 30500 |
+
"learning_rate": 9.873955318073626e-06,
|
| 30501 |
+
"loss": 19.7451,
|
| 30502 |
+
"step": 43470
|
| 30503 |
+
},
|
| 30504 |
+
{
|
| 30505 |
+
"epoch": 0.806871771318288,
|
| 30506 |
+
"grad_norm": 36.5,
|
| 30507 |
+
"learning_rate": 9.873926322287583e-06,
|
| 30508 |
+
"loss": 19.0428,
|
| 30509 |
+
"step": 43480
|
| 30510 |
+
},
|
| 30511 |
+
{
|
| 30512 |
+
"epoch": 0.8070573444027678,
|
| 30513 |
+
"grad_norm": 36.8125,
|
| 30514 |
+
"learning_rate": 9.87389732650154e-06,
|
| 30515 |
+
"loss": 18.7527,
|
| 30516 |
+
"step": 43490
|
| 30517 |
+
},
|
| 30518 |
+
{
|
| 30519 |
+
"epoch": 0.8072429174872476,
|
| 30520 |
+
"grad_norm": 36.5625,
|
| 30521 |
+
"learning_rate": 9.873868330715498e-06,
|
| 30522 |
+
"loss": 19.3082,
|
| 30523 |
+
"step": 43500
|
| 30524 |
+
},
|
| 30525 |
+
{
|
| 30526 |
+
"epoch": 0.8074284905717275,
|
| 30527 |
+
"grad_norm": 35.9375,
|
| 30528 |
+
"learning_rate": 9.873839334929455e-06,
|
| 30529 |
+
"loss": 19.3901,
|
| 30530 |
+
"step": 43510
|
| 30531 |
+
},
|
| 30532 |
+
{
|
| 30533 |
+
"epoch": 0.8076140636562072,
|
| 30534 |
+
"grad_norm": 36.8125,
|
| 30535 |
+
"learning_rate": 9.873810339143413e-06,
|
| 30536 |
+
"loss": 19.0965,
|
| 30537 |
+
"step": 43520
|
| 30538 |
+
},
|
| 30539 |
+
{
|
| 30540 |
+
"epoch": 0.8077996367406871,
|
| 30541 |
+
"grad_norm": 36.75,
|
| 30542 |
+
"learning_rate": 9.87378134335737e-06,
|
| 30543 |
+
"loss": 19.2141,
|
| 30544 |
+
"step": 43530
|
| 30545 |
+
},
|
| 30546 |
+
{
|
| 30547 |
+
"epoch": 0.807985209825167,
|
| 30548 |
+
"grad_norm": 33.71875,
|
| 30549 |
+
"learning_rate": 9.873752347571327e-06,
|
| 30550 |
+
"loss": 19.1556,
|
| 30551 |
+
"step": 43540
|
| 30552 |
+
},
|
| 30553 |
+
{
|
| 30554 |
+
"epoch": 0.8081707829096468,
|
| 30555 |
+
"grad_norm": 37.71875,
|
| 30556 |
+
"learning_rate": 9.873723351785286e-06,
|
| 30557 |
+
"loss": 19.2163,
|
| 30558 |
+
"step": 43550
|
| 30559 |
+
},
|
| 30560 |
+
{
|
| 30561 |
+
"epoch": 0.8083563559941266,
|
| 30562 |
+
"grad_norm": 34.5,
|
| 30563 |
+
"learning_rate": 9.873694355999244e-06,
|
| 30564 |
+
"loss": 19.1022,
|
| 30565 |
+
"step": 43560
|
| 30566 |
+
},
|
| 30567 |
+
{
|
| 30568 |
+
"epoch": 0.8085419290786064,
|
| 30569 |
+
"grad_norm": 37.5,
|
| 30570 |
+
"learning_rate": 9.873665360213201e-06,
|
| 30571 |
+
"loss": 19.0576,
|
| 30572 |
+
"step": 43570
|
| 30573 |
+
},
|
| 30574 |
+
{
|
| 30575 |
+
"epoch": 0.8087275021630863,
|
| 30576 |
+
"grad_norm": 36.625,
|
| 30577 |
+
"learning_rate": 9.873636364427159e-06,
|
| 30578 |
+
"loss": 19.0831,
|
| 30579 |
+
"step": 43580
|
| 30580 |
+
},
|
| 30581 |
+
{
|
| 30582 |
+
"epoch": 0.8089130752475661,
|
| 30583 |
+
"grad_norm": 35.125,
|
| 30584 |
+
"learning_rate": 9.873607368641116e-06,
|
| 30585 |
+
"loss": 18.7494,
|
| 30586 |
+
"step": 43590
|
| 30587 |
+
},
|
| 30588 |
+
{
|
| 30589 |
+
"epoch": 0.8090986483320459,
|
| 30590 |
+
"grad_norm": 36.625,
|
| 30591 |
+
"learning_rate": 9.873578372855074e-06,
|
| 30592 |
+
"loss": 19.0648,
|
| 30593 |
+
"step": 43600
|
| 30594 |
+
},
|
| 30595 |
+
{
|
| 30596 |
+
"epoch": 0.8092842214165258,
|
| 30597 |
+
"grad_norm": 36.5,
|
| 30598 |
+
"learning_rate": 9.873549377069031e-06,
|
| 30599 |
+
"loss": 19.0214,
|
| 30600 |
+
"step": 43610
|
| 30601 |
+
},
|
| 30602 |
+
{
|
| 30603 |
+
"epoch": 0.8094697945010055,
|
| 30604 |
+
"grad_norm": 36.8125,
|
| 30605 |
+
"learning_rate": 9.87352038128299e-06,
|
| 30606 |
+
"loss": 18.9287,
|
| 30607 |
+
"step": 43620
|
| 30608 |
+
},
|
| 30609 |
+
{
|
| 30610 |
+
"epoch": 0.8096553675854854,
|
| 30611 |
+
"grad_norm": 35.15625,
|
| 30612 |
+
"learning_rate": 9.873491385496946e-06,
|
| 30613 |
+
"loss": 19.3647,
|
| 30614 |
+
"step": 43630
|
| 30615 |
+
},
|
| 30616 |
+
{
|
| 30617 |
+
"epoch": 0.8098409406699653,
|
| 30618 |
+
"grad_norm": 35.3125,
|
| 30619 |
+
"learning_rate": 9.873462389710903e-06,
|
| 30620 |
+
"loss": 18.8901,
|
| 30621 |
+
"step": 43640
|
| 30622 |
+
},
|
| 30623 |
+
{
|
| 30624 |
+
"epoch": 0.810026513754445,
|
| 30625 |
+
"grad_norm": 33.78125,
|
| 30626 |
+
"learning_rate": 9.873433393924862e-06,
|
| 30627 |
+
"loss": 19.4528,
|
| 30628 |
+
"step": 43650
|
| 30629 |
+
},
|
| 30630 |
+
{
|
| 30631 |
+
"epoch": 0.8102120868389249,
|
| 30632 |
+
"grad_norm": 36.75,
|
| 30633 |
+
"learning_rate": 9.87340439813882e-06,
|
| 30634 |
+
"loss": 18.7406,
|
| 30635 |
+
"step": 43660
|
| 30636 |
+
},
|
| 30637 |
+
{
|
| 30638 |
+
"epoch": 0.8103976599234047,
|
| 30639 |
+
"grad_norm": 37.125,
|
| 30640 |
+
"learning_rate": 9.873375402352777e-06,
|
| 30641 |
+
"loss": 19.3565,
|
| 30642 |
+
"step": 43670
|
| 30643 |
+
},
|
| 30644 |
+
{
|
| 30645 |
+
"epoch": 0.8105832330078845,
|
| 30646 |
+
"grad_norm": 36.9375,
|
| 30647 |
+
"learning_rate": 9.873346406566734e-06,
|
| 30648 |
+
"loss": 19.6584,
|
| 30649 |
+
"step": 43680
|
| 30650 |
+
},
|
| 30651 |
+
{
|
| 30652 |
+
"epoch": 0.8107688060923643,
|
| 30653 |
+
"grad_norm": 36.3125,
|
| 30654 |
+
"learning_rate": 9.873317410780692e-06,
|
| 30655 |
+
"loss": 19.1017,
|
| 30656 |
+
"step": 43690
|
| 30657 |
+
},
|
| 30658 |
+
{
|
| 30659 |
+
"epoch": 0.8109543791768442,
|
| 30660 |
+
"grad_norm": 36.5625,
|
| 30661 |
+
"learning_rate": 9.87328841499465e-06,
|
| 30662 |
+
"loss": 19.0014,
|
| 30663 |
+
"step": 43700
|
| 30664 |
+
},
|
| 30665 |
+
{
|
| 30666 |
+
"epoch": 0.8111399522613241,
|
| 30667 |
+
"grad_norm": 35.96875,
|
| 30668 |
+
"learning_rate": 9.873259419208607e-06,
|
| 30669 |
+
"loss": 19.361,
|
| 30670 |
+
"step": 43710
|
| 30671 |
+
},
|
| 30672 |
+
{
|
| 30673 |
+
"epoch": 0.8113255253458038,
|
| 30674 |
+
"grad_norm": 35.96875,
|
| 30675 |
+
"learning_rate": 9.873230423422566e-06,
|
| 30676 |
+
"loss": 18.9856,
|
| 30677 |
+
"step": 43720
|
| 30678 |
+
},
|
| 30679 |
+
{
|
| 30680 |
+
"epoch": 0.8115110984302837,
|
| 30681 |
+
"grad_norm": 34.25,
|
| 30682 |
+
"learning_rate": 9.873201427636523e-06,
|
| 30683 |
+
"loss": 19.1586,
|
| 30684 |
+
"step": 43730
|
| 30685 |
+
},
|
| 30686 |
+
{
|
| 30687 |
+
"epoch": 0.8116966715147635,
|
| 30688 |
+
"grad_norm": 35.59375,
|
| 30689 |
+
"learning_rate": 9.873172431850479e-06,
|
| 30690 |
+
"loss": 19.5068,
|
| 30691 |
+
"step": 43740
|
| 30692 |
+
},
|
| 30693 |
+
{
|
| 30694 |
+
"epoch": 0.8118822445992433,
|
| 30695 |
+
"grad_norm": 34.59375,
|
| 30696 |
+
"learning_rate": 9.873143436064438e-06,
|
| 30697 |
+
"loss": 19.1461,
|
| 30698 |
+
"step": 43750
|
| 30699 |
+
},
|
| 30700 |
+
{
|
| 30701 |
+
"epoch": 0.8120678176837232,
|
| 30702 |
+
"grad_norm": 37.71875,
|
| 30703 |
+
"learning_rate": 9.873114440278395e-06,
|
| 30704 |
+
"loss": 19.1016,
|
| 30705 |
+
"step": 43760
|
| 30706 |
+
},
|
| 30707 |
+
{
|
| 30708 |
+
"epoch": 0.812253390768203,
|
| 30709 |
+
"grad_norm": 34.34375,
|
| 30710 |
+
"learning_rate": 9.873085444492353e-06,
|
| 30711 |
+
"loss": 19.1093,
|
| 30712 |
+
"step": 43770
|
| 30713 |
+
},
|
| 30714 |
+
{
|
| 30715 |
+
"epoch": 0.8124389638526828,
|
| 30716 |
+
"grad_norm": 34.9375,
|
| 30717 |
+
"learning_rate": 9.87305644870631e-06,
|
| 30718 |
+
"loss": 18.9712,
|
| 30719 |
+
"step": 43780
|
| 30720 |
+
},
|
| 30721 |
+
{
|
| 30722 |
+
"epoch": 0.8126245369371626,
|
| 30723 |
+
"grad_norm": 36.6875,
|
| 30724 |
+
"learning_rate": 9.873027452920268e-06,
|
| 30725 |
+
"loss": 19.6167,
|
| 30726 |
+
"step": 43790
|
| 30727 |
+
},
|
| 30728 |
+
{
|
| 30729 |
+
"epoch": 0.8128101100216425,
|
| 30730 |
+
"grad_norm": 36.53125,
|
| 30731 |
+
"learning_rate": 9.872998457134225e-06,
|
| 30732 |
+
"loss": 19.0376,
|
| 30733 |
+
"step": 43800
|
| 30734 |
+
},
|
| 30735 |
+
{
|
| 30736 |
+
"epoch": 0.8129956831061222,
|
| 30737 |
+
"grad_norm": 35.46875,
|
| 30738 |
+
"learning_rate": 9.872969461348182e-06,
|
| 30739 |
+
"loss": 19.1814,
|
| 30740 |
+
"step": 43810
|
| 30741 |
+
},
|
| 30742 |
+
{
|
| 30743 |
+
"epoch": 0.8131812561906021,
|
| 30744 |
+
"grad_norm": 36.0,
|
| 30745 |
+
"learning_rate": 9.872940465562142e-06,
|
| 30746 |
+
"loss": 19.1973,
|
| 30747 |
+
"step": 43820
|
| 30748 |
+
},
|
| 30749 |
+
{
|
| 30750 |
+
"epoch": 0.813366829275082,
|
| 30751 |
+
"grad_norm": 38.5625,
|
| 30752 |
+
"learning_rate": 9.872911469776099e-06,
|
| 30753 |
+
"loss": 19.2537,
|
| 30754 |
+
"step": 43830
|
| 30755 |
+
},
|
| 30756 |
+
{
|
| 30757 |
+
"epoch": 0.8135524023595617,
|
| 30758 |
+
"grad_norm": 38.25,
|
| 30759 |
+
"learning_rate": 9.872882473990055e-06,
|
| 30760 |
+
"loss": 19.3154,
|
| 30761 |
+
"step": 43840
|
| 30762 |
+
},
|
| 30763 |
+
{
|
| 30764 |
+
"epoch": 0.8137379754440416,
|
| 30765 |
+
"grad_norm": 37.03125,
|
| 30766 |
+
"learning_rate": 9.872853478204014e-06,
|
| 30767 |
+
"loss": 19.3999,
|
| 30768 |
+
"step": 43850
|
| 30769 |
+
},
|
| 30770 |
+
{
|
| 30771 |
+
"epoch": 0.8139235485285214,
|
| 30772 |
+
"grad_norm": 34.375,
|
| 30773 |
+
"learning_rate": 9.872824482417971e-06,
|
| 30774 |
+
"loss": 18.8485,
|
| 30775 |
+
"step": 43860
|
| 30776 |
+
},
|
| 30777 |
+
{
|
| 30778 |
+
"epoch": 0.8141091216130012,
|
| 30779 |
+
"grad_norm": 35.25,
|
| 30780 |
+
"learning_rate": 9.872795486631929e-06,
|
| 30781 |
+
"loss": 19.1239,
|
| 30782 |
+
"step": 43870
|
| 30783 |
+
},
|
| 30784 |
+
{
|
| 30785 |
+
"epoch": 0.8142946946974811,
|
| 30786 |
+
"grad_norm": 38.3125,
|
| 30787 |
+
"learning_rate": 9.872766490845886e-06,
|
| 30788 |
+
"loss": 19.4616,
|
| 30789 |
+
"step": 43880
|
| 30790 |
+
},
|
| 30791 |
+
{
|
| 30792 |
+
"epoch": 0.8144802677819609,
|
| 30793 |
+
"grad_norm": 37.625,
|
| 30794 |
+
"learning_rate": 9.872737495059843e-06,
|
| 30795 |
+
"loss": 19.0829,
|
| 30796 |
+
"step": 43890
|
| 30797 |
+
},
|
| 30798 |
+
{
|
| 30799 |
+
"epoch": 0.8146658408664408,
|
| 30800 |
+
"grad_norm": 37.8125,
|
| 30801 |
+
"learning_rate": 9.8727084992738e-06,
|
| 30802 |
+
"loss": 19.356,
|
| 30803 |
+
"step": 43900
|
| 30804 |
+
},
|
| 30805 |
+
{
|
| 30806 |
+
"epoch": 0.8148514139509205,
|
| 30807 |
+
"grad_norm": 39.5625,
|
| 30808 |
+
"learning_rate": 9.872679503487758e-06,
|
| 30809 |
+
"loss": 19.0015,
|
| 30810 |
+
"step": 43910
|
| 30811 |
+
},
|
| 30812 |
+
{
|
| 30813 |
+
"epoch": 0.8150369870354004,
|
| 30814 |
+
"grad_norm": 35.53125,
|
| 30815 |
+
"learning_rate": 9.872650507701717e-06,
|
| 30816 |
+
"loss": 19.1196,
|
| 30817 |
+
"step": 43920
|
| 30818 |
+
},
|
| 30819 |
+
{
|
| 30820 |
+
"epoch": 0.8152225601198803,
|
| 30821 |
+
"grad_norm": 37.625,
|
| 30822 |
+
"learning_rate": 9.872621511915675e-06,
|
| 30823 |
+
"loss": 19.5022,
|
| 30824 |
+
"step": 43930
|
| 30825 |
+
},
|
| 30826 |
+
{
|
| 30827 |
+
"epoch": 0.81540813320436,
|
| 30828 |
+
"grad_norm": 35.25,
|
| 30829 |
+
"learning_rate": 9.872592516129632e-06,
|
| 30830 |
+
"loss": 18.9608,
|
| 30831 |
+
"step": 43940
|
| 30832 |
+
},
|
| 30833 |
+
{
|
| 30834 |
+
"epoch": 0.8155937062888399,
|
| 30835 |
+
"grad_norm": 35.25,
|
| 30836 |
+
"learning_rate": 9.87256352034359e-06,
|
| 30837 |
+
"loss": 19.2786,
|
| 30838 |
+
"step": 43950
|
| 30839 |
+
},
|
| 30840 |
+
{
|
| 30841 |
+
"epoch": 0.8157792793733197,
|
| 30842 |
+
"grad_norm": 35.46875,
|
| 30843 |
+
"learning_rate": 9.872534524557547e-06,
|
| 30844 |
+
"loss": 19.0429,
|
| 30845 |
+
"step": 43960
|
| 30846 |
+
},
|
| 30847 |
+
{
|
| 30848 |
+
"epoch": 0.8159648524577995,
|
| 30849 |
+
"grad_norm": 36.9375,
|
| 30850 |
+
"learning_rate": 9.872505528771504e-06,
|
| 30851 |
+
"loss": 19.0499,
|
| 30852 |
+
"step": 43970
|
| 30853 |
+
},
|
| 30854 |
+
{
|
| 30855 |
+
"epoch": 0.8161504255422793,
|
| 30856 |
+
"grad_norm": 35.1875,
|
| 30857 |
+
"learning_rate": 9.872476532985462e-06,
|
| 30858 |
+
"loss": 19.377,
|
| 30859 |
+
"step": 43980
|
| 30860 |
+
},
|
| 30861 |
+
{
|
| 30862 |
+
"epoch": 0.8163359986267592,
|
| 30863 |
+
"grad_norm": 34.71875,
|
| 30864 |
+
"learning_rate": 9.87244753719942e-06,
|
| 30865 |
+
"loss": 19.3778,
|
| 30866 |
+
"step": 43990
|
| 30867 |
+
},
|
| 30868 |
+
{
|
| 30869 |
+
"epoch": 0.816521571711239,
|
| 30870 |
+
"grad_norm": 37.625,
|
| 30871 |
+
"learning_rate": 9.872418541413378e-06,
|
| 30872 |
+
"loss": 19.1619,
|
| 30873 |
+
"step": 44000
|
| 30874 |
+
},
|
| 30875 |
+
{
|
| 30876 |
+
"epoch": 0.8167071447957188,
|
| 30877 |
+
"grad_norm": 34.96875,
|
| 30878 |
+
"learning_rate": 9.872389545627334e-06,
|
| 30879 |
+
"loss": 19.2044,
|
| 30880 |
+
"step": 44010
|
| 30881 |
+
},
|
| 30882 |
+
{
|
| 30883 |
+
"epoch": 0.8168927178801987,
|
| 30884 |
+
"grad_norm": 36.46875,
|
| 30885 |
+
"learning_rate": 9.872360549841291e-06,
|
| 30886 |
+
"loss": 18.8557,
|
| 30887 |
+
"step": 44020
|
| 30888 |
+
},
|
| 30889 |
+
{
|
| 30890 |
+
"epoch": 0.8170782909646784,
|
| 30891 |
+
"grad_norm": 37.0625,
|
| 30892 |
+
"learning_rate": 9.87233155405525e-06,
|
| 30893 |
+
"loss": 19.3249,
|
| 30894 |
+
"step": 44030
|
| 30895 |
+
},
|
| 30896 |
+
{
|
| 30897 |
+
"epoch": 0.8172638640491583,
|
| 30898 |
+
"grad_norm": 33.6875,
|
| 30899 |
+
"learning_rate": 9.872302558269208e-06,
|
| 30900 |
+
"loss": 19.2119,
|
| 30901 |
+
"step": 44040
|
| 30902 |
+
},
|
| 30903 |
+
{
|
| 30904 |
+
"epoch": 0.8174494371336382,
|
| 30905 |
+
"grad_norm": 36.375,
|
| 30906 |
+
"learning_rate": 9.872273562483165e-06,
|
| 30907 |
+
"loss": 18.7741,
|
| 30908 |
+
"step": 44050
|
| 30909 |
+
},
|
| 30910 |
+
{
|
| 30911 |
+
"epoch": 0.817635010218118,
|
| 30912 |
+
"grad_norm": 38.40625,
|
| 30913 |
+
"learning_rate": 9.872244566697123e-06,
|
| 30914 |
+
"loss": 18.7126,
|
| 30915 |
+
"step": 44060
|
| 30916 |
+
},
|
| 30917 |
+
{
|
| 30918 |
+
"epoch": 0.8178205833025978,
|
| 30919 |
+
"grad_norm": 36.78125,
|
| 30920 |
+
"learning_rate": 9.87221557091108e-06,
|
| 30921 |
+
"loss": 19.2019,
|
| 30922 |
+
"step": 44070
|
| 30923 |
+
},
|
| 30924 |
+
{
|
| 30925 |
+
"epoch": 0.8180061563870776,
|
| 30926 |
+
"grad_norm": 36.0625,
|
| 30927 |
+
"learning_rate": 9.872186575125038e-06,
|
| 30928 |
+
"loss": 19.5095,
|
| 30929 |
+
"step": 44080
|
| 30930 |
+
},
|
| 30931 |
+
{
|
| 30932 |
+
"epoch": 0.8181917294715575,
|
| 30933 |
+
"grad_norm": 37.53125,
|
| 30934 |
+
"learning_rate": 9.872157579338995e-06,
|
| 30935 |
+
"loss": 19.4252,
|
| 30936 |
+
"step": 44090
|
| 30937 |
+
},
|
| 30938 |
+
{
|
| 30939 |
+
"epoch": 0.8183773025560372,
|
| 30940 |
+
"grad_norm": 37.21875,
|
| 30941 |
+
"learning_rate": 9.872128583552954e-06,
|
| 30942 |
+
"loss": 19.17,
|
| 30943 |
+
"step": 44100
|
| 30944 |
+
},
|
| 30945 |
+
{
|
| 30946 |
+
"epoch": 0.8185628756405171,
|
| 30947 |
+
"grad_norm": 33.5,
|
| 30948 |
+
"learning_rate": 9.87209958776691e-06,
|
| 30949 |
+
"loss": 19.4603,
|
| 30950 |
+
"step": 44110
|
| 30951 |
+
},
|
| 30952 |
+
{
|
| 30953 |
+
"epoch": 0.818748448724997,
|
| 30954 |
+
"grad_norm": 36.71875,
|
| 30955 |
+
"learning_rate": 9.872070591980867e-06,
|
| 30956 |
+
"loss": 19.2598,
|
| 30957 |
+
"step": 44120
|
| 30958 |
+
},
|
| 30959 |
+
{
|
| 30960 |
+
"epoch": 0.8189340218094767,
|
| 30961 |
+
"grad_norm": 34.5625,
|
| 30962 |
+
"learning_rate": 9.872041596194826e-06,
|
| 30963 |
+
"loss": 18.7924,
|
| 30964 |
+
"step": 44130
|
| 30965 |
+
},
|
| 30966 |
+
{
|
| 30967 |
+
"epoch": 0.8191195948939566,
|
| 30968 |
+
"grad_norm": 37.40625,
|
| 30969 |
+
"learning_rate": 9.872012600408784e-06,
|
| 30970 |
+
"loss": 19.4831,
|
| 30971 |
+
"step": 44140
|
| 30972 |
+
},
|
| 30973 |
+
{
|
| 30974 |
+
"epoch": 0.8193051679784364,
|
| 30975 |
+
"grad_norm": 35.03125,
|
| 30976 |
+
"learning_rate": 9.871983604622741e-06,
|
| 30977 |
+
"loss": 18.8814,
|
| 30978 |
+
"step": 44150
|
| 30979 |
+
},
|
| 30980 |
+
{
|
| 30981 |
+
"epoch": 0.8194907410629162,
|
| 30982 |
+
"grad_norm": 36.1875,
|
| 30983 |
+
"learning_rate": 9.871954608836698e-06,
|
| 30984 |
+
"loss": 19.2846,
|
| 30985 |
+
"step": 44160
|
| 30986 |
+
},
|
| 30987 |
+
{
|
| 30988 |
+
"epoch": 0.8196763141473961,
|
| 30989 |
+
"grad_norm": 35.46875,
|
| 30990 |
+
"learning_rate": 9.871925613050656e-06,
|
| 30991 |
+
"loss": 18.9267,
|
| 30992 |
+
"step": 44170
|
| 30993 |
+
},
|
| 30994 |
+
{
|
| 30995 |
+
"epoch": 0.8198618872318759,
|
| 30996 |
+
"grad_norm": 36.875,
|
| 30997 |
+
"learning_rate": 9.871896617264613e-06,
|
| 30998 |
+
"loss": 18.9907,
|
| 30999 |
+
"step": 44180
|
| 31000 |
+
},
|
| 31001 |
+
{
|
| 31002 |
+
"epoch": 0.8200474603163557,
|
| 31003 |
+
"grad_norm": 38.53125,
|
| 31004 |
+
"learning_rate": 9.87186762147857e-06,
|
| 31005 |
+
"loss": 19.8266,
|
| 31006 |
+
"step": 44190
|
| 31007 |
+
},
|
| 31008 |
+
{
|
| 31009 |
+
"epoch": 0.8202330334008355,
|
| 31010 |
+
"grad_norm": 37.1875,
|
| 31011 |
+
"learning_rate": 9.87183862569253e-06,
|
| 31012 |
+
"loss": 19.0446,
|
| 31013 |
+
"step": 44200
|
| 31014 |
+
},
|
| 31015 |
+
{
|
| 31016 |
+
"epoch": 0.8204186064853154,
|
| 31017 |
+
"grad_norm": 35.375,
|
| 31018 |
+
"learning_rate": 9.871809629906487e-06,
|
| 31019 |
+
"loss": 19.2261,
|
| 31020 |
+
"step": 44210
|
| 31021 |
+
},
|
| 31022 |
+
{
|
| 31023 |
+
"epoch": 0.8206041795697951,
|
| 31024 |
+
"grad_norm": 34.9375,
|
| 31025 |
+
"learning_rate": 9.871780634120443e-06,
|
| 31026 |
+
"loss": 19.3818,
|
| 31027 |
+
"step": 44220
|
| 31028 |
+
},
|
| 31029 |
+
{
|
| 31030 |
+
"epoch": 0.820789752654275,
|
| 31031 |
+
"grad_norm": 36.3125,
|
| 31032 |
+
"learning_rate": 9.871751638334402e-06,
|
| 31033 |
+
"loss": 19.2278,
|
| 31034 |
+
"step": 44230
|
| 31035 |
+
},
|
| 31036 |
+
{
|
| 31037 |
+
"epoch": 0.8209753257387549,
|
| 31038 |
+
"grad_norm": 38.21875,
|
| 31039 |
+
"learning_rate": 9.87172264254836e-06,
|
| 31040 |
+
"loss": 19.2026,
|
| 31041 |
+
"step": 44240
|
| 31042 |
+
},
|
| 31043 |
+
{
|
| 31044 |
+
"epoch": 0.8211608988232347,
|
| 31045 |
+
"grad_norm": 37.21875,
|
| 31046 |
+
"learning_rate": 9.871693646762317e-06,
|
| 31047 |
+
"loss": 19.0903,
|
| 31048 |
+
"step": 44250
|
| 31049 |
+
},
|
| 31050 |
+
{
|
| 31051 |
+
"epoch": 0.8213464719077145,
|
| 31052 |
+
"grad_norm": 37.28125,
|
| 31053 |
+
"learning_rate": 9.871664650976274e-06,
|
| 31054 |
+
"loss": 18.8226,
|
| 31055 |
+
"step": 44260
|
| 31056 |
+
},
|
| 31057 |
+
{
|
| 31058 |
+
"epoch": 0.8215320449921943,
|
| 31059 |
+
"grad_norm": 35.40625,
|
| 31060 |
+
"learning_rate": 9.871635655190232e-06,
|
| 31061 |
+
"loss": 19.3683,
|
| 31062 |
+
"step": 44270
|
| 31063 |
+
},
|
| 31064 |
+
{
|
| 31065 |
+
"epoch": 0.8217176180766742,
|
| 31066 |
+
"grad_norm": 35.21875,
|
| 31067 |
+
"learning_rate": 9.871606659404189e-06,
|
| 31068 |
+
"loss": 18.8462,
|
| 31069 |
+
"step": 44280
|
| 31070 |
+
},
|
| 31071 |
+
{
|
| 31072 |
+
"epoch": 0.821903191161154,
|
| 31073 |
+
"grad_norm": 38.71875,
|
| 31074 |
+
"learning_rate": 9.871577663618146e-06,
|
| 31075 |
+
"loss": 19.0862,
|
| 31076 |
+
"step": 44290
|
| 31077 |
+
},
|
| 31078 |
+
{
|
| 31079 |
+
"epoch": 0.8220887642456338,
|
| 31080 |
+
"grad_norm": 38.125,
|
| 31081 |
+
"learning_rate": 9.871548667832106e-06,
|
| 31082 |
+
"loss": 19.0924,
|
| 31083 |
+
"step": 44300
|
| 31084 |
+
},
|
| 31085 |
+
{
|
| 31086 |
+
"epoch": 0.8222743373301137,
|
| 31087 |
+
"grad_norm": 35.53125,
|
| 31088 |
+
"learning_rate": 9.871519672046063e-06,
|
| 31089 |
+
"loss": 18.8592,
|
| 31090 |
+
"step": 44310
|
| 31091 |
+
},
|
| 31092 |
+
{
|
| 31093 |
+
"epoch": 0.8224599104145934,
|
| 31094 |
+
"grad_norm": 36.875,
|
| 31095 |
+
"learning_rate": 9.87149067626002e-06,
|
| 31096 |
+
"loss": 19.0449,
|
| 31097 |
+
"step": 44320
|
| 31098 |
+
},
|
| 31099 |
+
{
|
| 31100 |
+
"epoch": 0.8226454834990733,
|
| 31101 |
+
"grad_norm": 32.75,
|
| 31102 |
+
"learning_rate": 9.871461680473978e-06,
|
| 31103 |
+
"loss": 18.9665,
|
| 31104 |
+
"step": 44330
|
| 31105 |
+
},
|
| 31106 |
+
{
|
| 31107 |
+
"epoch": 0.8228310565835532,
|
| 31108 |
+
"grad_norm": 34.0,
|
| 31109 |
+
"learning_rate": 9.871432684687935e-06,
|
| 31110 |
+
"loss": 18.4968,
|
| 31111 |
+
"step": 44340
|
| 31112 |
+
},
|
| 31113 |
+
{
|
| 31114 |
+
"epoch": 0.8230166296680329,
|
| 31115 |
+
"grad_norm": 34.6875,
|
| 31116 |
+
"learning_rate": 9.871403688901893e-06,
|
| 31117 |
+
"loss": 19.4038,
|
| 31118 |
+
"step": 44350
|
| 31119 |
+
},
|
| 31120 |
+
{
|
| 31121 |
+
"epoch": 0.8232022027525128,
|
| 31122 |
+
"grad_norm": 38.4375,
|
| 31123 |
+
"learning_rate": 9.87137469311585e-06,
|
| 31124 |
+
"loss": 19.172,
|
| 31125 |
+
"step": 44360
|
| 31126 |
+
},
|
| 31127 |
+
{
|
| 31128 |
+
"epoch": 0.8233877758369926,
|
| 31129 |
+
"grad_norm": 34.90625,
|
| 31130 |
+
"learning_rate": 9.871345697329809e-06,
|
| 31131 |
+
"loss": 19.3212,
|
| 31132 |
+
"step": 44370
|
| 31133 |
+
},
|
| 31134 |
+
{
|
| 31135 |
+
"epoch": 0.8235733489214724,
|
| 31136 |
+
"grad_norm": 35.03125,
|
| 31137 |
+
"learning_rate": 9.871316701543765e-06,
|
| 31138 |
+
"loss": 18.7932,
|
| 31139 |
+
"step": 44380
|
| 31140 |
+
},
|
| 31141 |
+
{
|
| 31142 |
+
"epoch": 0.8237589220059522,
|
| 31143 |
+
"grad_norm": 35.46875,
|
| 31144 |
+
"learning_rate": 9.871287705757722e-06,
|
| 31145 |
+
"loss": 19.2297,
|
| 31146 |
+
"step": 44390
|
| 31147 |
+
},
|
| 31148 |
+
{
|
| 31149 |
+
"epoch": 0.8239444950904321,
|
| 31150 |
+
"grad_norm": 36.78125,
|
| 31151 |
+
"learning_rate": 9.871258709971681e-06,
|
| 31152 |
+
"loss": 18.9214,
|
| 31153 |
+
"step": 44400
|
| 31154 |
+
},
|
| 31155 |
+
{
|
| 31156 |
+
"epoch": 0.8241300681749119,
|
| 31157 |
+
"grad_norm": 37.0,
|
| 31158 |
+
"learning_rate": 9.871229714185639e-06,
|
| 31159 |
+
"loss": 19.1307,
|
| 31160 |
+
"step": 44410
|
| 31161 |
+
},
|
| 31162 |
+
{
|
| 31163 |
+
"epoch": 0.8243156412593917,
|
| 31164 |
+
"grad_norm": 35.78125,
|
| 31165 |
+
"learning_rate": 9.871200718399596e-06,
|
| 31166 |
+
"loss": 19.2335,
|
| 31167 |
+
"step": 44420
|
| 31168 |
+
},
|
| 31169 |
+
{
|
| 31170 |
+
"epoch": 0.8245012143438716,
|
| 31171 |
+
"grad_norm": 35.90625,
|
| 31172 |
+
"learning_rate": 9.871171722613554e-06,
|
| 31173 |
+
"loss": 19.2364,
|
| 31174 |
+
"step": 44430
|
| 31175 |
+
},
|
| 31176 |
+
{
|
| 31177 |
+
"epoch": 0.8246867874283514,
|
| 31178 |
+
"grad_norm": 35.65625,
|
| 31179 |
+
"learning_rate": 9.871142726827511e-06,
|
| 31180 |
+
"loss": 18.697,
|
| 31181 |
+
"step": 44440
|
| 31182 |
+
},
|
| 31183 |
+
{
|
| 31184 |
+
"epoch": 0.8248723605128312,
|
| 31185 |
+
"grad_norm": 37.125,
|
| 31186 |
+
"learning_rate": 9.871113731041468e-06,
|
| 31187 |
+
"loss": 19.2022,
|
| 31188 |
+
"step": 44450
|
| 31189 |
+
},
|
| 31190 |
+
{
|
| 31191 |
+
"epoch": 0.825057933597311,
|
| 31192 |
+
"grad_norm": 35.90625,
|
| 31193 |
+
"learning_rate": 9.871084735255426e-06,
|
| 31194 |
+
"loss": 19.0279,
|
| 31195 |
+
"step": 44460
|
| 31196 |
+
},
|
| 31197 |
+
{
|
| 31198 |
+
"epoch": 0.8252435066817909,
|
| 31199 |
+
"grad_norm": 35.25,
|
| 31200 |
+
"learning_rate": 9.871055739469383e-06,
|
| 31201 |
+
"loss": 18.9795,
|
| 31202 |
+
"step": 44470
|
| 31203 |
+
},
|
| 31204 |
+
{
|
| 31205 |
+
"epoch": 0.8254290797662707,
|
| 31206 |
+
"grad_norm": 36.84375,
|
| 31207 |
+
"learning_rate": 9.871026743683342e-06,
|
| 31208 |
+
"loss": 19.2522,
|
| 31209 |
+
"step": 44480
|
| 31210 |
+
},
|
| 31211 |
+
{
|
| 31212 |
+
"epoch": 0.8256146528507505,
|
| 31213 |
+
"grad_norm": 36.1875,
|
| 31214 |
+
"learning_rate": 9.870997747897298e-06,
|
| 31215 |
+
"loss": 18.8613,
|
| 31216 |
+
"step": 44490
|
| 31217 |
+
},
|
| 31218 |
+
{
|
| 31219 |
+
"epoch": 0.8258002259352304,
|
| 31220 |
+
"grad_norm": 36.65625,
|
| 31221 |
+
"learning_rate": 9.870968752111257e-06,
|
| 31222 |
+
"loss": 19.3883,
|
| 31223 |
+
"step": 44500
|
| 31224 |
+
},
|
| 31225 |
+
{
|
| 31226 |
+
"epoch": 0.8259857990197101,
|
| 31227 |
+
"grad_norm": 36.09375,
|
| 31228 |
+
"learning_rate": 9.870939756325215e-06,
|
| 31229 |
+
"loss": 19.3544,
|
| 31230 |
+
"step": 44510
|
| 31231 |
+
},
|
| 31232 |
+
{
|
| 31233 |
+
"epoch": 0.82617137210419,
|
| 31234 |
+
"grad_norm": 38.5,
|
| 31235 |
+
"learning_rate": 9.870910760539172e-06,
|
| 31236 |
+
"loss": 19.3535,
|
| 31237 |
+
"step": 44520
|
| 31238 |
+
},
|
| 31239 |
+
{
|
| 31240 |
+
"epoch": 0.8263569451886699,
|
| 31241 |
+
"grad_norm": 34.75,
|
| 31242 |
+
"learning_rate": 9.87088176475313e-06,
|
| 31243 |
+
"loss": 18.8685,
|
| 31244 |
+
"step": 44530
|
| 31245 |
+
},
|
| 31246 |
+
{
|
| 31247 |
+
"epoch": 0.8265425182731496,
|
| 31248 |
+
"grad_norm": 38.8125,
|
| 31249 |
+
"learning_rate": 9.870852768967087e-06,
|
| 31250 |
+
"loss": 19.0754,
|
| 31251 |
+
"step": 44540
|
| 31252 |
+
},
|
| 31253 |
+
{
|
| 31254 |
+
"epoch": 0.8267280913576295,
|
| 31255 |
+
"grad_norm": 36.46875,
|
| 31256 |
+
"learning_rate": 9.870823773181044e-06,
|
| 31257 |
+
"loss": 19.056,
|
| 31258 |
+
"step": 44550
|
| 31259 |
+
},
|
| 31260 |
+
{
|
| 31261 |
+
"epoch": 0.8269136644421093,
|
| 31262 |
+
"grad_norm": 36.1875,
|
| 31263 |
+
"learning_rate": 9.870794777395002e-06,
|
| 31264 |
+
"loss": 18.5412,
|
| 31265 |
+
"step": 44560
|
| 31266 |
+
},
|
| 31267 |
+
{
|
| 31268 |
+
"epoch": 0.8270992375265891,
|
| 31269 |
+
"grad_norm": 34.15625,
|
| 31270 |
+
"learning_rate": 9.870765781608959e-06,
|
| 31271 |
+
"loss": 18.657,
|
| 31272 |
+
"step": 44570
|
| 31273 |
+
},
|
| 31274 |
+
{
|
| 31275 |
+
"epoch": 0.827284810611069,
|
| 31276 |
+
"grad_norm": 37.0,
|
| 31277 |
+
"learning_rate": 9.870736785822918e-06,
|
| 31278 |
+
"loss": 19.3666,
|
| 31279 |
+
"step": 44580
|
| 31280 |
+
},
|
| 31281 |
+
{
|
| 31282 |
+
"epoch": 0.8274703836955488,
|
| 31283 |
+
"grad_norm": 36.65625,
|
| 31284 |
+
"learning_rate": 9.870707790036875e-06,
|
| 31285 |
+
"loss": 19.1981,
|
| 31286 |
+
"step": 44590
|
| 31287 |
+
},
|
| 31288 |
+
{
|
| 31289 |
+
"epoch": 0.8276559567800287,
|
| 31290 |
+
"grad_norm": 35.4375,
|
| 31291 |
+
"learning_rate": 9.870678794250831e-06,
|
| 31292 |
+
"loss": 19.325,
|
| 31293 |
+
"step": 44600
|
| 31294 |
+
},
|
| 31295 |
+
{
|
| 31296 |
+
"epoch": 0.8278415298645084,
|
| 31297 |
+
"grad_norm": 36.375,
|
| 31298 |
+
"learning_rate": 9.87064979846479e-06,
|
| 31299 |
+
"loss": 19.4554,
|
| 31300 |
+
"step": 44610
|
| 31301 |
+
},
|
| 31302 |
+
{
|
| 31303 |
+
"epoch": 0.8280271029489883,
|
| 31304 |
+
"grad_norm": 36.78125,
|
| 31305 |
+
"learning_rate": 9.870620802678748e-06,
|
| 31306 |
+
"loss": 18.9955,
|
| 31307 |
+
"step": 44620
|
| 31308 |
+
},
|
| 31309 |
+
{
|
| 31310 |
+
"epoch": 0.8282126760334682,
|
| 31311 |
+
"grad_norm": 38.1875,
|
| 31312 |
+
"learning_rate": 9.870591806892705e-06,
|
| 31313 |
+
"loss": 19.2107,
|
| 31314 |
+
"step": 44630
|
| 31315 |
+
},
|
| 31316 |
+
{
|
| 31317 |
+
"epoch": 0.8283982491179479,
|
| 31318 |
+
"grad_norm": 37.5,
|
| 31319 |
+
"learning_rate": 9.870562811106662e-06,
|
| 31320 |
+
"loss": 18.8142,
|
| 31321 |
+
"step": 44640
|
| 31322 |
+
},
|
| 31323 |
+
{
|
| 31324 |
+
"epoch": 0.8285838222024278,
|
| 31325 |
+
"grad_norm": 38.34375,
|
| 31326 |
+
"learning_rate": 9.87053381532062e-06,
|
| 31327 |
+
"loss": 19.3055,
|
| 31328 |
+
"step": 44650
|
| 31329 |
+
},
|
| 31330 |
+
{
|
| 31331 |
+
"epoch": 0.8287693952869076,
|
| 31332 |
+
"grad_norm": 36.0,
|
| 31333 |
+
"learning_rate": 9.870504819534577e-06,
|
| 31334 |
+
"loss": 18.8541,
|
| 31335 |
+
"step": 44660
|
| 31336 |
+
},
|
| 31337 |
+
{
|
| 31338 |
+
"epoch": 0.8289549683713874,
|
| 31339 |
+
"grad_norm": 39.78125,
|
| 31340 |
+
"learning_rate": 9.870475823748535e-06,
|
| 31341 |
+
"loss": 19.1764,
|
| 31342 |
+
"step": 44670
|
| 31343 |
+
},
|
| 31344 |
+
{
|
| 31345 |
+
"epoch": 0.8291405414558672,
|
| 31346 |
+
"grad_norm": 36.03125,
|
| 31347 |
+
"learning_rate": 9.870446827962494e-06,
|
| 31348 |
+
"loss": 18.7747,
|
| 31349 |
+
"step": 44680
|
| 31350 |
+
},
|
| 31351 |
+
{
|
| 31352 |
+
"epoch": 0.8293261145403471,
|
| 31353 |
+
"grad_norm": 34.875,
|
| 31354 |
+
"learning_rate": 9.870417832176451e-06,
|
| 31355 |
+
"loss": 19.1716,
|
| 31356 |
+
"step": 44690
|
| 31357 |
+
},
|
| 31358 |
+
{
|
| 31359 |
+
"epoch": 0.8295116876248269,
|
| 31360 |
+
"grad_norm": 36.4375,
|
| 31361 |
+
"learning_rate": 9.870388836390407e-06,
|
| 31362 |
+
"loss": 19.152,
|
| 31363 |
+
"step": 44700
|
| 31364 |
+
},
|
| 31365 |
+
{
|
| 31366 |
+
"epoch": 0.8296972607093067,
|
| 31367 |
+
"grad_norm": 37.46875,
|
| 31368 |
+
"learning_rate": 9.870359840604366e-06,
|
| 31369 |
+
"loss": 18.8802,
|
| 31370 |
+
"step": 44710
|
| 31371 |
+
},
|
| 31372 |
+
{
|
| 31373 |
+
"epoch": 0.8298828337937866,
|
| 31374 |
+
"grad_norm": 35.5625,
|
| 31375 |
+
"learning_rate": 9.870330844818323e-06,
|
| 31376 |
+
"loss": 19.1369,
|
| 31377 |
+
"step": 44720
|
| 31378 |
+
},
|
| 31379 |
+
{
|
| 31380 |
+
"epoch": 0.8300684068782663,
|
| 31381 |
+
"grad_norm": 35.75,
|
| 31382 |
+
"learning_rate": 9.870301849032281e-06,
|
| 31383 |
+
"loss": 19.0659,
|
| 31384 |
+
"step": 44730
|
| 31385 |
+
},
|
| 31386 |
+
{
|
| 31387 |
+
"epoch": 0.8302539799627462,
|
| 31388 |
+
"grad_norm": 36.3125,
|
| 31389 |
+
"learning_rate": 9.870272853246238e-06,
|
| 31390 |
+
"loss": 19.2007,
|
| 31391 |
+
"step": 44740
|
| 31392 |
+
},
|
| 31393 |
+
{
|
| 31394 |
+
"epoch": 0.830439553047226,
|
| 31395 |
+
"grad_norm": 37.09375,
|
| 31396 |
+
"learning_rate": 9.870243857460197e-06,
|
| 31397 |
+
"loss": 19.3437,
|
| 31398 |
+
"step": 44750
|
| 31399 |
+
},
|
| 31400 |
+
{
|
| 31401 |
+
"epoch": 0.8306251261317058,
|
| 31402 |
+
"grad_norm": 36.28125,
|
| 31403 |
+
"learning_rate": 9.870214861674153e-06,
|
| 31404 |
+
"loss": 19.456,
|
| 31405 |
+
"step": 44760
|
| 31406 |
+
},
|
| 31407 |
+
{
|
| 31408 |
+
"epoch": 0.8308106992161857,
|
| 31409 |
+
"grad_norm": 35.625,
|
| 31410 |
+
"learning_rate": 9.87018586588811e-06,
|
| 31411 |
+
"loss": 18.8098,
|
| 31412 |
+
"step": 44770
|
| 31413 |
+
},
|
| 31414 |
+
{
|
| 31415 |
+
"epoch": 0.8309962723006655,
|
| 31416 |
+
"grad_norm": 36.0,
|
| 31417 |
+
"learning_rate": 9.87015687010207e-06,
|
| 31418 |
+
"loss": 19.4965,
|
| 31419 |
+
"step": 44780
|
| 31420 |
+
},
|
| 31421 |
+
{
|
| 31422 |
+
"epoch": 0.8311818453851454,
|
| 31423 |
+
"grad_norm": 36.125,
|
| 31424 |
+
"learning_rate": 9.870127874316027e-06,
|
| 31425 |
+
"loss": 19.1793,
|
| 31426 |
+
"step": 44790
|
| 31427 |
+
},
|
| 31428 |
+
{
|
| 31429 |
+
"epoch": 0.8313674184696251,
|
| 31430 |
+
"grad_norm": 36.78125,
|
| 31431 |
+
"learning_rate": 9.870098878529984e-06,
|
| 31432 |
+
"loss": 19.0221,
|
| 31433 |
+
"step": 44800
|
| 31434 |
+
},
|
| 31435 |
+
{
|
| 31436 |
+
"epoch": 0.831552991554105,
|
| 31437 |
+
"grad_norm": 36.15625,
|
| 31438 |
+
"learning_rate": 9.870069882743942e-06,
|
| 31439 |
+
"loss": 18.9155,
|
| 31440 |
+
"step": 44810
|
| 31441 |
+
},
|
| 31442 |
+
{
|
| 31443 |
+
"epoch": 0.8317385646385849,
|
| 31444 |
+
"grad_norm": 36.09375,
|
| 31445 |
+
"learning_rate": 9.8700408869579e-06,
|
| 31446 |
+
"loss": 18.9602,
|
| 31447 |
+
"step": 44820
|
| 31448 |
+
},
|
| 31449 |
+
{
|
| 31450 |
+
"epoch": 0.8319241377230646,
|
| 31451 |
+
"grad_norm": 37.4375,
|
| 31452 |
+
"learning_rate": 9.870011891171857e-06,
|
| 31453 |
+
"loss": 19.3968,
|
| 31454 |
+
"step": 44830
|
| 31455 |
+
},
|
| 31456 |
+
{
|
| 31457 |
+
"epoch": 0.8321097108075445,
|
| 31458 |
+
"grad_norm": 38.1875,
|
| 31459 |
+
"learning_rate": 9.869982895385814e-06,
|
| 31460 |
+
"loss": 18.7445,
|
| 31461 |
+
"step": 44840
|
| 31462 |
+
},
|
| 31463 |
+
{
|
| 31464 |
+
"epoch": 0.8322952838920243,
|
| 31465 |
+
"grad_norm": 34.875,
|
| 31466 |
+
"learning_rate": 9.869953899599773e-06,
|
| 31467 |
+
"loss": 19.3119,
|
| 31468 |
+
"step": 44850
|
| 31469 |
+
},
|
| 31470 |
+
{
|
| 31471 |
+
"epoch": 0.8324808569765041,
|
| 31472 |
+
"grad_norm": 36.0625,
|
| 31473 |
+
"learning_rate": 9.869924903813729e-06,
|
| 31474 |
+
"loss": 18.8227,
|
| 31475 |
+
"step": 44860
|
| 31476 |
+
},
|
| 31477 |
+
{
|
| 31478 |
+
"epoch": 0.832666430060984,
|
| 31479 |
+
"grad_norm": 35.15625,
|
| 31480 |
+
"learning_rate": 9.869895908027686e-06,
|
| 31481 |
+
"loss": 18.7788,
|
| 31482 |
+
"step": 44870
|
| 31483 |
+
},
|
| 31484 |
+
{
|
| 31485 |
+
"epoch": 0.8328520031454638,
|
| 31486 |
+
"grad_norm": 34.03125,
|
| 31487 |
+
"learning_rate": 9.869866912241645e-06,
|
| 31488 |
+
"loss": 19.126,
|
| 31489 |
+
"step": 44880
|
| 31490 |
+
},
|
| 31491 |
+
{
|
| 31492 |
+
"epoch": 0.8330375762299436,
|
| 31493 |
+
"grad_norm": 38.15625,
|
| 31494 |
+
"learning_rate": 9.869837916455603e-06,
|
| 31495 |
+
"loss": 18.9767,
|
| 31496 |
+
"step": 44890
|
| 31497 |
+
},
|
| 31498 |
+
{
|
| 31499 |
+
"epoch": 0.8332231493144234,
|
| 31500 |
+
"grad_norm": 37.15625,
|
| 31501 |
+
"learning_rate": 9.86980892066956e-06,
|
| 31502 |
+
"loss": 18.608,
|
| 31503 |
+
"step": 44900
|
| 31504 |
+
},
|
| 31505 |
+
{
|
| 31506 |
+
"epoch": 0.8334087223989033,
|
| 31507 |
+
"grad_norm": 37.03125,
|
| 31508 |
+
"learning_rate": 9.869779924883518e-06,
|
| 31509 |
+
"loss": 19.1132,
|
| 31510 |
+
"step": 44910
|
| 31511 |
+
},
|
| 31512 |
+
{
|
| 31513 |
+
"epoch": 0.833594295483383,
|
| 31514 |
+
"grad_norm": 35.0625,
|
| 31515 |
+
"learning_rate": 9.869750929097475e-06,
|
| 31516 |
+
"loss": 18.8629,
|
| 31517 |
+
"step": 44920
|
| 31518 |
+
},
|
| 31519 |
+
{
|
| 31520 |
+
"epoch": 0.8337798685678629,
|
| 31521 |
+
"grad_norm": 36.6875,
|
| 31522 |
+
"learning_rate": 9.869721933311432e-06,
|
| 31523 |
+
"loss": 19.265,
|
| 31524 |
+
"step": 44930
|
| 31525 |
+
},
|
| 31526 |
+
{
|
| 31527 |
+
"epoch": 0.8339654416523428,
|
| 31528 |
+
"grad_norm": 34.09375,
|
| 31529 |
+
"learning_rate": 9.86969293752539e-06,
|
| 31530 |
+
"loss": 19.1547,
|
| 31531 |
+
"step": 44940
|
| 31532 |
+
},
|
| 31533 |
+
{
|
| 31534 |
+
"epoch": 0.8341510147368225,
|
| 31535 |
+
"grad_norm": 34.78125,
|
| 31536 |
+
"learning_rate": 9.869663941739349e-06,
|
| 31537 |
+
"loss": 19.0749,
|
| 31538 |
+
"step": 44950
|
| 31539 |
+
},
|
| 31540 |
+
{
|
| 31541 |
+
"epoch": 0.8343365878213024,
|
| 31542 |
+
"grad_norm": 36.28125,
|
| 31543 |
+
"learning_rate": 9.869634945953306e-06,
|
| 31544 |
+
"loss": 19.0023,
|
| 31545 |
+
"step": 44960
|
| 31546 |
+
},
|
| 31547 |
+
{
|
| 31548 |
+
"epoch": 0.8345221609057822,
|
| 31549 |
+
"grad_norm": 37.09375,
|
| 31550 |
+
"learning_rate": 9.869605950167262e-06,
|
| 31551 |
+
"loss": 19.0009,
|
| 31552 |
+
"step": 44970
|
| 31553 |
+
},
|
| 31554 |
+
{
|
| 31555 |
+
"epoch": 0.8347077339902621,
|
| 31556 |
+
"grad_norm": 34.96875,
|
| 31557 |
+
"learning_rate": 9.869576954381221e-06,
|
| 31558 |
+
"loss": 18.7841,
|
| 31559 |
+
"step": 44980
|
| 31560 |
+
},
|
| 31561 |
+
{
|
| 31562 |
+
"epoch": 0.8348933070747419,
|
| 31563 |
+
"grad_norm": 34.9375,
|
| 31564 |
+
"learning_rate": 9.869547958595179e-06,
|
| 31565 |
+
"loss": 18.6512,
|
| 31566 |
+
"step": 44990
|
| 31567 |
+
},
|
| 31568 |
+
{
|
| 31569 |
+
"epoch": 0.8350788801592217,
|
| 31570 |
+
"grad_norm": 36.5,
|
| 31571 |
+
"learning_rate": 9.869518962809136e-06,
|
| 31572 |
+
"loss": 19.053,
|
| 31573 |
+
"step": 45000
|
| 31574 |
+
},
|
| 31575 |
+
{
|
| 31576 |
+
"epoch": 0.8350788801592217,
|
| 31577 |
+
"eval_loss": 2.3824570178985596,
|
| 31578 |
+
"eval_runtime": 454.6243,
|
| 31579 |
+
"eval_samples_per_second": 3194.103,
|
| 31580 |
+
"eval_steps_per_second": 49.909,
|
| 31581 |
+
"step": 45000
|
| 31582 |
}
|
| 31583 |
],
|
| 31584 |
"logging_steps": 10,
|
|
|
|
| 31598 |
"attributes": {}
|
| 31599 |
}
|
| 31600 |
},
|
| 31601 |
+
"total_flos": 7.854852415684608e+18,
|
| 31602 |
"train_batch_size": 8,
|
| 31603 |
"trial_name": null,
|
| 31604 |
"trial_params": null
|