Training in progress, step 20000, checkpoint

Files changed:
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3511 -3
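The committed folder is a standard Hugging Face Transformers Trainer checkpoint: model weights (model.safetensors), optimizer and scheduler state, one RNG state file per training process, and the trainer state with its log history. As a hedged illustration only, not code taken from this repository, resuming a run from such a folder typically looks like the sketch below; `model` and `train_dataset` are hypothetical placeholders, and only the numeric arguments that also appear in trainer_state.json (train batch size 8, eval every 5000 steps, logging every 10 steps) come from the diff.

```python
# Hedged sketch (not from this repository): resume training from the committed
# checkpoint folder. `model` and `train_dataset` are hypothetical placeholders.
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="output",
    per_device_train_batch_size=8,  # "train_batch_size": 8 in trainer_state.json
    eval_steps=5000,                # "eval_steps": 5000
    logging_steps=10,               # "logging_steps": 10
)

trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)

# resume_from_checkpoint reloads model.safetensors, optimizer.pt, scheduler.pt
# and the rng_state_*.pth files, then continues from the saved global_step.
trainer.train(resume_from_checkpoint="last-checkpoint")
```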
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c23cf68abad32271631729127eb144e752956ff3904650e5132254b9fad4fa45
 size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:02301192e164720633e799be07ca43015ab4e55ee46289fcc068979fca646eac
 size 919972410
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:06fea830cf5ad73ec00d500ea6fb952740ac936f18e93fa2d32abde1ea3ead92
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:be561d1df19be227394d8ea607c54262a06c9bf880af0aa5e04a52596a2a6cb0
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:03f3e24417a59435f5a8450a4aeb0f09cc92734b5c3b45a0701b2c043c415c05
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2bea02744c29f30024590ab1629a0e7b7dabbf1e8476456c2e7c5ce46dc35c28
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:041be966454b60c86af576fc1eb7f34189114689abff8f9622b947110f7334c8
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b85766f6596d15a810177d77dd259d9b50588cf100ec5f8ebff5fed881d57957
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8be75d04b1ebe614241b88fd010a5dda1b7bf703c00c6ebe310ca07975830fe7
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4699833a7ab4cb692996ef7567f934c0bac79d6a067963a873f89a38e412bd48
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a2628d182b8ffc02b994fb2eed0e111e21ac10dadfa106370a9ce0523145ccd0
 size 1000
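Each of the pointer-file diffs above follows the Git LFS format: the repository tracks only a `version` line, the object's `oid sha256:` digest, and its `size` in bytes, and this commit swaps in the digests of the new step-20000 artifacts. A small, hedged sketch for checking a downloaded object against its pointer; the path is just an example, and the digest and size are the model.safetensors values shown above:

```python
# Sketch: verify a downloaded file against the "oid sha256:" and "size" fields
# of its Git LFS pointer. The path is an example; digest and size come from the
# model.safetensors diff above.
import hashlib
import os

def verify_lfs_object(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Return True if the file's byte size and SHA-256 digest match the pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

print(verify_lfs_object(
    "last-checkpoint/model.safetensors",
    "c23cf68abad32271631729127eb144e752956ff3904650e5132254b9fad4fa45",
    306619286,
))
```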
last-checkpoint/trainer_state.json CHANGED

Old version:
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
   "eval_steps": 5000,
-  "global_step": 
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,

@@ -10531,6 +10531,3514 @@
   "eval_samples_per_second": 3195.28,
   "eval_steps_per_second": 49.928,
   "step": 15000
   }
 ],
 "logging_steps": 10,

@@ -10550,7 +14058,7 @@
     "attributes": {}
   }
 },
-  "total_flos": 
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null
New version:
 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3711461689596541,
   "eval_steps": 5000,
+  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,

   "eval_samples_per_second": 3195.28,
   "eval_steps_per_second": 49.928,
   "step": 15000
+  },
+  {
+    "epoch": 0.2785451998042204,
+    "grad_norm": 34.0625,
+    "learning_rate": 9.956477325150337e-06,
+    "loss": 22.2863,
+    "step": 15010
+  },
+  {
+    "epoch": 0.27873077288870024,
+    "grad_norm": 34.96875,
+    "learning_rate": 9.956448329364294e-06,
+    "loss": 22.5122,
+    "step": 15020
+  },
+  {
+    "epoch": 0.27891634597318005,
+    "grad_norm": 33.625,
+    "learning_rate": 9.956419333578251e-06,
+    "loss": 22.3634,
+    "step": 15030
+  },
[the appended log entries continue in this pattern, one record every 10 steps; the source rendering runs through step 16510 and is cut off mid-diff]
+
"loss": 22.3669,
|
| 11597 |
+
"step": 16520
|
| 11598 |
+
},
|
| 11599 |
+
{
|
| 11600 |
+
"epoch": 0.3067523086451541,
|
| 11601 |
+
"grad_norm": 36.40625,
|
| 11602 |
+
"learning_rate": 9.95206996567189e-06,
|
| 11603 |
+
"loss": 21.9809,
|
| 11604 |
+
"step": 16530
|
| 11605 |
+
},
|
| 11606 |
+
{
|
| 11607 |
+
"epoch": 0.30693788172963393,
|
| 11608 |
+
"grad_norm": 34.09375,
|
| 11609 |
+
"learning_rate": 9.952040969885847e-06,
|
| 11610 |
+
"loss": 21.7875,
|
| 11611 |
+
"step": 16540
|
| 11612 |
+
},
|
| 11613 |
+
{
|
| 11614 |
+
"epoch": 0.30712345481411374,
|
| 11615 |
+
"grad_norm": 35.71875,
|
| 11616 |
+
"learning_rate": 9.952011974099804e-06,
|
| 11617 |
+
"loss": 21.9626,
|
| 11618 |
+
"step": 16550
|
| 11619 |
+
},
|
| 11620 |
+
{
|
| 11621 |
+
"epoch": 0.3073090278985936,
|
| 11622 |
+
"grad_norm": 38.78125,
|
| 11623 |
+
"learning_rate": 9.951982978313762e-06,
|
| 11624 |
+
"loss": 21.6948,
|
| 11625 |
+
"step": 16560
|
| 11626 |
+
},
|
| 11627 |
+
{
|
| 11628 |
+
"epoch": 0.3074946009830734,
|
| 11629 |
+
"grad_norm": 33.6875,
|
| 11630 |
+
"learning_rate": 9.95195398252772e-06,
|
| 11631 |
+
"loss": 21.1979,
|
| 11632 |
+
"step": 16570
|
| 11633 |
+
},
|
| 11634 |
+
{
|
| 11635 |
+
"epoch": 0.30768017406755327,
|
| 11636 |
+
"grad_norm": 33.4375,
|
| 11637 |
+
"learning_rate": 9.951924986741678e-06,
|
| 11638 |
+
"loss": 22.1714,
|
| 11639 |
+
"step": 16580
|
| 11640 |
+
},
|
| 11641 |
+
{
|
| 11642 |
+
"epoch": 0.3078657471520331,
|
| 11643 |
+
"grad_norm": 34.78125,
|
| 11644 |
+
"learning_rate": 9.951895990955636e-06,
|
| 11645 |
+
"loss": 21.804,
|
| 11646 |
+
"step": 16590
|
| 11647 |
+
},
|
| 11648 |
+
{
|
| 11649 |
+
"epoch": 0.3080513202365129,
|
| 11650 |
+
"grad_norm": 34.09375,
|
| 11651 |
+
"learning_rate": 9.951866995169593e-06,
|
| 11652 |
+
"loss": 21.6856,
|
| 11653 |
+
"step": 16600
|
| 11654 |
+
},
|
| 11655 |
+
{
|
| 11656 |
+
"epoch": 0.30823689332099274,
|
| 11657 |
+
"grad_norm": 33.34375,
|
| 11658 |
+
"learning_rate": 9.95183799938355e-06,
|
| 11659 |
+
"loss": 22.2583,
|
| 11660 |
+
"step": 16610
|
| 11661 |
+
},
|
| 11662 |
+
{
|
| 11663 |
+
"epoch": 0.30842246640547255,
|
| 11664 |
+
"grad_norm": 34.15625,
|
| 11665 |
+
"learning_rate": 9.951809003597508e-06,
|
| 11666 |
+
"loss": 22.07,
|
| 11667 |
+
"step": 16620
|
| 11668 |
+
},
|
| 11669 |
+
{
|
| 11670 |
+
"epoch": 0.30860803948995236,
|
| 11671 |
+
"grad_norm": 35.84375,
|
| 11672 |
+
"learning_rate": 9.951780007811465e-06,
|
| 11673 |
+
"loss": 21.9016,
|
| 11674 |
+
"step": 16630
|
| 11675 |
+
},
|
| 11676 |
+
{
|
| 11677 |
+
"epoch": 0.3087936125744322,
|
| 11678 |
+
"grad_norm": 33.75,
|
| 11679 |
+
"learning_rate": 9.951751012025423e-06,
|
| 11680 |
+
"loss": 21.72,
|
| 11681 |
+
"step": 16640
|
| 11682 |
+
},
|
| 11683 |
+
{
|
| 11684 |
+
"epoch": 0.308979185658912,
|
| 11685 |
+
"grad_norm": 35.28125,
|
| 11686 |
+
"learning_rate": 9.951722016239382e-06,
|
| 11687 |
+
"loss": 21.76,
|
| 11688 |
+
"step": 16650
|
| 11689 |
+
},
|
| 11690 |
+
{
|
| 11691 |
+
"epoch": 0.30916475874339183,
|
| 11692 |
+
"grad_norm": 33.96875,
|
| 11693 |
+
"learning_rate": 9.951693020453338e-06,
|
| 11694 |
+
"loss": 22.1734,
|
| 11695 |
+
"step": 16660
|
| 11696 |
+
},
|
| 11697 |
+
{
|
| 11698 |
+
"epoch": 0.3093503318278717,
|
| 11699 |
+
"grad_norm": 34.9375,
|
| 11700 |
+
"learning_rate": 9.951664024667295e-06,
|
| 11701 |
+
"loss": 21.7641,
|
| 11702 |
+
"step": 16670
|
| 11703 |
+
},
|
| 11704 |
+
{
|
| 11705 |
+
"epoch": 0.3095359049123515,
|
| 11706 |
+
"grad_norm": 33.5625,
|
| 11707 |
+
"learning_rate": 9.951635028881254e-06,
|
| 11708 |
+
"loss": 21.7486,
|
| 11709 |
+
"step": 16680
|
| 11710 |
+
},
|
| 11711 |
+
{
|
| 11712 |
+
"epoch": 0.30972147799683136,
|
| 11713 |
+
"grad_norm": 35.65625,
|
| 11714 |
+
"learning_rate": 9.951606033095211e-06,
|
| 11715 |
+
"loss": 22.0392,
|
| 11716 |
+
"step": 16690
|
| 11717 |
+
},
|
| 11718 |
+
{
|
| 11719 |
+
"epoch": 0.30990705108131117,
|
| 11720 |
+
"grad_norm": 35.84375,
|
| 11721 |
+
"learning_rate": 9.951577037309169e-06,
|
| 11722 |
+
"loss": 21.8191,
|
| 11723 |
+
"step": 16700
|
| 11724 |
+
},
|
| 11725 |
+
{
|
| 11726 |
+
"epoch": 0.310092624165791,
|
| 11727 |
+
"grad_norm": 32.84375,
|
| 11728 |
+
"learning_rate": 9.951548041523126e-06,
|
| 11729 |
+
"loss": 21.7801,
|
| 11730 |
+
"step": 16710
|
| 11731 |
+
},
|
| 11732 |
+
{
|
| 11733 |
+
"epoch": 0.31027819725027084,
|
| 11734 |
+
"grad_norm": 38.625,
|
| 11735 |
+
"learning_rate": 9.951519045737084e-06,
|
| 11736 |
+
"loss": 22.5178,
|
| 11737 |
+
"step": 16720
|
| 11738 |
+
},
|
| 11739 |
+
{
|
| 11740 |
+
"epoch": 0.31046377033475064,
|
| 11741 |
+
"grad_norm": 35.28125,
|
| 11742 |
+
"learning_rate": 9.951490049951041e-06,
|
| 11743 |
+
"loss": 21.7387,
|
| 11744 |
+
"step": 16730
|
| 11745 |
+
},
|
| 11746 |
+
{
|
| 11747 |
+
"epoch": 0.31064934341923045,
|
| 11748 |
+
"grad_norm": 34.9375,
|
| 11749 |
+
"learning_rate": 9.951461054164999e-06,
|
| 11750 |
+
"loss": 22.385,
|
| 11751 |
+
"step": 16740
|
| 11752 |
+
},
|
| 11753 |
+
{
|
| 11754 |
+
"epoch": 0.3108349165037103,
|
| 11755 |
+
"grad_norm": 35.0,
|
| 11756 |
+
"learning_rate": 9.951432058378958e-06,
|
| 11757 |
+
"loss": 22.0111,
|
| 11758 |
+
"step": 16750
|
| 11759 |
+
},
|
| 11760 |
+
{
|
| 11761 |
+
"epoch": 0.3110204895881901,
|
| 11762 |
+
"grad_norm": 34.25,
|
| 11763 |
+
"learning_rate": 9.951403062592913e-06,
|
| 11764 |
+
"loss": 21.9642,
|
| 11765 |
+
"step": 16760
|
| 11766 |
+
},
|
| 11767 |
+
{
|
| 11768 |
+
"epoch": 0.31120606267267,
|
| 11769 |
+
"grad_norm": 35.59375,
|
| 11770 |
+
"learning_rate": 9.95137406680687e-06,
|
| 11771 |
+
"loss": 21.8199,
|
| 11772 |
+
"step": 16770
|
| 11773 |
+
},
|
| 11774 |
+
{
|
| 11775 |
+
"epoch": 0.3113916357571498,
|
| 11776 |
+
"grad_norm": 36.8125,
|
| 11777 |
+
"learning_rate": 9.95134507102083e-06,
|
| 11778 |
+
"loss": 21.8454,
|
| 11779 |
+
"step": 16780
|
| 11780 |
+
},
|
| 11781 |
+
{
|
| 11782 |
+
"epoch": 0.3115772088416296,
|
| 11783 |
+
"grad_norm": 34.875,
|
| 11784 |
+
"learning_rate": 9.951316075234787e-06,
|
| 11785 |
+
"loss": 21.7867,
|
| 11786 |
+
"step": 16790
|
| 11787 |
+
},
|
| 11788 |
+
{
|
| 11789 |
+
"epoch": 0.31176278192610946,
|
| 11790 |
+
"grad_norm": 34.875,
|
| 11791 |
+
"learning_rate": 9.951287079448745e-06,
|
| 11792 |
+
"loss": 21.7425,
|
| 11793 |
+
"step": 16800
|
| 11794 |
+
},
|
| 11795 |
+
{
|
| 11796 |
+
"epoch": 0.31194835501058926,
|
| 11797 |
+
"grad_norm": 33.875,
|
| 11798 |
+
"learning_rate": 9.951258083662702e-06,
|
| 11799 |
+
"loss": 22.1539,
|
| 11800 |
+
"step": 16810
|
| 11801 |
+
},
|
| 11802 |
+
{
|
| 11803 |
+
"epoch": 0.31213392809506907,
|
| 11804 |
+
"grad_norm": 33.5625,
|
| 11805 |
+
"learning_rate": 9.95122908787666e-06,
|
| 11806 |
+
"loss": 22.1815,
|
| 11807 |
+
"step": 16820
|
| 11808 |
+
},
|
| 11809 |
+
{
|
| 11810 |
+
"epoch": 0.31231950117954893,
|
| 11811 |
+
"grad_norm": 34.25,
|
| 11812 |
+
"learning_rate": 9.951200092090617e-06,
|
| 11813 |
+
"loss": 21.9751,
|
| 11814 |
+
"step": 16830
|
| 11815 |
+
},
|
| 11816 |
+
{
|
| 11817 |
+
"epoch": 0.31250507426402874,
|
| 11818 |
+
"grad_norm": 35.46875,
|
| 11819 |
+
"learning_rate": 9.951171096304574e-06,
|
| 11820 |
+
"loss": 22.1181,
|
| 11821 |
+
"step": 16840
|
| 11822 |
+
},
|
| 11823 |
+
{
|
| 11824 |
+
"epoch": 0.3126906473485086,
|
| 11825 |
+
"grad_norm": 35.75,
|
| 11826 |
+
"learning_rate": 9.951142100518533e-06,
|
| 11827 |
+
"loss": 21.8169,
|
| 11828 |
+
"step": 16850
|
| 11829 |
+
},
|
| 11830 |
+
{
|
| 11831 |
+
"epoch": 0.3128762204329884,
|
| 11832 |
+
"grad_norm": 37.78125,
|
| 11833 |
+
"learning_rate": 9.95111310473249e-06,
|
| 11834 |
+
"loss": 22.324,
|
| 11835 |
+
"step": 16860
|
| 11836 |
+
},
|
| 11837 |
+
{
|
| 11838 |
+
"epoch": 0.3130617935174682,
|
| 11839 |
+
"grad_norm": 36.25,
|
| 11840 |
+
"learning_rate": 9.951084108946447e-06,
|
| 11841 |
+
"loss": 22.0339,
|
| 11842 |
+
"step": 16870
|
| 11843 |
+
},
|
| 11844 |
+
{
|
| 11845 |
+
"epoch": 0.3132473666019481,
|
| 11846 |
+
"grad_norm": 33.53125,
|
| 11847 |
+
"learning_rate": 9.951055113160406e-06,
|
| 11848 |
+
"loss": 22.3883,
|
| 11849 |
+
"step": 16880
|
| 11850 |
+
},
|
| 11851 |
+
{
|
| 11852 |
+
"epoch": 0.3134329396864279,
|
| 11853 |
+
"grad_norm": 35.71875,
|
| 11854 |
+
"learning_rate": 9.951026117374363e-06,
|
| 11855 |
+
"loss": 21.4248,
|
| 11856 |
+
"step": 16890
|
| 11857 |
+
},
|
| 11858 |
+
{
|
| 11859 |
+
"epoch": 0.3136185127709077,
|
| 11860 |
+
"grad_norm": 34.0625,
|
| 11861 |
+
"learning_rate": 9.95099712158832e-06,
|
| 11862 |
+
"loss": 21.7226,
|
| 11863 |
+
"step": 16900
|
| 11864 |
+
},
|
| 11865 |
+
{
|
| 11866 |
+
"epoch": 0.31380408585538755,
|
| 11867 |
+
"grad_norm": 34.71875,
|
| 11868 |
+
"learning_rate": 9.950968125802278e-06,
|
| 11869 |
+
"loss": 22.1848,
|
| 11870 |
+
"step": 16910
|
| 11871 |
+
},
|
| 11872 |
+
{
|
| 11873 |
+
"epoch": 0.31398965893986736,
|
| 11874 |
+
"grad_norm": 35.6875,
|
| 11875 |
+
"learning_rate": 9.950939130016235e-06,
|
| 11876 |
+
"loss": 22.076,
|
| 11877 |
+
"step": 16920
|
| 11878 |
+
},
|
| 11879 |
+
{
|
| 11880 |
+
"epoch": 0.31417523202434716,
|
| 11881 |
+
"grad_norm": 36.5625,
|
| 11882 |
+
"learning_rate": 9.950910134230193e-06,
|
| 11883 |
+
"loss": 22.3584,
|
| 11884 |
+
"step": 16930
|
| 11885 |
+
},
|
| 11886 |
+
{
|
| 11887 |
+
"epoch": 0.314360805108827,
|
| 11888 |
+
"grad_norm": 35.78125,
|
| 11889 |
+
"learning_rate": 9.95088113844415e-06,
|
| 11890 |
+
"loss": 22.2669,
|
| 11891 |
+
"step": 16940
|
| 11892 |
+
},
|
| 11893 |
+
{
|
| 11894 |
+
"epoch": 0.31454637819330683,
|
| 11895 |
+
"grad_norm": 33.875,
|
| 11896 |
+
"learning_rate": 9.95085214265811e-06,
|
| 11897 |
+
"loss": 22.3504,
|
| 11898 |
+
"step": 16950
|
| 11899 |
+
},
|
| 11900 |
+
{
|
| 11901 |
+
"epoch": 0.3147319512777867,
|
| 11902 |
+
"grad_norm": 35.03125,
|
| 11903 |
+
"learning_rate": 9.950823146872067e-06,
|
| 11904 |
+
"loss": 21.7265,
|
| 11905 |
+
"step": 16960
|
| 11906 |
+
},
|
| 11907 |
+
{
|
| 11908 |
+
"epoch": 0.3149175243622665,
|
| 11909 |
+
"grad_norm": 33.75,
|
| 11910 |
+
"learning_rate": 9.950794151086024e-06,
|
| 11911 |
+
"loss": 22.3763,
|
| 11912 |
+
"step": 16970
|
| 11913 |
+
},
|
| 11914 |
+
{
|
| 11915 |
+
"epoch": 0.3151030974467463,
|
| 11916 |
+
"grad_norm": 33.25,
|
| 11917 |
+
"learning_rate": 9.950765155299981e-06,
|
| 11918 |
+
"loss": 21.5872,
|
| 11919 |
+
"step": 16980
|
| 11920 |
+
},
|
| 11921 |
+
{
|
| 11922 |
+
"epoch": 0.31528867053122617,
|
| 11923 |
+
"grad_norm": 35.1875,
|
| 11924 |
+
"learning_rate": 9.950736159513939e-06,
|
| 11925 |
+
"loss": 22.0631,
|
| 11926 |
+
"step": 16990
|
| 11927 |
+
},
|
| 11928 |
+
{
|
| 11929 |
+
"epoch": 0.315474243615706,
|
| 11930 |
+
"grad_norm": 35.46875,
|
| 11931 |
+
"learning_rate": 9.950707163727896e-06,
|
| 11932 |
+
"loss": 22.1039,
|
| 11933 |
+
"step": 17000
|
| 11934 |
+
},
|
| 11935 |
+
{
|
| 11936 |
+
"epoch": 0.3156598167001858,
|
| 11937 |
+
"grad_norm": 35.5625,
|
| 11938 |
+
"learning_rate": 9.950678167941854e-06,
|
| 11939 |
+
"loss": 21.9955,
|
| 11940 |
+
"step": 17010
|
| 11941 |
+
},
|
| 11942 |
+
{
|
| 11943 |
+
"epoch": 0.31584538978466564,
|
| 11944 |
+
"grad_norm": 35.1875,
|
| 11945 |
+
"learning_rate": 9.950649172155811e-06,
|
| 11946 |
+
"loss": 21.5005,
|
| 11947 |
+
"step": 17020
|
| 11948 |
+
},
|
| 11949 |
+
{
|
| 11950 |
+
"epoch": 0.31603096286914545,
|
| 11951 |
+
"grad_norm": 36.375,
|
| 11952 |
+
"learning_rate": 9.950620176369768e-06,
|
| 11953 |
+
"loss": 21.925,
|
| 11954 |
+
"step": 17030
|
| 11955 |
+
},
|
| 11956 |
+
{
|
| 11957 |
+
"epoch": 0.3162165359536253,
|
| 11958 |
+
"grad_norm": 34.5,
|
| 11959 |
+
"learning_rate": 9.950591180583726e-06,
|
| 11960 |
+
"loss": 21.8868,
|
| 11961 |
+
"step": 17040
|
| 11962 |
+
},
|
| 11963 |
+
{
|
| 11964 |
+
"epoch": 0.3164021090381051,
|
| 11965 |
+
"grad_norm": 35.03125,
|
| 11966 |
+
"learning_rate": 9.950562184797685e-06,
|
| 11967 |
+
"loss": 22.0574,
|
| 11968 |
+
"step": 17050
|
| 11969 |
+
},
|
| 11970 |
+
{
|
| 11971 |
+
"epoch": 0.3165876821225849,
|
| 11972 |
+
"grad_norm": 36.71875,
|
| 11973 |
+
"learning_rate": 9.950533189011642e-06,
|
| 11974 |
+
"loss": 22.0364,
|
| 11975 |
+
"step": 17060
|
| 11976 |
+
},
|
| 11977 |
+
{
|
| 11978 |
+
"epoch": 0.3167732552070648,
|
| 11979 |
+
"grad_norm": 33.875,
|
| 11980 |
+
"learning_rate": 9.9505041932256e-06,
|
| 11981 |
+
"loss": 21.8544,
|
| 11982 |
+
"step": 17070
|
| 11983 |
+
},
|
| 11984 |
+
{
|
| 11985 |
+
"epoch": 0.3169588282915446,
|
| 11986 |
+
"grad_norm": 33.75,
|
| 11987 |
+
"learning_rate": 9.950475197439557e-06,
|
| 11988 |
+
"loss": 21.9953,
|
| 11989 |
+
"step": 17080
|
| 11990 |
+
},
|
| 11991 |
+
{
|
| 11992 |
+
"epoch": 0.3171444013760244,
|
| 11993 |
+
"grad_norm": 34.78125,
|
| 11994 |
+
"learning_rate": 9.950446201653515e-06,
|
| 11995 |
+
"loss": 22.0436,
|
| 11996 |
+
"step": 17090
|
| 11997 |
+
},
|
| 11998 |
+
{
|
| 11999 |
+
"epoch": 0.31732997446050426,
|
| 12000 |
+
"grad_norm": 36.59375,
|
| 12001 |
+
"learning_rate": 9.950417205867472e-06,
|
| 12002 |
+
"loss": 21.6638,
|
| 12003 |
+
"step": 17100
|
| 12004 |
+
},
|
| 12005 |
+
{
|
| 12006 |
+
"epoch": 0.31751554754498407,
|
| 12007 |
+
"grad_norm": 34.0625,
|
| 12008 |
+
"learning_rate": 9.95038821008143e-06,
|
| 12009 |
+
"loss": 21.9026,
|
| 12010 |
+
"step": 17110
|
| 12011 |
+
},
|
| 12012 |
+
{
|
| 12013 |
+
"epoch": 0.31770112062946393,
|
| 12014 |
+
"grad_norm": 35.375,
|
| 12015 |
+
"learning_rate": 9.950359214295387e-06,
|
| 12016 |
+
"loss": 21.7069,
|
| 12017 |
+
"step": 17120
|
| 12018 |
+
},
|
| 12019 |
+
{
|
| 12020 |
+
"epoch": 0.31788669371394374,
|
| 12021 |
+
"grad_norm": 34.84375,
|
| 12022 |
+
"learning_rate": 9.950330218509346e-06,
|
| 12023 |
+
"loss": 21.673,
|
| 12024 |
+
"step": 17130
|
| 12025 |
+
},
|
| 12026 |
+
{
|
| 12027 |
+
"epoch": 0.31807226679842354,
|
| 12028 |
+
"grad_norm": 35.78125,
|
| 12029 |
+
"learning_rate": 9.950301222723302e-06,
|
| 12030 |
+
"loss": 21.9466,
|
| 12031 |
+
"step": 17140
|
| 12032 |
+
},
|
| 12033 |
+
{
|
| 12034 |
+
"epoch": 0.3182578398829034,
|
| 12035 |
+
"grad_norm": 37.34375,
|
| 12036 |
+
"learning_rate": 9.950272226937259e-06,
|
| 12037 |
+
"loss": 21.7084,
|
| 12038 |
+
"step": 17150
|
| 12039 |
+
},
|
| 12040 |
+
{
|
| 12041 |
+
"epoch": 0.3184434129673832,
|
| 12042 |
+
"grad_norm": 34.03125,
|
| 12043 |
+
"learning_rate": 9.950243231151218e-06,
|
| 12044 |
+
"loss": 21.572,
|
| 12045 |
+
"step": 17160
|
| 12046 |
+
},
|
| 12047 |
+
{
|
| 12048 |
+
"epoch": 0.318628986051863,
|
| 12049 |
+
"grad_norm": 35.9375,
|
| 12050 |
+
"learning_rate": 9.950214235365176e-06,
|
| 12051 |
+
"loss": 22.2523,
|
| 12052 |
+
"step": 17170
|
| 12053 |
+
},
|
| 12054 |
+
{
|
| 12055 |
+
"epoch": 0.3188145591363429,
|
| 12056 |
+
"grad_norm": 34.34375,
|
| 12057 |
+
"learning_rate": 9.950185239579133e-06,
|
| 12058 |
+
"loss": 22.0218,
|
| 12059 |
+
"step": 17180
|
| 12060 |
+
},
|
| 12061 |
+
{
|
| 12062 |
+
"epoch": 0.3190001322208227,
|
| 12063 |
+
"grad_norm": 33.78125,
|
| 12064 |
+
"learning_rate": 9.95015624379309e-06,
|
| 12065 |
+
"loss": 21.2069,
|
| 12066 |
+
"step": 17190
|
| 12067 |
+
},
|
| 12068 |
+
{
|
| 12069 |
+
"epoch": 0.3191857053053025,
|
| 12070 |
+
"grad_norm": 34.90625,
|
| 12071 |
+
"learning_rate": 9.950127248007048e-06,
|
| 12072 |
+
"loss": 22.0038,
|
| 12073 |
+
"step": 17200
|
| 12074 |
+
},
|
| 12075 |
+
{
|
| 12076 |
+
"epoch": 0.31937127838978235,
|
| 12077 |
+
"grad_norm": 33.375,
|
| 12078 |
+
"learning_rate": 9.950098252221005e-06,
|
| 12079 |
+
"loss": 21.5058,
|
| 12080 |
+
"step": 17210
|
| 12081 |
+
},
|
| 12082 |
+
{
|
| 12083 |
+
"epoch": 0.31955685147426216,
|
| 12084 |
+
"grad_norm": 34.34375,
|
| 12085 |
+
"learning_rate": 9.950069256434963e-06,
|
| 12086 |
+
"loss": 21.8254,
|
| 12087 |
+
"step": 17220
|
| 12088 |
+
},
|
| 12089 |
+
{
|
| 12090 |
+
"epoch": 0.319742424558742,
|
| 12091 |
+
"grad_norm": 36.15625,
|
| 12092 |
+
"learning_rate": 9.950040260648922e-06,
|
| 12093 |
+
"loss": 21.5007,
|
| 12094 |
+
"step": 17230
|
| 12095 |
+
},
|
| 12096 |
+
{
|
| 12097 |
+
"epoch": 0.31992799764322183,
|
| 12098 |
+
"grad_norm": 33.84375,
|
| 12099 |
+
"learning_rate": 9.950011264862879e-06,
|
| 12100 |
+
"loss": 22.6218,
|
| 12101 |
+
"step": 17240
|
| 12102 |
+
},
|
| 12103 |
+
{
|
| 12104 |
+
"epoch": 0.32011357072770164,
|
| 12105 |
+
"grad_norm": 34.96875,
|
| 12106 |
+
"learning_rate": 9.949982269076835e-06,
|
| 12107 |
+
"loss": 21.892,
|
| 12108 |
+
"step": 17250
|
| 12109 |
+
},
|
| 12110 |
+
{
|
| 12111 |
+
"epoch": 0.3202991438121815,
|
| 12112 |
+
"grad_norm": 35.15625,
|
| 12113 |
+
"learning_rate": 9.949953273290794e-06,
|
| 12114 |
+
"loss": 21.8203,
|
| 12115 |
+
"step": 17260
|
| 12116 |
+
},
|
| 12117 |
+
{
|
| 12118 |
+
"epoch": 0.3204847168966613,
|
| 12119 |
+
"grad_norm": 32.78125,
|
| 12120 |
+
"learning_rate": 9.949924277504751e-06,
|
| 12121 |
+
"loss": 22.0928,
|
| 12122 |
+
"step": 17270
|
| 12123 |
+
},
|
| 12124 |
+
{
|
| 12125 |
+
"epoch": 0.3206702899811411,
|
| 12126 |
+
"grad_norm": 34.09375,
|
| 12127 |
+
"learning_rate": 9.949895281718709e-06,
|
| 12128 |
+
"loss": 22.1172,
|
| 12129 |
+
"step": 17280
|
| 12130 |
+
},
|
| 12131 |
+
{
|
| 12132 |
+
"epoch": 0.32085586306562097,
|
| 12133 |
+
"grad_norm": 37.1875,
|
| 12134 |
+
"learning_rate": 9.949866285932666e-06,
|
| 12135 |
+
"loss": 21.4919,
|
| 12136 |
+
"step": 17290
|
| 12137 |
+
},
|
| 12138 |
+
{
|
| 12139 |
+
"epoch": 0.3210414361501008,
|
| 12140 |
+
"grad_norm": 36.09375,
|
| 12141 |
+
"learning_rate": 9.949837290146623e-06,
|
| 12142 |
+
"loss": 21.8468,
|
| 12143 |
+
"step": 17300
|
| 12144 |
+
},
|
| 12145 |
+
{
|
| 12146 |
+
"epoch": 0.32122700923458064,
|
| 12147 |
+
"grad_norm": 36.21875,
|
| 12148 |
+
"learning_rate": 9.949808294360581e-06,
|
| 12149 |
+
"loss": 21.9655,
|
| 12150 |
+
"step": 17310
|
| 12151 |
+
},
|
| 12152 |
+
{
|
| 12153 |
+
"epoch": 0.32141258231906045,
|
| 12154 |
+
"grad_norm": 35.8125,
|
| 12155 |
+
"learning_rate": 9.949779298574538e-06,
|
| 12156 |
+
"loss": 21.8789,
|
| 12157 |
+
"step": 17320
|
| 12158 |
+
},
|
| 12159 |
+
{
|
| 12160 |
+
"epoch": 0.32159815540354025,
|
| 12161 |
+
"grad_norm": 35.9375,
|
| 12162 |
+
"learning_rate": 9.949750302788497e-06,
|
| 12163 |
+
"loss": 21.9964,
|
| 12164 |
+
"step": 17330
|
| 12165 |
+
},
|
| 12166 |
+
{
|
| 12167 |
+
"epoch": 0.3217837284880201,
|
| 12168 |
+
"grad_norm": 35.40625,
|
| 12169 |
+
"learning_rate": 9.949721307002455e-06,
|
| 12170 |
+
"loss": 22.188,
|
| 12171 |
+
"step": 17340
|
| 12172 |
+
},
|
| 12173 |
+
{
|
| 12174 |
+
"epoch": 0.3219693015724999,
|
| 12175 |
+
"grad_norm": 35.0,
|
| 12176 |
+
"learning_rate": 9.94969231121641e-06,
|
| 12177 |
+
"loss": 21.9757,
|
| 12178 |
+
"step": 17350
|
| 12179 |
+
},
|
| 12180 |
+
{
|
| 12181 |
+
"epoch": 0.32215487465697973,
|
| 12182 |
+
"grad_norm": 35.34375,
|
| 12183 |
+
"learning_rate": 9.94966331543037e-06,
|
| 12184 |
+
"loss": 21.7447,
|
| 12185 |
+
"step": 17360
|
| 12186 |
+
},
|
| 12187 |
+
{
|
| 12188 |
+
"epoch": 0.3223404477414596,
|
| 12189 |
+
"grad_norm": 36.0625,
|
| 12190 |
+
"learning_rate": 9.949634319644327e-06,
|
| 12191 |
+
"loss": 21.7478,
|
| 12192 |
+
"step": 17370
|
| 12193 |
+
},
|
| 12194 |
+
{
|
| 12195 |
+
"epoch": 0.3225260208259394,
|
| 12196 |
+
"grad_norm": 36.125,
|
| 12197 |
+
"learning_rate": 9.949605323858284e-06,
|
| 12198 |
+
"loss": 22.5053,
|
| 12199 |
+
"step": 17380
|
| 12200 |
+
},
|
| 12201 |
+
{
|
| 12202 |
+
"epoch": 0.32271159391041926,
|
| 12203 |
+
"grad_norm": 35.09375,
|
| 12204 |
+
"learning_rate": 9.949576328072242e-06,
|
| 12205 |
+
"loss": 21.8131,
|
| 12206 |
+
"step": 17390
|
| 12207 |
+
},
|
| 12208 |
+
{
|
| 12209 |
+
"epoch": 0.32289716699489907,
|
| 12210 |
+
"grad_norm": 36.75,
|
| 12211 |
+
"learning_rate": 9.949547332286201e-06,
|
| 12212 |
+
"loss": 22.0129,
|
| 12213 |
+
"step": 17400
|
| 12214 |
+
},
|
| 12215 |
+
{
|
| 12216 |
+
"epoch": 0.32308274007937887,
|
| 12217 |
+
"grad_norm": 33.84375,
|
| 12218 |
+
"learning_rate": 9.949518336500157e-06,
|
| 12219 |
+
"loss": 21.799,
|
| 12220 |
+
"step": 17410
|
| 12221 |
+
},
|
| 12222 |
+
{
|
| 12223 |
+
"epoch": 0.32326831316385873,
|
| 12224 |
+
"grad_norm": 34.875,
|
| 12225 |
+
"learning_rate": 9.949489340714114e-06,
|
| 12226 |
+
"loss": 21.6667,
|
| 12227 |
+
"step": 17420
|
| 12228 |
+
},
|
| 12229 |
+
{
|
| 12230 |
+
"epoch": 0.32345388624833854,
|
| 12231 |
+
"grad_norm": 34.375,
|
| 12232 |
+
"learning_rate": 9.949460344928073e-06,
|
| 12233 |
+
"loss": 22.099,
|
| 12234 |
+
"step": 17430
|
| 12235 |
+
},
|
| 12236 |
+
{
|
| 12237 |
+
"epoch": 0.32363945933281835,
|
| 12238 |
+
"grad_norm": 35.375,
|
| 12239 |
+
"learning_rate": 9.94943134914203e-06,
|
| 12240 |
+
"loss": 21.9377,
|
| 12241 |
+
"step": 17440
|
| 12242 |
+
},
|
| 12243 |
+
{
|
| 12244 |
+
"epoch": 0.3238250324172982,
|
| 12245 |
+
"grad_norm": 36.8125,
|
| 12246 |
+
"learning_rate": 9.949402353355988e-06,
|
| 12247 |
+
"loss": 22.0756,
|
| 12248 |
+
"step": 17450
|
| 12249 |
+
},
|
| 12250 |
+
{
|
| 12251 |
+
"epoch": 0.324010605501778,
|
| 12252 |
+
"grad_norm": 33.84375,
|
| 12253 |
+
"learning_rate": 9.949373357569945e-06,
|
| 12254 |
+
"loss": 21.7746,
|
| 12255 |
+
"step": 17460
|
| 12256 |
+
},
|
| 12257 |
+
{
|
| 12258 |
+
"epoch": 0.3241961785862578,
|
| 12259 |
+
"grad_norm": 36.78125,
|
| 12260 |
+
"learning_rate": 9.949344361783903e-06,
|
| 12261 |
+
"loss": 21.9471,
|
| 12262 |
+
"step": 17470
|
| 12263 |
+
},
|
| 12264 |
+
{
|
| 12265 |
+
"epoch": 0.3243817516707377,
|
| 12266 |
+
"grad_norm": 36.4375,
|
| 12267 |
+
"learning_rate": 9.94931536599786e-06,
|
| 12268 |
+
"loss": 21.7579,
|
| 12269 |
+
"step": 17480
|
| 12270 |
+
},
|
| 12271 |
+
{
|
| 12272 |
+
"epoch": 0.3245673247552175,
|
| 12273 |
+
"grad_norm": 34.96875,
|
| 12274 |
+
"learning_rate": 9.949286370211818e-06,
|
| 12275 |
+
"loss": 21.5875,
|
| 12276 |
+
"step": 17490
|
| 12277 |
+
},
|
| 12278 |
+
{
|
| 12279 |
+
"epoch": 0.32475289783969735,
|
| 12280 |
+
"grad_norm": 34.53125,
|
| 12281 |
+
"learning_rate": 9.949257374425777e-06,
|
| 12282 |
+
"loss": 21.6696,
|
| 12283 |
+
"step": 17500
|
| 12284 |
+
},
|
| 12285 |
+
{
|
| 12286 |
+
"epoch": 0.32493847092417716,
|
| 12287 |
+
"grad_norm": 34.6875,
|
| 12288 |
+
"learning_rate": 9.949228378639732e-06,
|
| 12289 |
+
"loss": 22.0292,
|
| 12290 |
+
"step": 17510
|
| 12291 |
+
},
|
| 12292 |
+
{
|
| 12293 |
+
"epoch": 0.32512404400865696,
|
| 12294 |
+
"grad_norm": 34.34375,
|
| 12295 |
+
"learning_rate": 9.94919938285369e-06,
|
| 12296 |
+
"loss": 21.6901,
|
| 12297 |
+
"step": 17520
|
| 12298 |
+
},
|
| 12299 |
+
{
|
| 12300 |
+
"epoch": 0.3253096170931368,
|
| 12301 |
+
"grad_norm": 34.71875,
|
| 12302 |
+
"learning_rate": 9.949170387067649e-06,
|
| 12303 |
+
"loss": 21.5895,
|
| 12304 |
+
"step": 17530
|
| 12305 |
+
},
|
| 12306 |
+
{
|
| 12307 |
+
"epoch": 0.32549519017761663,
|
| 12308 |
+
"grad_norm": 34.625,
|
| 12309 |
+
"learning_rate": 9.949141391281606e-06,
|
| 12310 |
+
"loss": 21.7416,
|
| 12311 |
+
"step": 17540
|
| 12312 |
+
},
|
| 12313 |
+
{
|
| 12314 |
+
"epoch": 0.32568076326209644,
|
| 12315 |
+
"grad_norm": 34.0,
|
| 12316 |
+
"learning_rate": 9.949112395495564e-06,
|
| 12317 |
+
"loss": 21.7771,
|
| 12318 |
+
"step": 17550
|
| 12319 |
+
},
|
| 12320 |
+
{
|
| 12321 |
+
"epoch": 0.3258663363465763,
|
| 12322 |
+
"grad_norm": 35.875,
|
| 12323 |
+
"learning_rate": 9.949083399709521e-06,
|
| 12324 |
+
"loss": 21.8724,
|
| 12325 |
+
"step": 17560
|
| 12326 |
+
},
|
| 12327 |
+
{
|
| 12328 |
+
"epoch": 0.3260519094310561,
|
| 12329 |
+
"grad_norm": 34.78125,
|
| 12330 |
+
"learning_rate": 9.949054403923479e-06,
|
| 12331 |
+
"loss": 21.6644,
|
| 12332 |
+
"step": 17570
|
| 12333 |
+
},
|
| 12334 |
+
{
|
| 12335 |
+
"epoch": 0.32623748251553597,
|
| 12336 |
+
"grad_norm": 35.0,
|
| 12337 |
+
"learning_rate": 9.949025408137436e-06,
|
| 12338 |
+
"loss": 22.1336,
|
| 12339 |
+
"step": 17580
|
| 12340 |
+
},
|
| 12341 |
+
{
|
| 12342 |
+
"epoch": 0.3264230556000158,
|
| 12343 |
+
"grad_norm": 34.46875,
|
| 12344 |
+
"learning_rate": 9.948996412351393e-06,
|
| 12345 |
+
"loss": 21.7064,
|
| 12346 |
+
"step": 17590
|
| 12347 |
+
},
|
| 12348 |
+
{
|
| 12349 |
+
"epoch": 0.3266086286844956,
|
| 12350 |
+
"grad_norm": 35.375,
|
| 12351 |
+
"learning_rate": 9.94896741656535e-06,
|
| 12352 |
+
"loss": 21.7408,
|
| 12353 |
+
"step": 17600
|
| 12354 |
+
},
|
| 12355 |
+
{
|
| 12356 |
+
"epoch": 0.32679420176897545,
|
| 12357 |
+
"grad_norm": 33.625,
|
| 12358 |
+
"learning_rate": 9.94893842077931e-06,
|
| 12359 |
+
"loss": 21.6225,
|
| 12360 |
+
"step": 17610
|
| 12361 |
+
},
|
| 12362 |
+
{
|
| 12363 |
+
"epoch": 0.32697977485345525,
|
| 12364 |
+
"grad_norm": 35.84375,
|
| 12365 |
+
"learning_rate": 9.948909424993266e-06,
|
| 12366 |
+
"loss": 21.4826,
|
| 12367 |
+
"step": 17620
|
| 12368 |
+
},
|
| 12369 |
+
{
|
| 12370 |
+
"epoch": 0.32716534793793506,
|
| 12371 |
+
"grad_norm": 33.90625,
|
| 12372 |
+
"learning_rate": 9.948880429207225e-06,
|
| 12373 |
+
"loss": 21.8825,
|
| 12374 |
+
"step": 17630
|
| 12375 |
+
},
|
| 12376 |
+
{
|
| 12377 |
+
"epoch": 0.3273509210224149,
|
| 12378 |
+
"grad_norm": 36.03125,
|
| 12379 |
+
"learning_rate": 9.948851433421182e-06,
|
| 12380 |
+
"loss": 21.4889,
|
| 12381 |
+
"step": 17640
|
| 12382 |
+
},
|
| 12383 |
+
{
|
| 12384 |
+
"epoch": 0.3275364941068947,
|
| 12385 |
+
"grad_norm": 32.84375,
|
| 12386 |
+
"learning_rate": 9.94882243763514e-06,
|
| 12387 |
+
"loss": 21.5165,
|
| 12388 |
+
"step": 17650
|
| 12389 |
+
},
|
| 12390 |
+
{
|
| 12391 |
+
"epoch": 0.3277220671913746,
|
| 12392 |
+
"grad_norm": 35.0625,
|
| 12393 |
+
"learning_rate": 9.948793441849097e-06,
|
| 12394 |
+
"loss": 21.6394,
|
| 12395 |
+
"step": 17660
|
| 12396 |
+
},
|
| 12397 |
+
{
|
| 12398 |
+
"epoch": 0.3279076402758544,
|
| 12399 |
+
"grad_norm": 35.0625,
|
| 12400 |
+
"learning_rate": 9.948764446063054e-06,
|
| 12401 |
+
"loss": 21.623,
|
| 12402 |
+
"step": 17670
|
| 12403 |
+
},
|
| 12404 |
+
{
|
| 12405 |
+
"epoch": 0.3280932133603342,
|
| 12406 |
+
"grad_norm": 34.125,
|
| 12407 |
+
"learning_rate": 9.948735450277012e-06,
|
| 12408 |
+
"loss": 22.1933,
|
| 12409 |
+
"step": 17680
|
| 12410 |
+
},
|
| 12411 |
+
{
|
| 12412 |
+
"epoch": 0.32827878644481406,
|
| 12413 |
+
"grad_norm": 36.65625,
|
| 12414 |
+
"learning_rate": 9.948706454490969e-06,
|
| 12415 |
+
"loss": 21.7992,
|
| 12416 |
+
"step": 17690
|
| 12417 |
+
},
|
| 12418 |
+
{
|
| 12419 |
+
"epoch": 0.32846435952929387,
|
| 12420 |
+
"grad_norm": 35.15625,
|
| 12421 |
+
"learning_rate": 9.948677458704927e-06,
|
| 12422 |
+
"loss": 21.9871,
|
| 12423 |
+
"step": 17700
|
| 12424 |
+
},
|
| 12425 |
+
{
|
| 12426 |
+
"epoch": 0.3286499326137737,
|
| 12427 |
+
"grad_norm": 38.0,
|
| 12428 |
+
"learning_rate": 9.948648462918886e-06,
|
| 12429 |
+
"loss": 22.3474,
|
| 12430 |
+
"step": 17710
|
| 12431 |
+
},
|
| 12432 |
+
{
|
| 12433 |
+
"epoch": 0.32883550569825354,
|
| 12434 |
+
"grad_norm": 36.0,
|
| 12435 |
+
"learning_rate": 9.948619467132843e-06,
|
| 12436 |
+
"loss": 22.0369,
|
| 12437 |
+
"step": 17720
|
| 12438 |
+
},
|
| 12439 |
+
{
|
| 12440 |
+
"epoch": 0.32902107878273334,
|
| 12441 |
+
"grad_norm": 35.3125,
|
| 12442 |
+
"learning_rate": 9.948590471346799e-06,
|
| 12443 |
+
"loss": 21.3599,
|
| 12444 |
+
"step": 17730
|
| 12445 |
+
},
|
| 12446 |
+
{
|
| 12447 |
+
"epoch": 0.3292066518672132,
|
| 12448 |
+
"grad_norm": 34.90625,
|
| 12449 |
+
"learning_rate": 9.948561475560758e-06,
|
| 12450 |
+
"loss": 21.8559,
|
| 12451 |
+
"step": 17740
|
| 12452 |
+
},
|
| 12453 |
+
{
|
| 12454 |
+
"epoch": 0.329392224951693,
|
| 12455 |
+
"grad_norm": 35.875,
|
| 12456 |
+
"learning_rate": 9.948532479774715e-06,
|
| 12457 |
+
"loss": 21.8818,
|
| 12458 |
+
"step": 17750
|
| 12459 |
+
},
|
| 12460 |
+
{
|
| 12461 |
+
"epoch": 0.3295777980361728,
|
| 12462 |
+
"grad_norm": 32.96875,
|
| 12463 |
+
"learning_rate": 9.948503483988673e-06,
|
| 12464 |
+
"loss": 21.4791,
|
| 12465 |
+
"step": 17760
|
| 12466 |
+
},
|
| 12467 |
+
{
|
| 12468 |
+
"epoch": 0.3297633711206527,
|
| 12469 |
+
"grad_norm": 33.21875,
|
| 12470 |
+
"learning_rate": 9.94847448820263e-06,
|
| 12471 |
+
"loss": 21.6707,
|
| 12472 |
+
"step": 17770
|
| 12473 |
+
},
|
| 12474 |
+
{
|
| 12475 |
+
"epoch": 0.3299489442051325,
|
| 12476 |
+
"grad_norm": 34.875,
|
| 12477 |
+
"learning_rate": 9.948445492416588e-06,
|
| 12478 |
+
"loss": 21.4599,
|
| 12479 |
+
"step": 17780
|
| 12480 |
+
},
|
| 12481 |
+
{
|
| 12482 |
+
"epoch": 0.3301345172896123,
|
| 12483 |
+
"grad_norm": 35.625,
|
| 12484 |
+
"learning_rate": 9.948416496630545e-06,
|
| 12485 |
+
"loss": 21.9671,
|
| 12486 |
+
"step": 17790
|
| 12487 |
+
},
|
| 12488 |
+
{
|
| 12489 |
+
"epoch": 0.33032009037409216,
|
| 12490 |
+
"grad_norm": 34.0625,
|
| 12491 |
+
"learning_rate": 9.948387500844502e-06,
|
| 12492 |
+
"loss": 21.7585,
|
| 12493 |
+
"step": 17800
|
| 12494 |
+
},
|
| 12495 |
+
{
|
| 12496 |
+
"epoch": 0.33050566345857196,
|
| 12497 |
+
"grad_norm": 36.21875,
|
| 12498 |
+
"learning_rate": 9.948358505058461e-06,
|
| 12499 |
+
"loss": 21.5565,
|
| 12500 |
+
"step": 17810
|
| 12501 |
+
},
|
| 12502 |
+
{
|
| 12503 |
+
"epoch": 0.33069123654305177,
|
| 12504 |
+
"grad_norm": 33.5625,
|
| 12505 |
+
"learning_rate": 9.948329509272419e-06,
|
| 12506 |
+
"loss": 21.3623,
|
| 12507 |
+
"step": 17820
|
| 12508 |
+
},
|
| 12509 |
+
{
|
| 12510 |
+
"epoch": 0.33087680962753163,
|
| 12511 |
+
"grad_norm": 35.21875,
|
| 12512 |
+
"learning_rate": 9.948300513486376e-06,
|
| 12513 |
+
"loss": 21.5242,
|
| 12514 |
+
"step": 17830
|
| 12515 |
+
},
|
| 12516 |
+
{
|
| 12517 |
+
"epoch": 0.33106238271201144,
|
| 12518 |
+
"grad_norm": 33.96875,
|
| 12519 |
+
"learning_rate": 9.948271517700334e-06,
|
| 12520 |
+
"loss": 22.0939,
|
| 12521 |
+
"step": 17840
|
| 12522 |
+
},
|
| 12523 |
+
{
|
| 12524 |
+
"epoch": 0.3312479557964913,
|
| 12525 |
+
"grad_norm": 33.96875,
|
| 12526 |
+
"learning_rate": 9.948242521914291e-06,
|
| 12527 |
+
"loss": 21.8529,
|
| 12528 |
+
"step": 17850
|
| 12529 |
+
},
|
| 12530 |
+
{
|
| 12531 |
+
"epoch": 0.3314335288809711,
|
| 12532 |
+
"grad_norm": 36.78125,
|
| 12533 |
+
"learning_rate": 9.948213526128248e-06,
|
| 12534 |
+
"loss": 21.7178,
|
| 12535 |
+
"step": 17860
|
| 12536 |
+
},
|
| 12537 |
+
{
|
| 12538 |
+
"epoch": 0.3316191019654509,
|
| 12539 |
+
"grad_norm": 36.84375,
|
| 12540 |
+
"learning_rate": 9.948184530342206e-06,
|
| 12541 |
+
"loss": 21.9362,
|
| 12542 |
+
"step": 17870
|
| 12543 |
+
},
|
| 12544 |
+
{
|
| 12545 |
+
"epoch": 0.3318046750499308,
|
| 12546 |
+
"grad_norm": 35.375,
|
| 12547 |
+
"learning_rate": 9.948155534556165e-06,
|
| 12548 |
+
"loss": 21.678,
|
| 12549 |
+
"step": 17880
|
| 12550 |
+
},
|
| 12551 |
+
{
|
| 12552 |
+
"epoch": 0.3319902481344106,
|
| 12553 |
+
"grad_norm": 35.78125,
|
| 12554 |
+
"learning_rate": 9.94812653877012e-06,
|
| 12555 |
+
"loss": 21.7038,
|
| 12556 |
+
"step": 17890
|
| 12557 |
+
},
|
| 12558 |
+
{
|
| 12559 |
+
"epoch": 0.3321758212188904,
|
| 12560 |
+
"grad_norm": 36.3125,
|
| 12561 |
+
"learning_rate": 9.948097542984078e-06,
|
| 12562 |
+
"loss": 21.7922,
|
| 12563 |
+
"step": 17900
|
| 12564 |
+
},
|
| 12565 |
+
{
|
| 12566 |
+
"epoch": 0.33236139430337025,
|
| 12567 |
+
"grad_norm": 35.65625,
|
| 12568 |
+
"learning_rate": 9.948068547198037e-06,
|
| 12569 |
+
"loss": 21.4636,
|
| 12570 |
+
"step": 17910
|
| 12571 |
+
},
|
| 12572 |
+
{
|
| 12573 |
+
"epoch": 0.33254696738785006,
|
| 12574 |
+
"grad_norm": 33.84375,
|
| 12575 |
+
"learning_rate": 9.948039551411995e-06,
|
| 12576 |
+
"loss": 21.4673,
|
| 12577 |
+
"step": 17920
|
| 12578 |
+
},
|
| 12579 |
+
{
|
| 12580 |
+
"epoch": 0.3327325404723299,
|
| 12581 |
+
"grad_norm": 34.625,
|
| 12582 |
+
"learning_rate": 9.948010555625952e-06,
|
| 12583 |
+
"loss": 21.6747,
|
| 12584 |
+
"step": 17930
|
| 12585 |
+
},
|
| 12586 |
+
{
|
| 12587 |
+
"epoch": 0.3329181135568097,
|
| 12588 |
+
"grad_norm": 34.46875,
|
| 12589 |
+
"learning_rate": 9.94798155983991e-06,
|
| 12590 |
+
"loss": 21.5015,
|
| 12591 |
+
"step": 17940
|
| 12592 |
+
},
|
| 12593 |
+
{
|
| 12594 |
+
"epoch": 0.33310368664128953,
|
| 12595 |
+
"grad_norm": 34.65625,
|
| 12596 |
+
"learning_rate": 9.947952564053867e-06,
|
| 12597 |
+
"loss": 22.1432,
|
| 12598 |
+
"step": 17950
|
| 12599 |
+
},
|
| 12600 |
+
{
|
| 12601 |
+
"epoch": 0.3332892597257694,
|
| 12602 |
+
"grad_norm": 33.96875,
|
| 12603 |
+
"learning_rate": 9.947923568267824e-06,
|
| 12604 |
+
"loss": 21.695,
|
| 12605 |
+
"step": 17960
|
| 12606 |
+
},
|
| 12607 |
+
{
|
| 12608 |
+
"epoch": 0.3334748328102492,
|
| 12609 |
+
"grad_norm": 37.53125,
|
| 12610 |
+
"learning_rate": 9.947894572481782e-06,
|
| 12611 |
+
"loss": 21.7699,
|
| 12612 |
+
"step": 17970
|
| 12613 |
+
},
|
| 12614 |
+
{
|
| 12615 |
+
"epoch": 0.333660405894729,
|
| 12616 |
+
"grad_norm": 33.53125,
|
| 12617 |
+
"learning_rate": 9.94786557669574e-06,
|
| 12618 |
+
"loss": 21.9693,
|
| 12619 |
+
"step": 17980
|
| 12620 |
+
},
|
| 12621 |
+
{
|
| 12622 |
+
"epoch": 0.33384597897920887,
|
| 12623 |
+
"grad_norm": 36.03125,
|
| 12624 |
+
"learning_rate": 9.947836580909698e-06,
|
| 12625 |
+
"loss": 21.9864,
|
| 12626 |
+
"step": 17990
|
| 12627 |
+
},
|
| 12628 |
+
{
|
| 12629 |
+
"epoch": 0.3340315520636887,
|
| 12630 |
+
"grad_norm": 35.40625,
|
| 12631 |
+
"learning_rate": 9.947807585123654e-06,
|
| 12632 |
+
"loss": 21.6196,
|
| 12633 |
+
"step": 18000
|
| 12634 |
+
},
|
| 12635 |
+
{
|
| 12636 |
+
"epoch": 0.33421712514816854,
|
| 12637 |
+
"grad_norm": 36.0625,
|
| 12638 |
+
"learning_rate": 9.947778589337613e-06,
|
| 12639 |
+
"loss": 21.6735,
|
| 12640 |
+
"step": 18010
|
| 12641 |
+
},
|
| 12642 |
+
{
|
| 12643 |
+
"epoch": 0.33440269823264834,
|
| 12644 |
+
"grad_norm": 33.78125,
|
| 12645 |
+
"learning_rate": 9.94774959355157e-06,
|
| 12646 |
+
"loss": 21.4556,
|
| 12647 |
+
"step": 18020
|
| 12648 |
+
},
|
| 12649 |
+
{
|
| 12650 |
+
"epoch": 0.33458827131712815,
|
| 12651 |
+
"grad_norm": 34.125,
|
| 12652 |
+
"learning_rate": 9.947720597765528e-06,
|
| 12653 |
+
"loss": 22.1459,
|
| 12654 |
+
"step": 18030
|
| 12655 |
+
},
|
| 12656 |
+
{
|
| 12657 |
+
"epoch": 0.334773844401608,
|
| 12658 |
+
"grad_norm": 34.65625,
|
| 12659 |
+
"learning_rate": 9.947691601979485e-06,
|
| 12660 |
+
"loss": 21.8858,
|
| 12661 |
+
"step": 18040
|
| 12662 |
+
},
|
| 12663 |
+
{
|
| 12664 |
+
"epoch": 0.3349594174860878,
|
| 12665 |
+
"grad_norm": 37.84375,
|
| 12666 |
+
"learning_rate": 9.947662606193443e-06,
|
| 12667 |
+
"loss": 21.5832,
|
| 12668 |
+
"step": 18050
|
| 12669 |
+
},
|
| 12670 |
+
{
|
| 12671 |
+
"epoch": 0.3351449905705676,
|
| 12672 |
+
"grad_norm": 34.6875,
|
| 12673 |
+
"learning_rate": 9.9476336104074e-06,
|
| 12674 |
+
"loss": 21.8271,
|
| 12675 |
+
"step": 18060
|
| 12676 |
+
},
|
| 12677 |
+
{
|
| 12678 |
+
"epoch": 0.3353305636550475,
|
| 12679 |
+
"grad_norm": 36.25,
|
| 12680 |
+
"learning_rate": 9.947604614621357e-06,
|
| 12681 |
+
"loss": 21.8723,
|
| 12682 |
+
"step": 18070
|
| 12683 |
+
},
|
| 12684 |
+
{
|
| 12685 |
+
"epoch": 0.3355161367395273,
|
| 12686 |
+
"grad_norm": 34.9375,
|
| 12687 |
+
"learning_rate": 9.947575618835315e-06,
|
| 12688 |
+
"loss": 21.7849,
|
| 12689 |
+
"step": 18080
|
| 12690 |
+
},
|
| 12691 |
+
{
|
| 12692 |
+
"epoch": 0.3357017098240071,
|
| 12693 |
+
"grad_norm": 35.8125,
|
| 12694 |
+
"learning_rate": 9.947546623049274e-06,
|
| 12695 |
+
"loss": 21.3291,
|
| 12696 |
+
"step": 18090
|
| 12697 |
+
},
|
| 12698 |
+
{
|
| 12699 |
+
"epoch": 0.33588728290848696,
|
| 12700 |
+
"grad_norm": 33.65625,
|
| 12701 |
+
"learning_rate": 9.94751762726323e-06,
|
| 12702 |
+
"loss": 21.2726,
|
| 12703 |
+
"step": 18100
|
| 12704 |
+
},
|
| 12705 |
+
{
|
| 12706 |
+
"epoch": 0.33607285599296677,
|
| 12707 |
+
"grad_norm": 34.09375,
|
| 12708 |
+
"learning_rate": 9.947488631477189e-06,
|
| 12709 |
+
"loss": 22.0293,
|
| 12710 |
+
"step": 18110
|
| 12711 |
+
},
|
| 12712 |
+
{
|
| 12713 |
+
"epoch": 0.33625842907744663,
|
| 12714 |
+
"grad_norm": 34.0,
|
| 12715 |
+
"learning_rate": 9.947459635691146e-06,
|
| 12716 |
+
"loss": 21.8386,
|
| 12717 |
+
"step": 18120
|
| 12718 |
+
},
|
| 12719 |
+
{
|
| 12720 |
+
"epoch": 0.33644400216192644,
|
| 12721 |
+
"grad_norm": 35.09375,
|
| 12722 |
+
"learning_rate": 9.947430639905104e-06,
|
| 12723 |
+
"loss": 21.636,
|
| 12724 |
+
"step": 18130
|
| 12725 |
+
},
|
| 12726 |
+
{
|
| 12727 |
+
"epoch": 0.33662957524640624,
|
| 12728 |
+
"grad_norm": 31.484375,
|
| 12729 |
+
"learning_rate": 9.947401644119061e-06,
|
| 12730 |
+
"loss": 21.6889,
|
| 12731 |
+
"step": 18140
|
| 12732 |
+
},
|
| 12733 |
+
{
|
| 12734 |
+
"epoch": 0.3368151483308861,
|
| 12735 |
+
"grad_norm": 36.78125,
|
| 12736 |
+
"learning_rate": 9.947372648333018e-06,
|
| 12737 |
+
"loss": 21.4289,
|
| 12738 |
+
"step": 18150
|
| 12739 |
+
},
|
| 12740 |
+
{
|
| 12741 |
+
"epoch": 0.3370007214153659,
|
| 12742 |
+
"grad_norm": 34.9375,
|
| 12743 |
+
"learning_rate": 9.947343652546976e-06,
|
| 12744 |
+
"loss": 21.5468,
|
| 12745 |
+
"step": 18160
|
| 12746 |
+
},
|
| 12747 |
+
{
|
| 12748 |
+
"epoch": 0.3371862944998457,
|
| 12749 |
+
"grad_norm": 34.25,
|
| 12750 |
+
"learning_rate": 9.947314656760933e-06,
|
| 12751 |
+
"loss": 21.5357,
|
| 12752 |
+
"step": 18170
|
| 12753 |
+
},
|
| 12754 |
+
{
|
| 12755 |
+
"epoch": 0.3373718675843256,
|
| 12756 |
+
"grad_norm": 33.6875,
|
| 12757 |
+
"learning_rate": 9.94728566097489e-06,
|
| 12758 |
+
"loss": 21.5039,
|
| 12759 |
+
"step": 18180
|
| 12760 |
+
},
|
| 12761 |
+
{
|
| 12762 |
+
"epoch": 0.3375574406688054,
|
| 12763 |
+
"grad_norm": 36.46875,
|
| 12764 |
+
"learning_rate": 9.94725666518885e-06,
|
| 12765 |
+
"loss": 21.9166,
|
| 12766 |
+
"step": 18190
|
| 12767 |
+
},
|
| 12768 |
+
{
|
| 12769 |
+
"epoch": 0.33774301375328525,
|
| 12770 |
+
"grad_norm": 35.09375,
|
| 12771 |
+
"learning_rate": 9.947227669402807e-06,
|
| 12772 |
+
"loss": 22.2752,
|
| 12773 |
+
"step": 18200
|
| 12774 |
+
},
|
| 12775 |
+
{
|
| 12776 |
+
"epoch": 0.33792858683776505,
|
| 12777 |
+
"grad_norm": 34.8125,
|
| 12778 |
+
"learning_rate": 9.947198673616763e-06,
|
| 12779 |
+
"loss": 21.7475,
|
| 12780 |
+
"step": 18210
|
| 12781 |
+
},
|
| 12782 |
+
{
|
| 12783 |
+
"epoch": 0.33811415992224486,
|
| 12784 |
+
"grad_norm": 36.3125,
|
| 12785 |
+
"learning_rate": 9.947169677830722e-06,
|
| 12786 |
+
"loss": 22.0181,
|
| 12787 |
+
"step": 18220
|
| 12788 |
+
},
|
| 12789 |
+
{
|
| 12790 |
+
"epoch": 0.3382997330067247,
|
| 12791 |
+
"grad_norm": 34.875,
|
| 12792 |
+
"learning_rate": 9.94714068204468e-06,
|
| 12793 |
+
"loss": 21.1761,
|
| 12794 |
+
"step": 18230
|
| 12795 |
+
},
|
| 12796 |
+
{
|
| 12797 |
+
"epoch": 0.33848530609120453,
|
| 12798 |
+
"grad_norm": 33.65625,
|
| 12799 |
+
"learning_rate": 9.947111686258637e-06,
|
| 12800 |
+
"loss": 21.6197,
|
| 12801 |
+
"step": 18240
|
| 12802 |
+
},
|
| 12803 |
+
{
|
| 12804 |
+
"epoch": 0.33867087917568434,
|
| 12805 |
+
"grad_norm": 34.1875,
|
| 12806 |
+
"learning_rate": 9.947082690472594e-06,
|
| 12807 |
+
"loss": 21.3926,
|
| 12808 |
+
"step": 18250
|
| 12809 |
+
},
|
| 12810 |
+
{
|
| 12811 |
+
"epoch": 0.3388564522601642,
|
| 12812 |
+
"grad_norm": 34.3125,
|
| 12813 |
+
"learning_rate": 9.947053694686552e-06,
|
| 12814 |
+
"loss": 21.5869,
|
| 12815 |
+
"step": 18260
|
| 12816 |
+
},
|
| 12817 |
+
{
|
| 12818 |
+
"epoch": 0.339042025344644,
|
| 12819 |
+
"grad_norm": 34.25,
|
| 12820 |
+
"learning_rate": 9.947024698900509e-06,
|
| 12821 |
+
"loss": 22.0923,
|
| 12822 |
+
"step": 18270
|
| 12823 |
+
},
|
| 12824 |
+
{
|
| 12825 |
+
"epoch": 0.33922759842912387,
|
| 12826 |
+
"grad_norm": 36.625,
|
| 12827 |
+
"learning_rate": 9.946995703114466e-06,
|
| 12828 |
+
"loss": 21.7863,
|
| 12829 |
+
"step": 18280
|
| 12830 |
+
},
|
| 12831 |
+
{
|
| 12832 |
+
"epoch": 0.3394131715136037,
|
| 12833 |
+
"grad_norm": 34.21875,
|
| 12834 |
+
"learning_rate": 9.946966707328425e-06,
|
| 12835 |
+
"loss": 21.7017,
|
| 12836 |
+
"step": 18290
|
| 12837 |
+
},
|
| 12838 |
+
{
|
| 12839 |
+
"epoch": 0.3395987445980835,
|
| 12840 |
+
"grad_norm": 35.03125,
|
| 12841 |
+
"learning_rate": 9.946937711542383e-06,
|
| 12842 |
+
"loss": 21.9386,
|
| 12843 |
+
"step": 18300
|
| 12844 |
+
},
|
| 12845 |
+
{
|
| 12846 |
+
"epoch": 0.33978431768256334,
|
| 12847 |
+
"grad_norm": 35.5,
|
| 12848 |
+
"learning_rate": 9.94690871575634e-06,
|
| 12849 |
+
"loss": 21.9028,
|
| 12850 |
+
"step": 18310
|
| 12851 |
+
},
|
| 12852 |
+
{
|
| 12853 |
+
"epoch": 0.33996989076704315,
|
| 12854 |
+
"grad_norm": 36.46875,
|
| 12855 |
+
"learning_rate": 9.946879719970298e-06,
|
| 12856 |
+
"loss": 21.3511,
|
| 12857 |
+
"step": 18320
|
| 12858 |
+
},
|
| 12859 |
+
{
|
| 12860 |
+
"epoch": 0.34015546385152295,
|
| 12861 |
+
"grad_norm": 35.1875,
|
| 12862 |
+
"learning_rate": 9.946850724184255e-06,
|
| 12863 |
+
"loss": 21.4002,
|
| 12864 |
+
"step": 18330
|
| 12865 |
+
},
|
| 12866 |
+
{
|
| 12867 |
+
"epoch": 0.3403410369360028,
|
| 12868 |
+
"grad_norm": 33.96875,
|
| 12869 |
+
"learning_rate": 9.946821728398212e-06,
|
| 12870 |
+
"loss": 21.6701,
|
| 12871 |
+
"step": 18340
|
| 12872 |
+
},
|
| 12873 |
+
{
|
| 12874 |
+
"epoch": 0.3405266100204826,
|
| 12875 |
+
"grad_norm": 34.4375,
|
| 12876 |
+
"learning_rate": 9.94679273261217e-06,
|
| 12877 |
+
"loss": 21.6811,
|
| 12878 |
+
"step": 18350
|
| 12879 |
+
},
|
| 12880 |
+
{
|
| 12881 |
+
"epoch": 0.34071218310496243,
|
| 12882 |
+
"grad_norm": 33.09375,
|
| 12883 |
+
"learning_rate": 9.946763736826129e-06,
|
| 12884 |
+
"loss": 21.7387,
|
| 12885 |
+
"step": 18360
|
| 12886 |
+
},
|
| 12887 |
+
{
|
| 12888 |
+
"epoch": 0.3408977561894423,
|
| 12889 |
+
"grad_norm": 35.0625,
|
| 12890 |
+
"learning_rate": 9.946734741040085e-06,
|
| 12891 |
+
"loss": 22.08,
|
| 12892 |
+
"step": 18370
|
| 12893 |
+
},
|
| 12894 |
+
{
|
| 12895 |
+
"epoch": 0.3410833292739221,
|
| 12896 |
+
"grad_norm": 35.90625,
|
| 12897 |
+
"learning_rate": 9.946705745254042e-06,
|
| 12898 |
+
"loss": 21.8435,
|
| 12899 |
+
"step": 18380
|
| 12900 |
+
},
|
| 12901 |
+
{
|
| 12902 |
+
"epoch": 0.34126890235840196,
|
| 12903 |
+
"grad_norm": 33.8125,
|
| 12904 |
+
"learning_rate": 9.946676749468001e-06,
|
| 12905 |
+
"loss": 21.7757,
|
| 12906 |
+
"step": 18390
|
| 12907 |
+
},
|
| 12908 |
+
{
|
| 12909 |
+
"epoch": 0.34145447544288177,
|
| 12910 |
+
"grad_norm": 35.625,
|
| 12911 |
+
"learning_rate": 9.946647753681959e-06,
|
| 12912 |
+
"loss": 21.6643,
|
| 12913 |
+
"step": 18400
|
| 12914 |
+
},
|
| 12915 |
+
{
|
| 12916 |
+
"epoch": 0.3416400485273616,
|
| 12917 |
+
"grad_norm": 36.875,
|
| 12918 |
+
"learning_rate": 9.946618757895916e-06,
|
| 12919 |
+
"loss": 21.495,
|
| 12920 |
+
"step": 18410
|
| 12921 |
+
},
|
| 12922 |
+
{
|
| 12923 |
+
"epoch": 0.34182562161184143,
|
| 12924 |
+
"grad_norm": 37.375,
|
| 12925 |
+
"learning_rate": 9.946589762109873e-06,
|
| 12926 |
+
"loss": 21.7553,
|
| 12927 |
+
"step": 18420
|
| 12928 |
+
},
|
| 12929 |
+
{
|
| 12930 |
+
"epoch": 0.34201119469632124,
|
| 12931 |
+
"grad_norm": 34.625,
|
| 12932 |
+
"learning_rate": 9.94656076632383e-06,
|
| 12933 |
+
"loss": 21.4652,
|
| 12934 |
+
"step": 18430
|
| 12935 |
+
},
|
| 12936 |
+
{
|
| 12937 |
+
"epoch": 0.34219676778080105,
|
| 12938 |
+
"grad_norm": 34.71875,
|
| 12939 |
+
"learning_rate": 9.946531770537788e-06,
|
| 12940 |
+
"loss": 21.7942,
|
| 12941 |
+
"step": 18440
|
| 12942 |
+
},
|
| 12943 |
+
{
|
| 12944 |
+
"epoch": 0.3423823408652809,
|
| 12945 |
+
"grad_norm": 35.21875,
|
| 12946 |
+
"learning_rate": 9.946502774751746e-06,
|
| 12947 |
+
"loss": 21.7887,
|
| 12948 |
+
"step": 18450
|
| 12949 |
+
},
|
| 12950 |
+
{
|
| 12951 |
+
"epoch": 0.3425679139497607,
|
| 12952 |
+
"grad_norm": 34.5,
|
| 12953 |
+
"learning_rate": 9.946473778965705e-06,
|
| 12954 |
+
"loss": 21.8405,
|
| 12955 |
+
"step": 18460
|
| 12956 |
+
},
|
| 12957 |
+
{
|
| 12958 |
+
"epoch": 0.3427534870342406,
|
| 12959 |
+
"grad_norm": 33.3125,
|
| 12960 |
+
"learning_rate": 9.946444783179662e-06,
|
| 12961 |
+
"loss": 21.3681,
|
| 12962 |
+
"step": 18470
|
| 12963 |
+
},
|
| 12964 |
+
{
|
| 12965 |
+
"epoch": 0.3429390601187204,
|
| 12966 |
+
"grad_norm": 34.90625,
|
| 12967 |
+
"learning_rate": 9.946415787393618e-06,
|
| 12968 |
+
"loss": 21.4823,
|
| 12969 |
+
"step": 18480
|
| 12970 |
+
},
|
| 12971 |
+
{
|
| 12972 |
+
"epoch": 0.3431246332032002,
|
| 12973 |
+
"grad_norm": 34.78125,
|
| 12974 |
+
"learning_rate": 9.946386791607577e-06,
|
| 12975 |
+
"loss": 21.7108,
|
| 12976 |
+
"step": 18490
|
| 12977 |
+
},
|
| 12978 |
+
{
|
| 12979 |
+
"epoch": 0.34331020628768005,
|
| 12980 |
+
"grad_norm": 33.875,
|
| 12981 |
+
"learning_rate": 9.946357795821534e-06,
|
| 12982 |
+
"loss": 21.6695,
|
| 12983 |
+
"step": 18500
|
| 12984 |
+
},
|
| 12985 |
+
{
|
| 12986 |
+
"epoch": 0.34349577937215986,
|
| 12987 |
+
"grad_norm": 33.8125,
|
| 12988 |
+
"learning_rate": 9.946328800035492e-06,
|
| 12989 |
+
"loss": 21.7427,
|
| 12990 |
+
"step": 18510
|
| 12991 |
+
},
|
| 12992 |
+
{
|
| 12993 |
+
"epoch": 0.34368135245663967,
|
| 12994 |
+
"grad_norm": 35.03125,
|
| 12995 |
+
"learning_rate": 9.94629980424945e-06,
|
| 12996 |
+
"loss": 21.5864,
|
| 12997 |
+
"step": 18520
|
| 12998 |
+
},
|
| 12999 |
+
{
|
| 13000 |
+
"epoch": 0.34386692554111953,
|
| 13001 |
+
"grad_norm": 33.1875,
|
| 13002 |
+
"learning_rate": 9.946270808463407e-06,
|
| 13003 |
+
"loss": 21.6827,
|
| 13004 |
+
"step": 18530
|
| 13005 |
+
},
|
| 13006 |
+
{
|
| 13007 |
+
"epoch": 0.34405249862559933,
|
| 13008 |
+
"grad_norm": 36.78125,
|
| 13009 |
+
"learning_rate": 9.946241812677364e-06,
|
| 13010 |
+
"loss": 22.1864,
|
| 13011 |
+
"step": 18540
|
| 13012 |
+
},
|
| 13013 |
+
{
|
| 13014 |
+
"epoch": 0.3442380717100792,
|
| 13015 |
+
"grad_norm": 34.53125,
|
| 13016 |
+
"learning_rate": 9.946212816891321e-06,
|
| 13017 |
+
"loss": 21.6761,
|
| 13018 |
+
"step": 18550
|
| 13019 |
+
},
|
| 13020 |
+
{
|
| 13021 |
+
"epoch": 0.344423644794559,
|
| 13022 |
+
"grad_norm": 35.75,
|
| 13023 |
+
"learning_rate": 9.94618382110528e-06,
|
| 13024 |
+
"loss": 21.5846,
|
| 13025 |
+
"step": 18560
|
| 13026 |
+
},
|
| 13027 |
+
{
|
| 13028 |
+
"epoch": 0.3446092178790388,
|
| 13029 |
+
"grad_norm": 34.0625,
|
| 13030 |
+
"learning_rate": 9.946154825319238e-06,
|
| 13031 |
+
"loss": 21.4387,
|
| 13032 |
+
"step": 18570
|
| 13033 |
+
},
|
| 13034 |
+
{
|
| 13035 |
+
"epoch": 0.34479479096351867,
|
| 13036 |
+
"grad_norm": 34.1875,
|
| 13037 |
+
"learning_rate": 9.946125829533195e-06,
|
| 13038 |
+
"loss": 21.8834,
|
| 13039 |
+
"step": 18580
|
| 13040 |
+
},
|
| 13041 |
+
{
|
| 13042 |
+
"epoch": 0.3449803640479985,
|
| 13043 |
+
"grad_norm": 32.5,
|
| 13044 |
+
"learning_rate": 9.946096833747153e-06,
|
| 13045 |
+
"loss": 21.6082,
|
| 13046 |
+
"step": 18590
|
| 13047 |
+
},
|
| 13048 |
+
{
|
| 13049 |
+
"epoch": 0.3451659371324783,
|
| 13050 |
+
"grad_norm": 34.71875,
|
| 13051 |
+
"learning_rate": 9.94606783796111e-06,
|
| 13052 |
+
"loss": 21.4526,
|
| 13053 |
+
"step": 18600
|
| 13054 |
+
},
|
| 13055 |
+
{
|
| 13056 |
+
"epoch": 0.34535151021695815,
|
| 13057 |
+
"grad_norm": 35.8125,
|
| 13058 |
+
"learning_rate": 9.946038842175068e-06,
|
| 13059 |
+
"loss": 21.8455,
|
| 13060 |
+
"step": 18610
|
| 13061 |
+
},
|
| 13062 |
+
{
|
| 13063 |
+
"epoch": 0.34553708330143795,
|
| 13064 |
+
"grad_norm": 34.78125,
|
| 13065 |
+
"learning_rate": 9.946009846389025e-06,
|
| 13066 |
+
"loss": 21.6449,
|
| 13067 |
+
"step": 18620
|
| 13068 |
+
},
|
| 13069 |
+
{
|
| 13070 |
+
"epoch": 0.34572265638591776,
|
| 13071 |
+
"grad_norm": 35.125,
|
| 13072 |
+
"learning_rate": 9.945980850602982e-06,
|
| 13073 |
+
"loss": 21.0863,
|
| 13074 |
+
"step": 18630
|
| 13075 |
+
},
|
| 13076 |
+
{
|
| 13077 |
+
"epoch": 0.3459082294703976,
|
| 13078 |
+
"grad_norm": 34.84375,
|
| 13079 |
+
"learning_rate": 9.94595185481694e-06,
|
| 13080 |
+
"loss": 21.0995,
|
| 13081 |
+
"step": 18640
|
| 13082 |
+
},
|
| 13083 |
+
{
|
| 13084 |
+
"epoch": 0.3460938025548774,
|
| 13085 |
+
"grad_norm": 37.25,
|
| 13086 |
+
"learning_rate": 9.945922859030897e-06,
|
| 13087 |
+
"loss": 21.4652,
|
| 13088 |
+
"step": 18650
|
| 13089 |
+
},
|
| 13090 |
+
{
|
| 13091 |
+
"epoch": 0.3462793756393573,
|
| 13092 |
+
"grad_norm": 33.875,
|
| 13093 |
+
"learning_rate": 9.945893863244855e-06,
|
| 13094 |
+
"loss": 21.5573,
|
| 13095 |
+
"step": 18660
|
| 13096 |
+
},
|
| 13097 |
+
{
|
| 13098 |
+
"epoch": 0.3464649487238371,
|
| 13099 |
+
"grad_norm": 32.21875,
|
| 13100 |
+
"learning_rate": 9.945864867458814e-06,
|
| 13101 |
+
"loss": 21.6797,
|
| 13102 |
+
"step": 18670
|
| 13103 |
+
},
|
| 13104 |
+
{
|
| 13105 |
+
"epoch": 0.3466505218083169,
|
| 13106 |
+
"grad_norm": 35.8125,
|
| 13107 |
+
"learning_rate": 9.945835871672771e-06,
|
| 13108 |
+
"loss": 21.5204,
|
| 13109 |
+
"step": 18680
|
| 13110 |
+
},
|
| 13111 |
+
{
|
| 13112 |
+
"epoch": 0.34683609489279676,
|
| 13113 |
+
"grad_norm": 34.625,
|
| 13114 |
+
"learning_rate": 9.945806875886728e-06,
|
| 13115 |
+
"loss": 21.4348,
|
| 13116 |
+
"step": 18690
|
| 13117 |
+
},
|
| 13118 |
+
{
|
| 13119 |
+
"epoch": 0.34702166797727657,
|
| 13120 |
+
"grad_norm": 35.25,
|
| 13121 |
+
"learning_rate": 9.945777880100686e-06,
|
| 13122 |
+
"loss": 21.6341,
|
| 13123 |
+
"step": 18700
|
| 13124 |
+
},
|
| 13125 |
+
{
|
| 13126 |
+
"epoch": 0.3472072410617564,
|
| 13127 |
+
"grad_norm": 36.0625,
|
| 13128 |
+
"learning_rate": 9.945748884314643e-06,
|
| 13129 |
+
"loss": 21.5154,
|
| 13130 |
+
"step": 18710
|
| 13131 |
+
},
|
| 13132 |
+
{
|
| 13133 |
+
"epoch": 0.34739281414623624,
|
| 13134 |
+
"grad_norm": 34.40625,
|
| 13135 |
+
"learning_rate": 9.9457198885286e-06,
|
| 13136 |
+
"loss": 21.8781,
|
| 13137 |
+
"step": 18720
|
| 13138 |
+
},
|
| 13139 |
+
{
|
| 13140 |
+
"epoch": 0.34757838723071605,
|
| 13141 |
+
"grad_norm": 36.40625,
|
| 13142 |
+
"learning_rate": 9.945690892742558e-06,
|
| 13143 |
+
"loss": 21.9311,
|
| 13144 |
+
"step": 18730
|
| 13145 |
+
},
|
| 13146 |
+
{
|
| 13147 |
+
"epoch": 0.3477639603151959,
|
| 13148 |
+
"grad_norm": 34.625,
|
| 13149 |
+
"learning_rate": 9.945661896956517e-06,
|
| 13150 |
+
"loss": 21.7879,
|
| 13151 |
+
"step": 18740
|
| 13152 |
+
},
|
| 13153 |
+
{
|
| 13154 |
+
"epoch": 0.3479495333996757,
|
| 13155 |
+
"grad_norm": 34.78125,
|
| 13156 |
+
"learning_rate": 9.945632901170473e-06,
|
| 13157 |
+
"loss": 21.7787,
|
| 13158 |
+
"step": 18750
|
| 13159 |
+
},
|
| 13160 |
+
{
|
| 13161 |
+
"epoch": 0.3481351064841555,
|
| 13162 |
+
"grad_norm": 36.125,
|
| 13163 |
+
"learning_rate": 9.94560390538443e-06,
|
| 13164 |
+
"loss": 21.6464,
|
| 13165 |
+
"step": 18760
|
| 13166 |
+
},
|
| 13167 |
+
{
|
| 13168 |
+
"epoch": 0.3483206795686354,
|
| 13169 |
+
"grad_norm": 34.5625,
|
| 13170 |
+
"learning_rate": 9.94557490959839e-06,
|
| 13171 |
+
"loss": 21.5736,
|
| 13172 |
+
"step": 18770
|
| 13173 |
+
},
|
| 13174 |
+
{
|
| 13175 |
+
"epoch": 0.3485062526531152,
|
| 13176 |
+
"grad_norm": 34.75,
|
| 13177 |
+
"learning_rate": 9.945545913812347e-06,
|
| 13178 |
+
"loss": 21.753,
|
| 13179 |
+
"step": 18780
|
| 13180 |
+
},
|
| 13181 |
+
{
|
| 13182 |
+
"epoch": 0.348691825737595,
|
| 13183 |
+
"grad_norm": 36.125,
|
| 13184 |
+
"learning_rate": 9.945516918026304e-06,
|
| 13185 |
+
"loss": 21.4789,
|
| 13186 |
+
"step": 18790
|
| 13187 |
+
},
|
| 13188 |
+
{
|
| 13189 |
+
"epoch": 0.34887739882207486,
|
| 13190 |
+
"grad_norm": 34.90625,
|
| 13191 |
+
"learning_rate": 9.945487922240262e-06,
|
| 13192 |
+
"loss": 21.8735,
|
| 13193 |
+
"step": 18800
|
| 13194 |
+
},
|
| 13195 |
+
{
|
| 13196 |
+
"epoch": 0.34906297190655466,
|
| 13197 |
+
"grad_norm": 35.21875,
|
| 13198 |
+
"learning_rate": 9.945458926454219e-06,
|
| 13199 |
+
"loss": 21.6979,
|
| 13200 |
+
"step": 18810
|
| 13201 |
+
},
|
| 13202 |
+
{
|
| 13203 |
+
"epoch": 0.3492485449910345,
|
| 13204 |
+
"grad_norm": 35.03125,
|
| 13205 |
+
"learning_rate": 9.945429930668176e-06,
|
| 13206 |
+
"loss": 21.2156,
|
| 13207 |
+
"step": 18820
|
| 13208 |
+
},
|
| 13209 |
+
{
|
| 13210 |
+
"epoch": 0.34943411807551433,
|
| 13211 |
+
"grad_norm": 35.125,
|
| 13212 |
+
"learning_rate": 9.945400934882134e-06,
|
| 13213 |
+
"loss": 22.11,
|
| 13214 |
+
"step": 18830
|
| 13215 |
+
},
|
| 13216 |
+
{
|
| 13217 |
+
"epoch": 0.34961969115999414,
|
| 13218 |
+
"grad_norm": 34.90625,
|
| 13219 |
+
"learning_rate": 9.945371939096093e-06,
|
| 13220 |
+
"loss": 21.4453,
|
| 13221 |
+
"step": 18840
|
| 13222 |
+
},
|
| 13223 |
+
{
|
| 13224 |
+
"epoch": 0.349805264244474,
|
| 13225 |
+
"grad_norm": 34.5,
|
| 13226 |
+
"learning_rate": 9.945342943310049e-06,
|
| 13227 |
+
"loss": 21.5445,
|
| 13228 |
+
"step": 18850
|
| 13229 |
+
},
|
| 13230 |
+
{
|
| 13231 |
+
"epoch": 0.3499908373289538,
|
| 13232 |
+
"grad_norm": 36.71875,
|
| 13233 |
+
"learning_rate": 9.945313947524006e-06,
|
| 13234 |
+
"loss": 21.6646,
|
| 13235 |
+
"step": 18860
|
| 13236 |
+
},
|
| 13237 |
+
{
|
| 13238 |
+
"epoch": 0.3501764104134336,
|
| 13239 |
+
"grad_norm": 33.53125,
|
| 13240 |
+
"learning_rate": 9.945284951737965e-06,
|
| 13241 |
+
"loss": 21.4822,
|
| 13242 |
+
"step": 18870
|
| 13243 |
+
},
|
| 13244 |
+
{
|
| 13245 |
+
"epoch": 0.3503619834979135,
|
| 13246 |
+
"grad_norm": 35.71875,
|
| 13247 |
+
"learning_rate": 9.945255955951923e-06,
|
| 13248 |
+
"loss": 21.2241,
|
| 13249 |
+
"step": 18880
|
| 13250 |
+
},
|
| 13251 |
+
{
|
| 13252 |
+
"epoch": 0.3505475565823933,
|
| 13253 |
+
"grad_norm": 33.78125,
|
| 13254 |
+
"learning_rate": 9.94522696016588e-06,
|
| 13255 |
+
"loss": 21.5542,
|
| 13256 |
+
"step": 18890
|
| 13257 |
+
},
|
| 13258 |
+
{
|
| 13259 |
+
"epoch": 0.3507331296668731,
|
| 13260 |
+
"grad_norm": 33.15625,
|
| 13261 |
+
"learning_rate": 9.945197964379837e-06,
|
| 13262 |
+
"loss": 21.23,
|
| 13263 |
+
"step": 18900
|
| 13264 |
+
},
|
| 13265 |
+
{
|
| 13266 |
+
"epoch": 0.35091870275135295,
|
| 13267 |
+
"grad_norm": 34.9375,
|
| 13268 |
+
"learning_rate": 9.945168968593795e-06,
|
| 13269 |
+
"loss": 21.4728,
|
| 13270 |
+
"step": 18910
|
| 13271 |
+
},
|
| 13272 |
+
{
|
| 13273 |
+
"epoch": 0.35110427583583276,
|
| 13274 |
+
"grad_norm": 35.84375,
|
| 13275 |
+
"learning_rate": 9.945139972807752e-06,
|
| 13276 |
+
"loss": 21.7163,
|
| 13277 |
+
"step": 18920
|
| 13278 |
+
},
|
| 13279 |
+
{
|
| 13280 |
+
"epoch": 0.3512898489203126,
|
| 13281 |
+
"grad_norm": 35.03125,
|
| 13282 |
+
"learning_rate": 9.94511097702171e-06,
|
| 13283 |
+
"loss": 21.5852,
|
| 13284 |
+
"step": 18930
|
| 13285 |
+
},
|
| 13286 |
+
{
|
| 13287 |
+
"epoch": 0.3514754220047924,
|
| 13288 |
+
"grad_norm": 36.84375,
|
| 13289 |
+
"learning_rate": 9.945081981235669e-06,
|
| 13290 |
+
"loss": 21.5413,
|
| 13291 |
+
"step": 18940
|
| 13292 |
+
},
|
| 13293 |
+
{
|
| 13294 |
+
"epoch": 0.35166099508927223,
|
| 13295 |
+
"grad_norm": 34.625,
|
| 13296 |
+
"learning_rate": 9.945052985449626e-06,
|
| 13297 |
+
"loss": 21.824,
|
| 13298 |
+
"step": 18950
|
| 13299 |
+
},
|
| 13300 |
+
{
|
| 13301 |
+
"epoch": 0.3518465681737521,
|
| 13302 |
+
"grad_norm": 36.28125,
|
| 13303 |
+
"learning_rate": 9.945023989663582e-06,
|
| 13304 |
+
"loss": 21.5073,
|
| 13305 |
+
"step": 18960
|
| 13306 |
+
},
|
| 13307 |
+
{
|
| 13308 |
+
"epoch": 0.3520321412582319,
|
| 13309 |
+
"grad_norm": 35.25,
|
| 13310 |
+
"learning_rate": 9.944994993877541e-06,
|
| 13311 |
+
"loss": 21.6476,
|
| 13312 |
+
"step": 18970
|
| 13313 |
+
},
|
| 13314 |
+
{
|
| 13315 |
+
"epoch": 0.3522177143427117,
|
| 13316 |
+
"grad_norm": 36.21875,
|
| 13317 |
+
"learning_rate": 9.944965998091498e-06,
|
| 13318 |
+
"loss": 21.9834,
|
| 13319 |
+
"step": 18980
|
| 13320 |
+
},
|
| 13321 |
+
{
|
| 13322 |
+
"epoch": 0.35240328742719157,
|
| 13323 |
+
"grad_norm": 35.75,
|
| 13324 |
+
"learning_rate": 9.944937002305456e-06,
|
| 13325 |
+
"loss": 21.3338,
|
| 13326 |
+
"step": 18990
|
| 13327 |
+
},
|
| 13328 |
+
{
|
| 13329 |
+
"epoch": 0.3525888605116714,
|
| 13330 |
+
"grad_norm": 32.65625,
|
| 13331 |
+
"learning_rate": 9.944908006519413e-06,
|
| 13332 |
+
"loss": 21.7214,
|
| 13333 |
+
"step": 19000
|
| 13334 |
+
},
|
| 13335 |
+
{
|
| 13336 |
+
"epoch": 0.35277443359615124,
|
| 13337 |
+
"grad_norm": 35.03125,
|
| 13338 |
+
"learning_rate": 9.944879010733372e-06,
|
| 13339 |
+
"loss": 21.3903,
|
| 13340 |
+
"step": 19010
|
| 13341 |
+
},
|
| 13342 |
+
{
|
| 13343 |
+
"epoch": 0.35296000668063104,
|
| 13344 |
+
"grad_norm": 36.28125,
|
| 13345 |
+
"learning_rate": 9.944850014947328e-06,
|
| 13346 |
+
"loss": 21.4519,
|
| 13347 |
+
"step": 19020
|
| 13348 |
+
},
|
| 13349 |
+
{
|
| 13350 |
+
"epoch": 0.35314557976511085,
|
| 13351 |
+
"grad_norm": 35.0625,
|
| 13352 |
+
"learning_rate": 9.944821019161285e-06,
|
| 13353 |
+
"loss": 21.318,
|
| 13354 |
+
"step": 19030
|
| 13355 |
+
},
|
| 13356 |
+
{
|
| 13357 |
+
"epoch": 0.3533311528495907,
|
| 13358 |
+
"grad_norm": 36.03125,
|
| 13359 |
+
"learning_rate": 9.944792023375245e-06,
|
| 13360 |
+
"loss": 21.5192,
|
| 13361 |
+
"step": 19040
|
| 13362 |
+
},
|
| 13363 |
+
{
|
| 13364 |
+
"epoch": 0.3535167259340705,
|
| 13365 |
+
"grad_norm": 36.65625,
|
| 13366 |
+
"learning_rate": 9.944763027589202e-06,
|
| 13367 |
+
"loss": 21.8759,
|
| 13368 |
+
"step": 19050
|
| 13369 |
+
},
|
| 13370 |
+
{
|
| 13371 |
+
"epoch": 0.3537022990185503,
|
| 13372 |
+
"grad_norm": 36.5625,
|
| 13373 |
+
"learning_rate": 9.94473403180316e-06,
|
| 13374 |
+
"loss": 21.4364,
|
| 13375 |
+
"step": 19060
|
| 13376 |
+
},
|
| 13377 |
+
{
|
| 13378 |
+
"epoch": 0.3538878721030302,
|
| 13379 |
+
"grad_norm": 33.71875,
|
| 13380 |
+
"learning_rate": 9.944705036017117e-06,
|
| 13381 |
+
"loss": 21.6865,
|
| 13382 |
+
"step": 19070
|
| 13383 |
+
},
|
| 13384 |
+
{
|
| 13385 |
+
"epoch": 0.35407344518751,
|
| 13386 |
+
"grad_norm": 33.5625,
|
| 13387 |
+
"learning_rate": 9.944676040231074e-06,
|
| 13388 |
+
"loss": 21.4553,
|
| 13389 |
+
"step": 19080
|
| 13390 |
+
},
|
| 13391 |
+
{
|
| 13392 |
+
"epoch": 0.35425901827198986,
|
| 13393 |
+
"grad_norm": 34.375,
|
| 13394 |
+
"learning_rate": 9.944647044445032e-06,
|
| 13395 |
+
"loss": 21.4714,
|
| 13396 |
+
"step": 19090
|
| 13397 |
+
},
|
| 13398 |
+
{
|
| 13399 |
+
"epoch": 0.35444459135646966,
|
| 13400 |
+
"grad_norm": 35.5,
|
| 13401 |
+
"learning_rate": 9.944618048658989e-06,
|
| 13402 |
+
"loss": 21.2766,
|
| 13403 |
+
"step": 19100
|
| 13404 |
+
},
|
| 13405 |
+
{
|
| 13406 |
+
"epoch": 0.35463016444094947,
|
| 13407 |
+
"grad_norm": 36.40625,
|
| 13408 |
+
"learning_rate": 9.944589052872946e-06,
|
| 13409 |
+
"loss": 21.7674,
|
| 13410 |
+
"step": 19110
|
| 13411 |
+
},
|
| 13412 |
+
{
|
| 13413 |
+
"epoch": 0.35481573752542933,
|
| 13414 |
+
"grad_norm": 33.59375,
|
| 13415 |
+
"learning_rate": 9.944560057086904e-06,
|
| 13416 |
+
"loss": 21.3237,
|
| 13417 |
+
"step": 19120
|
| 13418 |
+
},
|
| 13419 |
+
{
|
| 13420 |
+
"epoch": 0.35500131060990914,
|
| 13421 |
+
"grad_norm": 34.21875,
|
| 13422 |
+
"learning_rate": 9.944531061300861e-06,
|
| 13423 |
+
"loss": 21.6426,
|
| 13424 |
+
"step": 19130
|
| 13425 |
+
},
|
| 13426 |
+
{
|
| 13427 |
+
"epoch": 0.35518688369438894,
|
| 13428 |
+
"grad_norm": 38.28125,
|
| 13429 |
+
"learning_rate": 9.94450206551482e-06,
|
| 13430 |
+
"loss": 21.7993,
|
| 13431 |
+
"step": 19140
|
| 13432 |
+
},
|
| 13433 |
+
{
|
| 13434 |
+
"epoch": 0.3553724567788688,
|
| 13435 |
+
"grad_norm": 34.78125,
|
| 13436 |
+
"learning_rate": 9.944473069728778e-06,
|
| 13437 |
+
"loss": 21.6046,
|
| 13438 |
+
"step": 19150
|
| 13439 |
+
},
|
| 13440 |
+
{
|
| 13441 |
+
"epoch": 0.3555580298633486,
|
| 13442 |
+
"grad_norm": 34.625,
|
| 13443 |
+
"learning_rate": 9.944444073942735e-06,
|
| 13444 |
+
"loss": 21.9528,
|
| 13445 |
+
"step": 19160
|
| 13446 |
+
},
|
| 13447 |
+
{
|
| 13448 |
+
"epoch": 0.3557436029478285,
|
| 13449 |
+
"grad_norm": 33.4375,
|
| 13450 |
+
"learning_rate": 9.944415078156693e-06,
|
| 13451 |
+
"loss": 21.5006,
|
| 13452 |
+
"step": 19170
|
| 13453 |
+
},
|
| 13454 |
+
{
|
| 13455 |
+
"epoch": 0.3559291760323083,
|
| 13456 |
+
"grad_norm": 33.1875,
|
| 13457 |
+
"learning_rate": 9.94438608237065e-06,
|
| 13458 |
+
"loss": 21.2048,
|
| 13459 |
+
"step": 19180
|
| 13460 |
+
},
|
| 13461 |
+
{
|
| 13462 |
+
"epoch": 0.3561147491167881,
|
| 13463 |
+
"grad_norm": 34.15625,
|
| 13464 |
+
"learning_rate": 9.944357086584607e-06,
|
| 13465 |
+
"loss": 21.4316,
|
| 13466 |
+
"step": 19190
|
| 13467 |
+
},
|
| 13468 |
+
{
|
| 13469 |
+
"epoch": 0.35630032220126795,
|
| 13470 |
+
"grad_norm": 35.46875,
|
| 13471 |
+
"learning_rate": 9.944328090798565e-06,
|
| 13472 |
+
"loss": 21.9165,
|
| 13473 |
+
"step": 19200
|
| 13474 |
+
},
|
| 13475 |
+
{
|
| 13476 |
+
"epoch": 0.35648589528574776,
|
| 13477 |
+
"grad_norm": 36.6875,
|
| 13478 |
+
"learning_rate": 9.944299095012522e-06,
|
| 13479 |
+
"loss": 21.4528,
|
| 13480 |
+
"step": 19210
|
| 13481 |
+
},
|
| 13482 |
+
{
|
| 13483 |
+
"epoch": 0.35667146837022756,
|
| 13484 |
+
"grad_norm": 34.5625,
|
| 13485 |
+
"learning_rate": 9.944270099226481e-06,
|
| 13486 |
+
"loss": 21.9393,
|
| 13487 |
+
"step": 19220
|
| 13488 |
+
},
|
| 13489 |
+
{
|
| 13490 |
+
"epoch": 0.3568570414547074,
|
| 13491 |
+
"grad_norm": 36.8125,
|
| 13492 |
+
"learning_rate": 9.944241103440437e-06,
|
| 13493 |
+
"loss": 21.4406,
|
| 13494 |
+
"step": 19230
|
| 13495 |
+
},
|
| 13496 |
+
{
|
| 13497 |
+
"epoch": 0.35704261453918723,
|
| 13498 |
+
"grad_norm": 34.09375,
|
| 13499 |
+
"learning_rate": 9.944212107654394e-06,
|
| 13500 |
+
"loss": 21.279,
|
| 13501 |
+
"step": 19240
|
| 13502 |
+
},
|
| 13503 |
+
{
|
| 13504 |
+
"epoch": 0.35722818762366704,
|
| 13505 |
+
"grad_norm": 34.875,
|
| 13506 |
+
"learning_rate": 9.944183111868353e-06,
|
| 13507 |
+
"loss": 20.9569,
|
| 13508 |
+
"step": 19250
|
| 13509 |
+
},
|
| 13510 |
+
{
|
| 13511 |
+
"epoch": 0.3574137607081469,
|
| 13512 |
+
"grad_norm": 34.96875,
|
| 13513 |
+
"learning_rate": 9.944154116082311e-06,
|
| 13514 |
+
"loss": 21.32,
|
| 13515 |
+
"step": 19260
|
| 13516 |
+
},
|
| 13517 |
+
{
|
| 13518 |
+
"epoch": 0.3575993337926267,
|
| 13519 |
+
"grad_norm": 34.21875,
|
| 13520 |
+
"learning_rate": 9.944125120296268e-06,
|
| 13521 |
+
"loss": 21.2426,
|
| 13522 |
+
"step": 19270
|
| 13523 |
+
},
|
| 13524 |
+
{
|
| 13525 |
+
"epoch": 0.35778490687710657,
|
| 13526 |
+
"grad_norm": 34.53125,
|
| 13527 |
+
"learning_rate": 9.944096124510226e-06,
|
| 13528 |
+
"loss": 21.2983,
|
| 13529 |
+
"step": 19280
|
| 13530 |
+
},
|
| 13531 |
+
{
|
| 13532 |
+
"epoch": 0.3579704799615864,
|
| 13533 |
+
"grad_norm": 35.375,
|
| 13534 |
+
"learning_rate": 9.944067128724183e-06,
|
| 13535 |
+
"loss": 21.687,
|
| 13536 |
+
"step": 19290
|
| 13537 |
+
},
|
| 13538 |
+
{
|
| 13539 |
+
"epoch": 0.3581560530460662,
|
| 13540 |
+
"grad_norm": 35.5,
|
| 13541 |
+
"learning_rate": 9.94403813293814e-06,
|
| 13542 |
+
"loss": 21.6977,
|
| 13543 |
+
"step": 19300
|
| 13544 |
+
},
|
| 13545 |
+
{
|
| 13546 |
+
"epoch": 0.35834162613054604,
|
| 13547 |
+
"grad_norm": 35.3125,
|
| 13548 |
+
"learning_rate": 9.944009137152098e-06,
|
| 13549 |
+
"loss": 21.5511,
|
| 13550 |
+
"step": 19310
|
| 13551 |
+
},
|
| 13552 |
+
{
|
| 13553 |
+
"epoch": 0.35852719921502585,
|
| 13554 |
+
"grad_norm": 34.84375,
|
| 13555 |
+
"learning_rate": 9.943980141366057e-06,
|
| 13556 |
+
"loss": 21.4945,
|
| 13557 |
+
"step": 19320
|
| 13558 |
+
},
|
| 13559 |
+
{
|
| 13560 |
+
"epoch": 0.35871277229950566,
|
| 13561 |
+
"grad_norm": 33.125,
|
| 13562 |
+
"learning_rate": 9.943951145580014e-06,
|
| 13563 |
+
"loss": 21.4095,
|
| 13564 |
+
"step": 19330
|
| 13565 |
+
},
|
| 13566 |
+
{
|
| 13567 |
+
"epoch": 0.3588983453839855,
|
| 13568 |
+
"grad_norm": 35.4375,
|
| 13569 |
+
"learning_rate": 9.94392214979397e-06,
|
| 13570 |
+
"loss": 21.623,
|
| 13571 |
+
"step": 19340
|
| 13572 |
+
},
|
| 13573 |
+
{
|
| 13574 |
+
"epoch": 0.3590839184684653,
|
| 13575 |
+
"grad_norm": 35.96875,
|
| 13576 |
+
"learning_rate": 9.94389315400793e-06,
|
| 13577 |
+
"loss": 21.2826,
|
| 13578 |
+
"step": 19350
|
| 13579 |
+
},
|
| 13580 |
+
{
|
| 13581 |
+
"epoch": 0.3592694915529452,
|
| 13582 |
+
"grad_norm": 33.1875,
|
| 13583 |
+
"learning_rate": 9.943864158221887e-06,
|
| 13584 |
+
"loss": 21.2331,
|
| 13585 |
+
"step": 19360
|
| 13586 |
+
},
|
| 13587 |
+
{
|
| 13588 |
+
"epoch": 0.359455064637425,
|
| 13589 |
+
"grad_norm": 33.0,
|
| 13590 |
+
"learning_rate": 9.943835162435844e-06,
|
| 13591 |
+
"loss": 21.4414,
|
| 13592 |
+
"step": 19370
|
| 13593 |
+
},
|
| 13594 |
+
{
|
| 13595 |
+
"epoch": 0.3596406377219048,
|
| 13596 |
+
"grad_norm": 34.4375,
|
| 13597 |
+
"learning_rate": 9.943806166649801e-06,
|
| 13598 |
+
"loss": 21.6911,
|
| 13599 |
+
"step": 19380
|
| 13600 |
+
},
|
| 13601 |
+
{
|
| 13602 |
+
"epoch": 0.35982621080638466,
|
| 13603 |
+
"grad_norm": 36.46875,
|
| 13604 |
+
"learning_rate": 9.943777170863759e-06,
|
| 13605 |
+
"loss": 21.9565,
|
| 13606 |
+
"step": 19390
|
| 13607 |
+
},
|
| 13608 |
+
{
|
| 13609 |
+
"epoch": 0.36001178389086447,
|
| 13610 |
+
"grad_norm": 33.5625,
|
| 13611 |
+
"learning_rate": 9.943748175077716e-06,
|
| 13612 |
+
"loss": 21.1798,
|
| 13613 |
+
"step": 19400
|
| 13614 |
+
},
|
| 13615 |
+
{
|
| 13616 |
+
"epoch": 0.3601973569753443,
|
| 13617 |
+
"grad_norm": 35.0625,
|
| 13618 |
+
"learning_rate": 9.943719179291674e-06,
|
| 13619 |
+
"loss": 21.36,
|
| 13620 |
+
"step": 19410
|
| 13621 |
+
},
|
| 13622 |
+
{
|
| 13623 |
+
"epoch": 0.36038293005982414,
|
| 13624 |
+
"grad_norm": 35.15625,
|
| 13625 |
+
"learning_rate": 9.943690183505633e-06,
|
| 13626 |
+
"loss": 21.8302,
|
| 13627 |
+
"step": 19420
|
| 13628 |
+
},
|
| 13629 |
+
{
|
| 13630 |
+
"epoch": 0.36056850314430394,
|
| 13631 |
+
"grad_norm": 35.09375,
|
| 13632 |
+
"learning_rate": 9.94366118771959e-06,
|
| 13633 |
+
"loss": 21.5303,
|
| 13634 |
+
"step": 19430
|
| 13635 |
+
},
|
| 13636 |
+
{
|
| 13637 |
+
"epoch": 0.3607540762287838,
|
| 13638 |
+
"grad_norm": 34.625,
|
| 13639 |
+
"learning_rate": 9.943632191933546e-06,
|
| 13640 |
+
"loss": 21.3503,
|
| 13641 |
+
"step": 19440
|
| 13642 |
+
},
|
| 13643 |
+
{
|
| 13644 |
+
"epoch": 0.3609396493132636,
|
| 13645 |
+
"grad_norm": 35.5625,
|
| 13646 |
+
"learning_rate": 9.943603196147505e-06,
|
| 13647 |
+
"loss": 21.5557,
|
| 13648 |
+
"step": 19450
|
| 13649 |
+
},
|
| 13650 |
+
{
|
| 13651 |
+
"epoch": 0.3611252223977434,
|
| 13652 |
+
"grad_norm": 36.0,
|
| 13653 |
+
"learning_rate": 9.943574200361462e-06,
|
| 13654 |
+
"loss": 21.2134,
|
| 13655 |
+
"step": 19460
|
| 13656 |
+
},
|
| 13657 |
+
{
|
| 13658 |
+
"epoch": 0.3613107954822233,
|
| 13659 |
+
"grad_norm": 36.375,
|
| 13660 |
+
"learning_rate": 9.94354520457542e-06,
|
| 13661 |
+
"loss": 21.1214,
|
| 13662 |
+
"step": 19470
|
| 13663 |
+
},
|
| 13664 |
+
{
|
| 13665 |
+
"epoch": 0.3614963685667031,
|
| 13666 |
+
"grad_norm": 35.4375,
|
| 13667 |
+
"learning_rate": 9.943516208789377e-06,
|
| 13668 |
+
"loss": 21.4935,
|
| 13669 |
+
"step": 19480
|
| 13670 |
+
},
|
| 13671 |
+
{
|
| 13672 |
+
"epoch": 0.3616819416511829,
|
| 13673 |
+
"grad_norm": 34.65625,
|
| 13674 |
+
"learning_rate": 9.943487213003336e-06,
|
| 13675 |
+
"loss": 21.3517,
|
| 13676 |
+
"step": 19490
|
| 13677 |
+
},
|
| 13678 |
+
{
|
| 13679 |
+
"epoch": 0.36186751473566275,
|
| 13680 |
+
"grad_norm": 34.4375,
|
| 13681 |
+
"learning_rate": 9.943458217217292e-06,
|
| 13682 |
+
"loss": 21.4391,
|
| 13683 |
+
"step": 19500
|
| 13684 |
+
},
|
| 13685 |
+
{
|
| 13686 |
+
"epoch": 0.36205308782014256,
|
| 13687 |
+
"grad_norm": 37.65625,
|
| 13688 |
+
"learning_rate": 9.94342922143125e-06,
|
| 13689 |
+
"loss": 21.6687,
|
| 13690 |
+
"step": 19510
|
| 13691 |
+
},
|
| 13692 |
+
{
|
| 13693 |
+
"epoch": 0.36223866090462237,
|
| 13694 |
+
"grad_norm": 35.65625,
|
| 13695 |
+
"learning_rate": 9.943400225645209e-06,
|
| 13696 |
+
"loss": 20.8702,
|
| 13697 |
+
"step": 19520
|
| 13698 |
+
},
|
| 13699 |
+
{
|
| 13700 |
+
"epoch": 0.36242423398910223,
|
| 13701 |
+
"grad_norm": 36.90625,
|
| 13702 |
+
"learning_rate": 9.943371229859166e-06,
|
| 13703 |
+
"loss": 21.0082,
|
| 13704 |
+
"step": 19530
|
| 13705 |
+
},
|
| 13706 |
+
{
|
| 13707 |
+
"epoch": 0.36260980707358204,
|
| 13708 |
+
"grad_norm": 33.25,
|
| 13709 |
+
"learning_rate": 9.943342234073123e-06,
|
| 13710 |
+
"loss": 21.6859,
|
| 13711 |
+
"step": 19540
|
| 13712 |
+
},
|
| 13713 |
+
{
|
| 13714 |
+
"epoch": 0.3627953801580619,
|
| 13715 |
+
"grad_norm": 35.90625,
|
| 13716 |
+
"learning_rate": 9.94331323828708e-06,
|
| 13717 |
+
"loss": 21.2681,
|
| 13718 |
+
"step": 19550
|
| 13719 |
+
},
|
| 13720 |
+
{
|
| 13721 |
+
"epoch": 0.3629809532425417,
|
| 13722 |
+
"grad_norm": 35.75,
|
| 13723 |
+
"learning_rate": 9.943284242501038e-06,
|
| 13724 |
+
"loss": 21.4027,
|
| 13725 |
+
"step": 19560
|
| 13726 |
+
},
|
| 13727 |
+
{
|
| 13728 |
+
"epoch": 0.3631665263270215,
|
| 13729 |
+
"grad_norm": 35.0625,
|
| 13730 |
+
"learning_rate": 9.943255246714996e-06,
|
| 13731 |
+
"loss": 21.4005,
|
| 13732 |
+
"step": 19570
|
| 13733 |
+
},
|
| 13734 |
+
{
|
| 13735 |
+
"epoch": 0.3633520994115014,
|
| 13736 |
+
"grad_norm": 35.0,
|
| 13737 |
+
"learning_rate": 9.943226250928953e-06,
|
| 13738 |
+
"loss": 22.0243,
|
| 13739 |
+
"step": 19580
|
| 13740 |
+
},
|
| 13741 |
+
{
|
| 13742 |
+
"epoch": 0.3635376724959812,
|
| 13743 |
+
"grad_norm": 33.84375,
|
| 13744 |
+
"learning_rate": 9.943197255142912e-06,
|
| 13745 |
+
"loss": 21.1517,
|
| 13746 |
+
"step": 19590
|
| 13747 |
+
},
|
| 13748 |
+
{
|
| 13749 |
+
"epoch": 0.363723245580461,
|
| 13750 |
+
"grad_norm": 34.84375,
|
| 13751 |
+
"learning_rate": 9.94316825935687e-06,
|
| 13752 |
+
"loss": 21.5607,
|
| 13753 |
+
"step": 19600
|
| 13754 |
+
},
|
| 13755 |
+
{
|
| 13756 |
+
"epoch": 0.36390881866494085,
|
| 13757 |
+
"grad_norm": 34.40625,
|
| 13758 |
+
"learning_rate": 9.943139263570825e-06,
|
| 13759 |
+
"loss": 21.4868,
|
| 13760 |
+
"step": 19610
|
| 13761 |
+
},
|
| 13762 |
+
{
|
| 13763 |
+
"epoch": 0.36409439174942065,
|
| 13764 |
+
"grad_norm": 36.9375,
|
| 13765 |
+
"learning_rate": 9.943110267784784e-06,
|
| 13766 |
+
"loss": 21.6598,
|
| 13767 |
+
"step": 19620
|
| 13768 |
+
},
|
| 13769 |
+
{
|
| 13770 |
+
"epoch": 0.3642799648339005,
|
| 13771 |
+
"grad_norm": 34.0625,
|
| 13772 |
+
"learning_rate": 9.943081271998742e-06,
|
| 13773 |
+
"loss": 21.3738,
|
| 13774 |
+
"step": 19630
|
| 13775 |
+
},
|
| 13776 |
+
{
|
| 13777 |
+
"epoch": 0.3644655379183803,
|
| 13778 |
+
"grad_norm": 37.0,
|
| 13779 |
+
"learning_rate": 9.943052276212699e-06,
|
| 13780 |
+
"loss": 21.3922,
|
| 13781 |
+
"step": 19640
|
| 13782 |
+
},
|
| 13783 |
+
{
|
| 13784 |
+
"epoch": 0.36465111100286013,
|
| 13785 |
+
"grad_norm": 34.78125,
|
| 13786 |
+
"learning_rate": 9.943023280426657e-06,
|
| 13787 |
+
"loss": 21.692,
|
| 13788 |
+
"step": 19650
|
| 13789 |
+
},
|
| 13790 |
+
{
|
| 13791 |
+
"epoch": 0.36483668408734,
|
| 13792 |
+
"grad_norm": 34.9375,
|
| 13793 |
+
"learning_rate": 9.942994284640614e-06,
|
| 13794 |
+
"loss": 21.7272,
|
| 13795 |
+
"step": 19660
|
| 13796 |
+
},
|
| 13797 |
+
{
|
| 13798 |
+
"epoch": 0.3650222571718198,
|
| 13799 |
+
"grad_norm": 36.0,
|
| 13800 |
+
"learning_rate": 9.942965288854571e-06,
|
| 13801 |
+
"loss": 21.4188,
|
| 13802 |
+
"step": 19670
|
| 13803 |
+
},
|
| 13804 |
+
{
|
| 13805 |
+
"epoch": 0.3652078302562996,
|
| 13806 |
+
"grad_norm": 33.375,
|
| 13807 |
+
"learning_rate": 9.942936293068529e-06,
|
| 13808 |
+
"loss": 21.5631,
|
| 13809 |
+
"step": 19680
|
| 13810 |
+
},
|
| 13811 |
+
{
|
| 13812 |
+
"epoch": 0.36539340334077947,
|
| 13813 |
+
"grad_norm": 33.40625,
|
| 13814 |
+
"learning_rate": 9.942907297282486e-06,
|
| 13815 |
+
"loss": 21.3399,
|
| 13816 |
+
"step": 19690
|
| 13817 |
+
},
|
| 13818 |
+
{
|
| 13819 |
+
"epoch": 0.36557897642525927,
|
| 13820 |
+
"grad_norm": 32.84375,
|
| 13821 |
+
"learning_rate": 9.942878301496445e-06,
|
| 13822 |
+
"loss": 21.2938,
|
| 13823 |
+
"step": 19700
|
| 13824 |
+
},
|
| 13825 |
+
{
|
| 13826 |
+
"epoch": 0.36576454950973913,
|
| 13827 |
+
"grad_norm": 34.0625,
|
| 13828 |
+
"learning_rate": 9.942849305710401e-06,
|
| 13829 |
+
"loss": 21.2609,
|
| 13830 |
+
"step": 19710
|
| 13831 |
+
},
|
| 13832 |
+
{
|
| 13833 |
+
"epoch": 0.36595012259421894,
|
| 13834 |
+
"grad_norm": 35.21875,
|
| 13835 |
+
"learning_rate": 9.942820309924358e-06,
|
| 13836 |
+
"loss": 21.7159,
|
| 13837 |
+
"step": 19720
|
| 13838 |
+
},
|
| 13839 |
+
{
|
| 13840 |
+
"epoch": 0.36613569567869875,
|
| 13841 |
+
"grad_norm": 36.09375,
|
| 13842 |
+
"learning_rate": 9.942791314138317e-06,
|
| 13843 |
+
"loss": 21.7571,
|
| 13844 |
+
"step": 19730
|
| 13845 |
+
},
|
| 13846 |
+
{
|
| 13847 |
+
"epoch": 0.3663212687631786,
|
| 13848 |
+
"grad_norm": 37.9375,
|
| 13849 |
+
"learning_rate": 9.942762318352275e-06,
|
| 13850 |
+
"loss": 21.2937,
|
| 13851 |
+
"step": 19740
|
| 13852 |
+
},
|
| 13853 |
+
{
|
| 13854 |
+
"epoch": 0.3665068418476584,
|
| 13855 |
+
"grad_norm": 37.84375,
|
| 13856 |
+
"learning_rate": 9.942733322566232e-06,
|
| 13857 |
+
"loss": 21.4546,
|
| 13858 |
+
"step": 19750
|
| 13859 |
+
},
|
| 13860 |
+
{
|
| 13861 |
+
"epoch": 0.3666924149321382,
|
| 13862 |
+
"grad_norm": 35.71875,
|
| 13863 |
+
"learning_rate": 9.94270432678019e-06,
|
| 13864 |
+
"loss": 21.3717,
|
| 13865 |
+
"step": 19760
|
| 13866 |
+
},
|
| 13867 |
+
{
|
| 13868 |
+
"epoch": 0.3668779880166181,
|
| 13869 |
+
"grad_norm": 34.8125,
|
| 13870 |
+
"learning_rate": 9.942675330994147e-06,
|
| 13871 |
+
"loss": 22.14,
|
| 13872 |
+
"step": 19770
|
| 13873 |
+
},
|
| 13874 |
+
{
|
| 13875 |
+
"epoch": 0.3670635611010979,
|
| 13876 |
+
"grad_norm": 36.3125,
|
| 13877 |
+
"learning_rate": 9.942646335208105e-06,
|
| 13878 |
+
"loss": 21.1358,
|
| 13879 |
+
"step": 19780
|
| 13880 |
+
},
|
| 13881 |
+
{
|
| 13882 |
+
"epoch": 0.3672491341855777,
|
| 13883 |
+
"grad_norm": 34.84375,
|
| 13884 |
+
"learning_rate": 9.942617339422062e-06,
|
| 13885 |
+
"loss": 21.3856,
|
| 13886 |
+
"step": 19790
|
| 13887 |
+
},
|
| 13888 |
+
{
|
| 13889 |
+
"epoch": 0.36743470727005756,
|
| 13890 |
+
"grad_norm": 36.03125,
|
| 13891 |
+
"learning_rate": 9.942588343636021e-06,
|
| 13892 |
+
"loss": 21.5621,
|
| 13893 |
+
"step": 19800
|
| 13894 |
+
},
|
| 13895 |
+
{
|
| 13896 |
+
"epoch": 0.36762028035453737,
|
| 13897 |
+
"grad_norm": 34.15625,
|
| 13898 |
+
"learning_rate": 9.942559347849978e-06,
|
| 13899 |
+
"loss": 21.6981,
|
| 13900 |
+
"step": 19810
|
| 13901 |
+
},
|
| 13902 |
+
{
|
| 13903 |
+
"epoch": 0.3678058534390172,
|
| 13904 |
+
"grad_norm": 35.8125,
|
| 13905 |
+
"learning_rate": 9.942530352063934e-06,
|
| 13906 |
+
"loss": 21.2015,
|
| 13907 |
+
"step": 19820
|
| 13908 |
+
},
|
| 13909 |
+
{
|
| 13910 |
+
"epoch": 0.36799142652349703,
|
| 13911 |
+
"grad_norm": 37.4375,
|
| 13912 |
+
"learning_rate": 9.942501356277893e-06,
|
| 13913 |
+
"loss": 22.0159,
|
| 13914 |
+
"step": 19830
|
| 13915 |
+
},
|
| 13916 |
+
{
|
| 13917 |
+
"epoch": 0.36817699960797684,
|
| 13918 |
+
"grad_norm": 34.21875,
|
| 13919 |
+
"learning_rate": 9.94247236049185e-06,
|
| 13920 |
+
"loss": 21.571,
|
| 13921 |
+
"step": 19840
|
| 13922 |
+
},
|
| 13923 |
+
{
|
| 13924 |
+
"epoch": 0.3683625726924567,
|
| 13925 |
+
"grad_norm": 35.375,
|
| 13926 |
+
"learning_rate": 9.942443364705808e-06,
|
| 13927 |
+
"loss": 21.416,
|
| 13928 |
+
"step": 19850
|
| 13929 |
+
},
|
| 13930 |
+
{
|
| 13931 |
+
"epoch": 0.3685481457769365,
|
| 13932 |
+
"grad_norm": 34.625,
|
| 13933 |
+
"learning_rate": 9.942414368919765e-06,
|
| 13934 |
+
"loss": 20.9028,
|
| 13935 |
+
"step": 19860
|
| 13936 |
+
},
|
| 13937 |
+
{
|
| 13938 |
+
"epoch": 0.3687337188614163,
|
| 13939 |
+
"grad_norm": 35.4375,
|
| 13940 |
+
"learning_rate": 9.942385373133723e-06,
|
| 13941 |
+
"loss": 21.527,
|
| 13942 |
+
"step": 19870
|
| 13943 |
+
},
|
| 13944 |
+
{
|
| 13945 |
+
"epoch": 0.3689192919458962,
|
| 13946 |
+
"grad_norm": 34.375,
|
| 13947 |
+
"learning_rate": 9.94235637734768e-06,
|
| 13948 |
+
"loss": 21.1836,
|
| 13949 |
+
"step": 19880
|
| 13950 |
+
},
|
| 13951 |
+
{
|
| 13952 |
+
"epoch": 0.369104865030376,
|
| 13953 |
+
"grad_norm": 38.21875,
|
| 13954 |
+
"learning_rate": 9.942327381561638e-06,
|
| 13955 |
+
"loss": 21.2912,
|
| 13956 |
+
"step": 19890
|
| 13957 |
+
},
|
| 13958 |
+
{
|
| 13959 |
+
"epoch": 0.36929043811485585,
|
| 13960 |
+
"grad_norm": 35.875,
|
| 13961 |
+
"learning_rate": 9.942298385775597e-06,
|
| 13962 |
+
"loss": 21.7566,
|
| 13963 |
+
"step": 19900
|
| 13964 |
+
},
|
| 13965 |
+
{
|
| 13966 |
+
"epoch": 0.36947601119933565,
|
| 13967 |
+
"grad_norm": 37.375,
|
| 13968 |
+
"learning_rate": 9.942269389989554e-06,
|
| 13969 |
+
"loss": 21.5261,
|
| 13970 |
+
"step": 19910
|
| 13971 |
+
},
|
| 13972 |
+
{
|
| 13973 |
+
"epoch": 0.36966158428381546,
|
| 13974 |
+
"grad_norm": 35.96875,
|
| 13975 |
+
"learning_rate": 9.942240394203512e-06,
|
| 13976 |
+
"loss": 21.7249,
|
| 13977 |
+
"step": 19920
|
| 13978 |
+
},
|
| 13979 |
+
{
|
| 13980 |
+
"epoch": 0.3698471573682953,
|
| 13981 |
+
"grad_norm": 34.5625,
|
| 13982 |
+
"learning_rate": 9.942211398417469e-06,
|
| 13983 |
+
"loss": 21.9828,
|
| 13984 |
+
"step": 19930
|
| 13985 |
+
},
|
| 13986 |
+
{
|
| 13987 |
+
"epoch": 0.3700327304527751,
|
| 13988 |
+
"grad_norm": 35.96875,
|
| 13989 |
+
"learning_rate": 9.942182402631426e-06,
|
| 13990 |
+
"loss": 21.419,
|
| 13991 |
+
"step": 19940
|
| 13992 |
+
},
|
| 13993 |
+
{
|
| 13994 |
+
"epoch": 0.37021830353725493,
|
| 13995 |
+
"grad_norm": 35.59375,
|
| 13996 |
+
"learning_rate": 9.942153406845384e-06,
|
| 13997 |
+
"loss": 21.6967,
|
| 13998 |
+
"step": 19950
|
| 13999 |
+
},
|
| 14000 |
+
{
|
| 14001 |
+
"epoch": 0.3704038766217348,
|
| 14002 |
+
"grad_norm": 33.3125,
|
| 14003 |
+
"learning_rate": 9.942124411059341e-06,
|
| 14004 |
+
"loss": 21.5348,
|
| 14005 |
+
"step": 19960
|
| 14006 |
+
},
|
| 14007 |
+
{
|
| 14008 |
+
"epoch": 0.3705894497062146,
|
| 14009 |
+
"grad_norm": 35.34375,
|
| 14010 |
+
"learning_rate": 9.9420954152733e-06,
|
| 14011 |
+
"loss": 21.3894,
|
| 14012 |
+
"step": 19970
|
| 14013 |
+
},
|
| 14014 |
+
{
|
| 14015 |
+
"epoch": 0.37077502279069446,
|
| 14016 |
+
"grad_norm": 33.28125,
|
| 14017 |
+
"learning_rate": 9.942066419487256e-06,
|
| 14018 |
+
"loss": 21.2541,
|
| 14019 |
+
"step": 19980
|
| 14020 |
+
},
|
| 14021 |
+
{
|
| 14022 |
+
"epoch": 0.37096059587517427,
|
| 14023 |
+
"grad_norm": 36.15625,
|
| 14024 |
+
"learning_rate": 9.942037423701213e-06,
|
| 14025 |
+
"loss": 21.0304,
|
| 14026 |
+
"step": 19990
|
| 14027 |
+
},
|
| 14028 |
+
{
|
| 14029 |
+
"epoch": 0.3711461689596541,
|
| 14030 |
+
"grad_norm": 35.65625,
|
| 14031 |
+
"learning_rate": 9.942008427915173e-06,
|
| 14032 |
+
"loss": 21.6275,
|
| 14033 |
+
"step": 20000
|
| 14034 |
+
},
|
| 14035 |
+
{
|
| 14036 |
+
"epoch": 0.3711461689596541,
|
| 14037 |
+
"eval_loss": 2.6757473945617676,
|
| 14038 |
+
"eval_runtime": 453.3076,
|
| 14039 |
+
"eval_samples_per_second": 3203.381,
|
| 14040 |
+
"eval_steps_per_second": 50.054,
|
| 14041 |
+
"step": 20000
|
| 14042 |
}
|
| 14043 |
],
|
| 14044 |
"logging_steps": 10,
|
| 14058 |
"attributes": {}
|
| 14059 |
}
|
| 14060 |
},
|
| 14061 |
+
"total_flos": 3.491045518082048e+18,
|
| 14062 |
"train_batch_size": 8,
|
| 14063 |
"trial_name": null,
|
| 14064 |
"trial_params": null
|
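For reference, the entries added above follow the usual Hugging Face `Trainer` checkpoint state layout: a list of log records with `epoch`, `grad_norm`, `learning_rate`, `loss` and `step` written every `logging_steps` (10 here), plus `eval_*` records at evaluation steps, and top-level fields such as `total_flos` and `train_batch_size`. A minimal sketch of how this file could be inspected, assuming the `last-checkpoint/trainer_state.json` path used in this repository and that the records live under the standard `log_history` key:

```python
import json

# Minimal sketch: read the checkpoint's trainer state and summarize the metrics
# logged above (loss / grad_norm / learning_rate every 10 steps, plus the
# eval_* entry written at step 20000). Path and key names are assumptions based
# on the checkpoint layout shown in this commit.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training records carry "loss"; evaluation records carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

last = train_logs[-1]
print(f"last logged step : {last['step']}")
print(f"train loss       : {last['loss']}")
print(f"learning rate    : {last['learning_rate']}")
print(f"grad norm        : {last['grad_norm']}")

if eval_logs:
    ev = eval_logs[-1]
    print(f"eval loss @ step {ev['step']}: {ev['eval_loss']}")
```

Run from the repository root after pulling the LFS files; it prints the most recent training-loss record and the evaluation loss recorded at step 20000.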