Training in progress, step 22500, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1769 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 737632172
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c052d2a67b46c3cbd43e1aaef6787d4e22f25e4730e41749c440d5f7ef1edfa2
|
| 3 |
size 737632172
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1475354682
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08cf78d547b8f41ee223ea3ee959627730e71a470e0b2fc768b9df22602cd24c
|
| 3 |
size 1475354682
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71f7b2a24ea005bf8d4cda6609b3f02fff97ffd137300264d740b525a5d16d52
|
| 3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2be8bf00d7769668daa21530103090701683c486d14d68b216dc7599084911c5
|
| 3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6ef13f5707990bc90ed5888b6375995ece6a20da4e110402f911e1ccc4380cc
|
| 3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6712ce65d3859451db93ee2b906b7b2aadd22b863c4396671311580298e33eef
|
| 3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c23b4a4f7a5f1ca851bf1110551e94214f4e9a551744c59b43604b27936d6b6
|
| 3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f319a4bebd2c89fbe15733682cc2571137e2e88a7af83c46922d3bce0e3020b
|
| 3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29f83a80e96cc577cd7fc37cafb41eb24ffb7cdd78cafd492938746bbf31281a
|
| 3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06f91ff7510df73136b368e7b7956418d774a7089cc90de91ae93a237ac8dcaf
|
| 3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fed0d2c4bde11cab9fcbb818c02c913c6ca7dd78332351188e7f279c6394b16
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 1000,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -14167,6 +14167,1772 @@
|
|
| 14167 |
"eval_samples_per_second": 1803.669,
|
| 14168 |
"eval_steps_per_second": 56.365,
|
| 14169 |
"step": 20000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14170 |
}
|
| 14171 |
],
|
| 14172 |
"logging_steps": 10,
|
|
@@ -14186,7 +15952,7 @@
|
|
| 14186 |
"attributes": {}
|
| 14187 |
}
|
| 14188 |
},
|
| 14189 |
-
"total_flos":
|
| 14190 |
"train_batch_size": 4,
|
| 14191 |
"trial_name": null,
|
| 14192 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9985520994557892,
|
| 5 |
"eval_steps": 1000,
|
| 6 |
+
"global_step": 22500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 14167 |
"eval_samples_per_second": 1803.669,
|
| 14168 |
"eval_steps_per_second": 56.365,
|
| 14169 |
"step": 20000
|
| 14170 |
+
},
|
| 14171 |
+
{
|
| 14172 |
+
"epoch": 0.8880456671160151,
|
| 14173 |
+
"grad_norm": 65.7813491821289,
|
| 14174 |
+
"learning_rate": 9.965310740183166e-06,
|
| 14175 |
+
"loss": 10.058,
|
| 14176 |
+
"step": 20010
|
| 14177 |
+
},
|
| 14178 |
+
{
|
| 14179 |
+
"epoch": 0.8884894680491066,
|
| 14180 |
+
"grad_norm": 56.84501647949219,
|
| 14181 |
+
"learning_rate": 9.965293404221238e-06,
|
| 14182 |
+
"loss": 10.5991,
|
| 14183 |
+
"step": 20020
|
| 14184 |
+
},
|
| 14185 |
+
{
|
| 14186 |
+
"epoch": 0.888933268982198,
|
| 14187 |
+
"grad_norm": 55.88136672973633,
|
| 14188 |
+
"learning_rate": 9.96527606825931e-06,
|
| 14189 |
+
"loss": 10.2387,
|
| 14190 |
+
"step": 20030
|
| 14191 |
+
},
|
| 14192 |
+
{
|
| 14193 |
+
"epoch": 0.8893770699152895,
|
| 14194 |
+
"grad_norm": 65.21720886230469,
|
| 14195 |
+
"learning_rate": 9.965258732297384e-06,
|
| 14196 |
+
"loss": 10.6988,
|
| 14197 |
+
"step": 20040
|
| 14198 |
+
},
|
| 14199 |
+
{
|
| 14200 |
+
"epoch": 0.8898208708483809,
|
| 14201 |
+
"grad_norm": 57.60173416137695,
|
| 14202 |
+
"learning_rate": 9.965241396335455e-06,
|
| 14203 |
+
"loss": 10.0901,
|
| 14204 |
+
"step": 20050
|
| 14205 |
+
},
|
| 14206 |
+
{
|
| 14207 |
+
"epoch": 0.8902646717814724,
|
| 14208 |
+
"grad_norm": 66.22525787353516,
|
| 14209 |
+
"learning_rate": 9.965224060373528e-06,
|
| 14210 |
+
"loss": 9.9539,
|
| 14211 |
+
"step": 20060
|
| 14212 |
+
},
|
| 14213 |
+
{
|
| 14214 |
+
"epoch": 0.8907084727145639,
|
| 14215 |
+
"grad_norm": 59.16026306152344,
|
| 14216 |
+
"learning_rate": 9.965206724411601e-06,
|
| 14217 |
+
"loss": 10.2055,
|
| 14218 |
+
"step": 20070
|
| 14219 |
+
},
|
| 14220 |
+
{
|
| 14221 |
+
"epoch": 0.8911522736476554,
|
| 14222 |
+
"grad_norm": 63.042850494384766,
|
| 14223 |
+
"learning_rate": 9.965189388449673e-06,
|
| 14224 |
+
"loss": 10.608,
|
| 14225 |
+
"step": 20080
|
| 14226 |
+
},
|
| 14227 |
+
{
|
| 14228 |
+
"epoch": 0.8915960745807469,
|
| 14229 |
+
"grad_norm": 70.37860870361328,
|
| 14230 |
+
"learning_rate": 9.965172052487746e-06,
|
| 14231 |
+
"loss": 10.2199,
|
| 14232 |
+
"step": 20090
|
| 14233 |
+
},
|
| 14234 |
+
{
|
| 14235 |
+
"epoch": 0.8920398755138382,
|
| 14236 |
+
"grad_norm": 59.544456481933594,
|
| 14237 |
+
"learning_rate": 9.965154716525819e-06,
|
| 14238 |
+
"loss": 10.625,
|
| 14239 |
+
"step": 20100
|
| 14240 |
+
},
|
| 14241 |
+
{
|
| 14242 |
+
"epoch": 0.8924836764469297,
|
| 14243 |
+
"grad_norm": 59.02389907836914,
|
| 14244 |
+
"learning_rate": 9.96513738056389e-06,
|
| 14245 |
+
"loss": 10.6333,
|
| 14246 |
+
"step": 20110
|
| 14247 |
+
},
|
| 14248 |
+
{
|
| 14249 |
+
"epoch": 0.8929274773800212,
|
| 14250 |
+
"grad_norm": 55.048667907714844,
|
| 14251 |
+
"learning_rate": 9.965120044601963e-06,
|
| 14252 |
+
"loss": 9.668,
|
| 14253 |
+
"step": 20120
|
| 14254 |
+
},
|
| 14255 |
+
{
|
| 14256 |
+
"epoch": 0.8933712783131127,
|
| 14257 |
+
"grad_norm": 70.93208312988281,
|
| 14258 |
+
"learning_rate": 9.965102708640036e-06,
|
| 14259 |
+
"loss": 10.0719,
|
| 14260 |
+
"step": 20130
|
| 14261 |
+
},
|
| 14262 |
+
{
|
| 14263 |
+
"epoch": 0.8938150792462041,
|
| 14264 |
+
"grad_norm": 60.164588928222656,
|
| 14265 |
+
"learning_rate": 9.965085372678108e-06,
|
| 14266 |
+
"loss": 10.5491,
|
| 14267 |
+
"step": 20140
|
| 14268 |
+
},
|
| 14269 |
+
{
|
| 14270 |
+
"epoch": 0.8942588801792956,
|
| 14271 |
+
"grad_norm": 63.05065155029297,
|
| 14272 |
+
"learning_rate": 9.965068036716181e-06,
|
| 14273 |
+
"loss": 10.5735,
|
| 14274 |
+
"step": 20150
|
| 14275 |
+
},
|
| 14276 |
+
{
|
| 14277 |
+
"epoch": 0.894702681112387,
|
| 14278 |
+
"grad_norm": 66.36126708984375,
|
| 14279 |
+
"learning_rate": 9.965050700754254e-06,
|
| 14280 |
+
"loss": 10.0222,
|
| 14281 |
+
"step": 20160
|
| 14282 |
+
},
|
| 14283 |
+
{
|
| 14284 |
+
"epoch": 0.8951464820454785,
|
| 14285 |
+
"grad_norm": 65.72769165039062,
|
| 14286 |
+
"learning_rate": 9.965033364792325e-06,
|
| 14287 |
+
"loss": 9.764,
|
| 14288 |
+
"step": 20170
|
| 14289 |
+
},
|
| 14290 |
+
{
|
| 14291 |
+
"epoch": 0.8955902829785699,
|
| 14292 |
+
"grad_norm": 65.65251922607422,
|
| 14293 |
+
"learning_rate": 9.965016028830398e-06,
|
| 14294 |
+
"loss": 10.2024,
|
| 14295 |
+
"step": 20180
|
| 14296 |
+
},
|
| 14297 |
+
{
|
| 14298 |
+
"epoch": 0.8960340839116614,
|
| 14299 |
+
"grad_norm": 75.98188018798828,
|
| 14300 |
+
"learning_rate": 9.964998692868472e-06,
|
| 14301 |
+
"loss": 10.6188,
|
| 14302 |
+
"step": 20190
|
| 14303 |
+
},
|
| 14304 |
+
{
|
| 14305 |
+
"epoch": 0.8964778848447529,
|
| 14306 |
+
"grad_norm": 53.20927429199219,
|
| 14307 |
+
"learning_rate": 9.964981356906545e-06,
|
| 14308 |
+
"loss": 10.6707,
|
| 14309 |
+
"step": 20200
|
| 14310 |
+
},
|
| 14311 |
+
{
|
| 14312 |
+
"epoch": 0.8969216857778444,
|
| 14313 |
+
"grad_norm": 68.85663604736328,
|
| 14314 |
+
"learning_rate": 9.964964020944616e-06,
|
| 14315 |
+
"loss": 10.2216,
|
| 14316 |
+
"step": 20210
|
| 14317 |
+
},
|
| 14318 |
+
{
|
| 14319 |
+
"epoch": 0.8973654867109359,
|
| 14320 |
+
"grad_norm": 60.16876983642578,
|
| 14321 |
+
"learning_rate": 9.964946684982689e-06,
|
| 14322 |
+
"loss": 9.6589,
|
| 14323 |
+
"step": 20220
|
| 14324 |
+
},
|
| 14325 |
+
{
|
| 14326 |
+
"epoch": 0.8978092876440272,
|
| 14327 |
+
"grad_norm": 63.93645477294922,
|
| 14328 |
+
"learning_rate": 9.964929349020762e-06,
|
| 14329 |
+
"loss": 10.0363,
|
| 14330 |
+
"step": 20230
|
| 14331 |
+
},
|
| 14332 |
+
{
|
| 14333 |
+
"epoch": 0.8982530885771187,
|
| 14334 |
+
"grad_norm": 60.31320571899414,
|
| 14335 |
+
"learning_rate": 9.964912013058834e-06,
|
| 14336 |
+
"loss": 10.2793,
|
| 14337 |
+
"step": 20240
|
| 14338 |
+
},
|
| 14339 |
+
{
|
| 14340 |
+
"epoch": 0.8986968895102102,
|
| 14341 |
+
"grad_norm": 57.123626708984375,
|
| 14342 |
+
"learning_rate": 9.964894677096907e-06,
|
| 14343 |
+
"loss": 10.699,
|
| 14344 |
+
"step": 20250
|
| 14345 |
+
},
|
| 14346 |
+
{
|
| 14347 |
+
"epoch": 0.8991406904433017,
|
| 14348 |
+
"grad_norm": 67.34060668945312,
|
| 14349 |
+
"learning_rate": 9.96487734113498e-06,
|
| 14350 |
+
"loss": 10.715,
|
| 14351 |
+
"step": 20260
|
| 14352 |
+
},
|
| 14353 |
+
{
|
| 14354 |
+
"epoch": 0.8995844913763931,
|
| 14355 |
+
"grad_norm": 60.88409423828125,
|
| 14356 |
+
"learning_rate": 9.964860005173051e-06,
|
| 14357 |
+
"loss": 10.485,
|
| 14358 |
+
"step": 20270
|
| 14359 |
+
},
|
| 14360 |
+
{
|
| 14361 |
+
"epoch": 0.9000282923094846,
|
| 14362 |
+
"grad_norm": 59.35341262817383,
|
| 14363 |
+
"learning_rate": 9.964842669211124e-06,
|
| 14364 |
+
"loss": 9.8806,
|
| 14365 |
+
"step": 20280
|
| 14366 |
+
},
|
| 14367 |
+
{
|
| 14368 |
+
"epoch": 0.900472093242576,
|
| 14369 |
+
"grad_norm": 70.66928100585938,
|
| 14370 |
+
"learning_rate": 9.964825333249197e-06,
|
| 14371 |
+
"loss": 10.1476,
|
| 14372 |
+
"step": 20290
|
| 14373 |
+
},
|
| 14374 |
+
{
|
| 14375 |
+
"epoch": 0.9009158941756675,
|
| 14376 |
+
"grad_norm": 57.342830657958984,
|
| 14377 |
+
"learning_rate": 9.964807997287269e-06,
|
| 14378 |
+
"loss": 10.0399,
|
| 14379 |
+
"step": 20300
|
| 14380 |
+
},
|
| 14381 |
+
{
|
| 14382 |
+
"epoch": 0.901359695108759,
|
| 14383 |
+
"grad_norm": 72.41960906982422,
|
| 14384 |
+
"learning_rate": 9.964790661325342e-06,
|
| 14385 |
+
"loss": 10.3914,
|
| 14386 |
+
"step": 20310
|
| 14387 |
+
},
|
| 14388 |
+
{
|
| 14389 |
+
"epoch": 0.9018034960418504,
|
| 14390 |
+
"grad_norm": 66.77056121826172,
|
| 14391 |
+
"learning_rate": 9.964773325363415e-06,
|
| 14392 |
+
"loss": 10.1905,
|
| 14393 |
+
"step": 20320
|
| 14394 |
+
},
|
| 14395 |
+
{
|
| 14396 |
+
"epoch": 0.9022472969749419,
|
| 14397 |
+
"grad_norm": 72.86076354980469,
|
| 14398 |
+
"learning_rate": 9.964755989401486e-06,
|
| 14399 |
+
"loss": 10.1389,
|
| 14400 |
+
"step": 20330
|
| 14401 |
+
},
|
| 14402 |
+
{
|
| 14403 |
+
"epoch": 0.9026910979080334,
|
| 14404 |
+
"grad_norm": 64.16802215576172,
|
| 14405 |
+
"learning_rate": 9.96473865343956e-06,
|
| 14406 |
+
"loss": 10.4992,
|
| 14407 |
+
"step": 20340
|
| 14408 |
+
},
|
| 14409 |
+
{
|
| 14410 |
+
"epoch": 0.9031348988411249,
|
| 14411 |
+
"grad_norm": 71.86752319335938,
|
| 14412 |
+
"learning_rate": 9.964721317477632e-06,
|
| 14413 |
+
"loss": 9.9231,
|
| 14414 |
+
"step": 20350
|
| 14415 |
+
},
|
| 14416 |
+
{
|
| 14417 |
+
"epoch": 0.9035786997742162,
|
| 14418 |
+
"grad_norm": 69.46532440185547,
|
| 14419 |
+
"learning_rate": 9.964703981515704e-06,
|
| 14420 |
+
"loss": 10.2235,
|
| 14421 |
+
"step": 20360
|
| 14422 |
+
},
|
| 14423 |
+
{
|
| 14424 |
+
"epoch": 0.9040225007073077,
|
| 14425 |
+
"grad_norm": 58.126220703125,
|
| 14426 |
+
"learning_rate": 9.964686645553777e-06,
|
| 14427 |
+
"loss": 9.7999,
|
| 14428 |
+
"step": 20370
|
| 14429 |
+
},
|
| 14430 |
+
{
|
| 14431 |
+
"epoch": 0.9044663016403992,
|
| 14432 |
+
"grad_norm": 60.532958984375,
|
| 14433 |
+
"learning_rate": 9.96466930959185e-06,
|
| 14434 |
+
"loss": 10.689,
|
| 14435 |
+
"step": 20380
|
| 14436 |
+
},
|
| 14437 |
+
{
|
| 14438 |
+
"epoch": 0.9049101025734907,
|
| 14439 |
+
"grad_norm": 68.76100158691406,
|
| 14440 |
+
"learning_rate": 9.964651973629921e-06,
|
| 14441 |
+
"loss": 10.3952,
|
| 14442 |
+
"step": 20390
|
| 14443 |
+
},
|
| 14444 |
+
{
|
| 14445 |
+
"epoch": 0.9053539035065821,
|
| 14446 |
+
"grad_norm": 68.4054183959961,
|
| 14447 |
+
"learning_rate": 9.964634637667994e-06,
|
| 14448 |
+
"loss": 10.5912,
|
| 14449 |
+
"step": 20400
|
| 14450 |
+
},
|
| 14451 |
+
{
|
| 14452 |
+
"epoch": 0.9057977044396736,
|
| 14453 |
+
"grad_norm": 58.026851654052734,
|
| 14454 |
+
"learning_rate": 9.964617301706067e-06,
|
| 14455 |
+
"loss": 10.5631,
|
| 14456 |
+
"step": 20410
|
| 14457 |
+
},
|
| 14458 |
+
{
|
| 14459 |
+
"epoch": 0.906241505372765,
|
| 14460 |
+
"grad_norm": 68.02127838134766,
|
| 14461 |
+
"learning_rate": 9.96459996574414e-06,
|
| 14462 |
+
"loss": 10.4884,
|
| 14463 |
+
"step": 20420
|
| 14464 |
+
},
|
| 14465 |
+
{
|
| 14466 |
+
"epoch": 0.9066853063058565,
|
| 14467 |
+
"grad_norm": 71.5272216796875,
|
| 14468 |
+
"learning_rate": 9.964582629782212e-06,
|
| 14469 |
+
"loss": 10.2185,
|
| 14470 |
+
"step": 20430
|
| 14471 |
+
},
|
| 14472 |
+
{
|
| 14473 |
+
"epoch": 0.907129107238948,
|
| 14474 |
+
"grad_norm": 69.71690368652344,
|
| 14475 |
+
"learning_rate": 9.964565293820285e-06,
|
| 14476 |
+
"loss": 9.6736,
|
| 14477 |
+
"step": 20440
|
| 14478 |
+
},
|
| 14479 |
+
{
|
| 14480 |
+
"epoch": 0.9075729081720394,
|
| 14481 |
+
"grad_norm": 71.79097747802734,
|
| 14482 |
+
"learning_rate": 9.964547957858358e-06,
|
| 14483 |
+
"loss": 10.3408,
|
| 14484 |
+
"step": 20450
|
| 14485 |
+
},
|
| 14486 |
+
{
|
| 14487 |
+
"epoch": 0.9080167091051309,
|
| 14488 |
+
"grad_norm": 62.07600402832031,
|
| 14489 |
+
"learning_rate": 9.96453062189643e-06,
|
| 14490 |
+
"loss": 10.1348,
|
| 14491 |
+
"step": 20460
|
| 14492 |
+
},
|
| 14493 |
+
{
|
| 14494 |
+
"epoch": 0.9084605100382224,
|
| 14495 |
+
"grad_norm": 70.9972152709961,
|
| 14496 |
+
"learning_rate": 9.964513285934503e-06,
|
| 14497 |
+
"loss": 10.3505,
|
| 14498 |
+
"step": 20470
|
| 14499 |
+
},
|
| 14500 |
+
{
|
| 14501 |
+
"epoch": 0.9089043109713139,
|
| 14502 |
+
"grad_norm": 62.994075775146484,
|
| 14503 |
+
"learning_rate": 9.964495949972576e-06,
|
| 14504 |
+
"loss": 10.8796,
|
| 14505 |
+
"step": 20480
|
| 14506 |
+
},
|
| 14507 |
+
{
|
| 14508 |
+
"epoch": 0.9093481119044052,
|
| 14509 |
+
"grad_norm": 57.46833801269531,
|
| 14510 |
+
"learning_rate": 9.964478614010647e-06,
|
| 14511 |
+
"loss": 10.4861,
|
| 14512 |
+
"step": 20490
|
| 14513 |
+
},
|
| 14514 |
+
{
|
| 14515 |
+
"epoch": 0.9097919128374967,
|
| 14516 |
+
"grad_norm": 71.40296173095703,
|
| 14517 |
+
"learning_rate": 9.96446127804872e-06,
|
| 14518 |
+
"loss": 9.9325,
|
| 14519 |
+
"step": 20500
|
| 14520 |
+
},
|
| 14521 |
+
{
|
| 14522 |
+
"epoch": 0.9102357137705882,
|
| 14523 |
+
"grad_norm": 71.27222442626953,
|
| 14524 |
+
"learning_rate": 9.964443942086793e-06,
|
| 14525 |
+
"loss": 10.204,
|
| 14526 |
+
"step": 20510
|
| 14527 |
+
},
|
| 14528 |
+
{
|
| 14529 |
+
"epoch": 0.9106795147036797,
|
| 14530 |
+
"grad_norm": 76.38119506835938,
|
| 14531 |
+
"learning_rate": 9.964426606124865e-06,
|
| 14532 |
+
"loss": 9.9359,
|
| 14533 |
+
"step": 20520
|
| 14534 |
+
},
|
| 14535 |
+
{
|
| 14536 |
+
"epoch": 0.9111233156367712,
|
| 14537 |
+
"grad_norm": 60.81074905395508,
|
| 14538 |
+
"learning_rate": 9.964409270162938e-06,
|
| 14539 |
+
"loss": 10.2593,
|
| 14540 |
+
"step": 20530
|
| 14541 |
+
},
|
| 14542 |
+
{
|
| 14543 |
+
"epoch": 0.9115671165698626,
|
| 14544 |
+
"grad_norm": 71.3153305053711,
|
| 14545 |
+
"learning_rate": 9.96439193420101e-06,
|
| 14546 |
+
"loss": 10.3253,
|
| 14547 |
+
"step": 20540
|
| 14548 |
+
},
|
| 14549 |
+
{
|
| 14550 |
+
"epoch": 0.912010917502954,
|
| 14551 |
+
"grad_norm": 68.32502746582031,
|
| 14552 |
+
"learning_rate": 9.964374598239082e-06,
|
| 14553 |
+
"loss": 10.073,
|
| 14554 |
+
"step": 20550
|
| 14555 |
+
},
|
| 14556 |
+
{
|
| 14557 |
+
"epoch": 0.9124547184360455,
|
| 14558 |
+
"grad_norm": 60.2060546875,
|
| 14559 |
+
"learning_rate": 9.964357262277155e-06,
|
| 14560 |
+
"loss": 10.3595,
|
| 14561 |
+
"step": 20560
|
| 14562 |
+
},
|
| 14563 |
+
{
|
| 14564 |
+
"epoch": 0.912898519369137,
|
| 14565 |
+
"grad_norm": 69.63631439208984,
|
| 14566 |
+
"learning_rate": 9.964339926315228e-06,
|
| 14567 |
+
"loss": 10.1938,
|
| 14568 |
+
"step": 20570
|
| 14569 |
+
},
|
| 14570 |
+
{
|
| 14571 |
+
"epoch": 0.9133423203022284,
|
| 14572 |
+
"grad_norm": 57.60273742675781,
|
| 14573 |
+
"learning_rate": 9.9643225903533e-06,
|
| 14574 |
+
"loss": 10.612,
|
| 14575 |
+
"step": 20580
|
| 14576 |
+
},
|
| 14577 |
+
{
|
| 14578 |
+
"epoch": 0.9137861212353199,
|
| 14579 |
+
"grad_norm": 58.673763275146484,
|
| 14580 |
+
"learning_rate": 9.964305254391373e-06,
|
| 14581 |
+
"loss": 9.8792,
|
| 14582 |
+
"step": 20590
|
| 14583 |
+
},
|
| 14584 |
+
{
|
| 14585 |
+
"epoch": 0.9142299221684114,
|
| 14586 |
+
"grad_norm": 65.37464141845703,
|
| 14587 |
+
"learning_rate": 9.964287918429446e-06,
|
| 14588 |
+
"loss": 10.0737,
|
| 14589 |
+
"step": 20600
|
| 14590 |
+
},
|
| 14591 |
+
{
|
| 14592 |
+
"epoch": 0.9146737231015029,
|
| 14593 |
+
"grad_norm": 63.91664123535156,
|
| 14594 |
+
"learning_rate": 9.964270582467517e-06,
|
| 14595 |
+
"loss": 9.939,
|
| 14596 |
+
"step": 20610
|
| 14597 |
+
},
|
| 14598 |
+
{
|
| 14599 |
+
"epoch": 0.9151175240345942,
|
| 14600 |
+
"grad_norm": 69.06259155273438,
|
| 14601 |
+
"learning_rate": 9.96425324650559e-06,
|
| 14602 |
+
"loss": 10.422,
|
| 14603 |
+
"step": 20620
|
| 14604 |
+
},
|
| 14605 |
+
{
|
| 14606 |
+
"epoch": 0.9155613249676857,
|
| 14607 |
+
"grad_norm": 68.58475494384766,
|
| 14608 |
+
"learning_rate": 9.964235910543663e-06,
|
| 14609 |
+
"loss": 9.9517,
|
| 14610 |
+
"step": 20630
|
| 14611 |
+
},
|
| 14612 |
+
{
|
| 14613 |
+
"epoch": 0.9160051259007772,
|
| 14614 |
+
"grad_norm": 61.14803695678711,
|
| 14615 |
+
"learning_rate": 9.964218574581736e-06,
|
| 14616 |
+
"loss": 10.2273,
|
| 14617 |
+
"step": 20640
|
| 14618 |
+
},
|
| 14619 |
+
{
|
| 14620 |
+
"epoch": 0.9164489268338687,
|
| 14621 |
+
"grad_norm": 61.80668258666992,
|
| 14622 |
+
"learning_rate": 9.964201238619808e-06,
|
| 14623 |
+
"loss": 10.4235,
|
| 14624 |
+
"step": 20650
|
| 14625 |
+
},
|
| 14626 |
+
{
|
| 14627 |
+
"epoch": 0.9168927277669602,
|
| 14628 |
+
"grad_norm": 68.1749038696289,
|
| 14629 |
+
"learning_rate": 9.96418390265788e-06,
|
| 14630 |
+
"loss": 10.5679,
|
| 14631 |
+
"step": 20660
|
| 14632 |
+
},
|
| 14633 |
+
{
|
| 14634 |
+
"epoch": 0.9173365287000516,
|
| 14635 |
+
"grad_norm": 58.40181350708008,
|
| 14636 |
+
"learning_rate": 9.964166566695954e-06,
|
| 14637 |
+
"loss": 10.4709,
|
| 14638 |
+
"step": 20670
|
| 14639 |
+
},
|
| 14640 |
+
{
|
| 14641 |
+
"epoch": 0.917780329633143,
|
| 14642 |
+
"grad_norm": 59.180511474609375,
|
| 14643 |
+
"learning_rate": 9.964149230734025e-06,
|
| 14644 |
+
"loss": 9.9807,
|
| 14645 |
+
"step": 20680
|
| 14646 |
+
},
|
| 14647 |
+
{
|
| 14648 |
+
"epoch": 0.9182241305662345,
|
| 14649 |
+
"grad_norm": 73.31056213378906,
|
| 14650 |
+
"learning_rate": 9.964131894772098e-06,
|
| 14651 |
+
"loss": 10.3648,
|
| 14652 |
+
"step": 20690
|
| 14653 |
+
},
|
| 14654 |
+
{
|
| 14655 |
+
"epoch": 0.918667931499326,
|
| 14656 |
+
"grad_norm": 69.01880645751953,
|
| 14657 |
+
"learning_rate": 9.964114558810171e-06,
|
| 14658 |
+
"loss": 9.9768,
|
| 14659 |
+
"step": 20700
|
| 14660 |
+
},
|
| 14661 |
+
{
|
| 14662 |
+
"epoch": 0.9191117324324174,
|
| 14663 |
+
"grad_norm": 68.61119079589844,
|
| 14664 |
+
"learning_rate": 9.964097222848243e-06,
|
| 14665 |
+
"loss": 10.1686,
|
| 14666 |
+
"step": 20710
|
| 14667 |
+
},
|
| 14668 |
+
{
|
| 14669 |
+
"epoch": 0.9195555333655089,
|
| 14670 |
+
"grad_norm": 62.651973724365234,
|
| 14671 |
+
"learning_rate": 9.964079886886316e-06,
|
| 14672 |
+
"loss": 10.1275,
|
| 14673 |
+
"step": 20720
|
| 14674 |
+
},
|
| 14675 |
+
{
|
| 14676 |
+
"epoch": 0.9199993342986004,
|
| 14677 |
+
"grad_norm": 62.72435760498047,
|
| 14678 |
+
"learning_rate": 9.964062550924389e-06,
|
| 14679 |
+
"loss": 10.2096,
|
| 14680 |
+
"step": 20730
|
| 14681 |
+
},
|
| 14682 |
+
{
|
| 14683 |
+
"epoch": 0.9204431352316919,
|
| 14684 |
+
"grad_norm": 57.11748504638672,
|
| 14685 |
+
"learning_rate": 9.96404521496246e-06,
|
| 14686 |
+
"loss": 10.5196,
|
| 14687 |
+
"step": 20740
|
| 14688 |
+
},
|
| 14689 |
+
{
|
| 14690 |
+
"epoch": 0.9208869361647832,
|
| 14691 |
+
"grad_norm": 64.23450469970703,
|
| 14692 |
+
"learning_rate": 9.964027879000533e-06,
|
| 14693 |
+
"loss": 10.4117,
|
| 14694 |
+
"step": 20750
|
| 14695 |
+
},
|
| 14696 |
+
{
|
| 14697 |
+
"epoch": 0.9213307370978747,
|
| 14698 |
+
"grad_norm": 69.50020599365234,
|
| 14699 |
+
"learning_rate": 9.964010543038607e-06,
|
| 14700 |
+
"loss": 10.4837,
|
| 14701 |
+
"step": 20760
|
| 14702 |
+
},
|
| 14703 |
+
{
|
| 14704 |
+
"epoch": 0.9217745380309662,
|
| 14705 |
+
"grad_norm": 55.89120101928711,
|
| 14706 |
+
"learning_rate": 9.963993207076678e-06,
|
| 14707 |
+
"loss": 9.9386,
|
| 14708 |
+
"step": 20770
|
| 14709 |
+
},
|
| 14710 |
+
{
|
| 14711 |
+
"epoch": 0.9222183389640577,
|
| 14712 |
+
"grad_norm": 59.105995178222656,
|
| 14713 |
+
"learning_rate": 9.963975871114751e-06,
|
| 14714 |
+
"loss": 10.2322,
|
| 14715 |
+
"step": 20780
|
| 14716 |
+
},
|
| 14717 |
+
{
|
| 14718 |
+
"epoch": 0.9226621398971492,
|
| 14719 |
+
"grad_norm": 55.461021423339844,
|
| 14720 |
+
"learning_rate": 9.963958535152824e-06,
|
| 14721 |
+
"loss": 10.1765,
|
| 14722 |
+
"step": 20790
|
| 14723 |
+
},
|
| 14724 |
+
{
|
| 14725 |
+
"epoch": 0.9231059408302406,
|
| 14726 |
+
"grad_norm": 76.23094940185547,
|
| 14727 |
+
"learning_rate": 9.963941199190897e-06,
|
| 14728 |
+
"loss": 10.2576,
|
| 14729 |
+
"step": 20800
|
| 14730 |
+
},
|
| 14731 |
+
{
|
| 14732 |
+
"epoch": 0.923549741763332,
|
| 14733 |
+
"grad_norm": 72.75599670410156,
|
| 14734 |
+
"learning_rate": 9.963923863228969e-06,
|
| 14735 |
+
"loss": 10.7945,
|
| 14736 |
+
"step": 20810
|
| 14737 |
+
},
|
| 14738 |
+
{
|
| 14739 |
+
"epoch": 0.9239935426964235,
|
| 14740 |
+
"grad_norm": 61.83809280395508,
|
| 14741 |
+
"learning_rate": 9.963906527267042e-06,
|
| 14742 |
+
"loss": 10.4771,
|
| 14743 |
+
"step": 20820
|
| 14744 |
+
},
|
| 14745 |
+
{
|
| 14746 |
+
"epoch": 0.924437343629515,
|
| 14747 |
+
"grad_norm": 58.66376876831055,
|
| 14748 |
+
"learning_rate": 9.963889191305115e-06,
|
| 14749 |
+
"loss": 10.0925,
|
| 14750 |
+
"step": 20830
|
| 14751 |
+
},
|
| 14752 |
+
{
|
| 14753 |
+
"epoch": 0.9248811445626064,
|
| 14754 |
+
"grad_norm": 70.21393585205078,
|
| 14755 |
+
"learning_rate": 9.963871855343186e-06,
|
| 14756 |
+
"loss": 10.2228,
|
| 14757 |
+
"step": 20840
|
| 14758 |
+
},
|
| 14759 |
+
{
|
| 14760 |
+
"epoch": 0.9253249454956979,
|
| 14761 |
+
"grad_norm": 55.493282318115234,
|
| 14762 |
+
"learning_rate": 9.96385451938126e-06,
|
| 14763 |
+
"loss": 10.1143,
|
| 14764 |
+
"step": 20850
|
| 14765 |
+
},
|
| 14766 |
+
{
|
| 14767 |
+
"epoch": 0.9257687464287894,
|
| 14768 |
+
"grad_norm": 58.75727081298828,
|
| 14769 |
+
"learning_rate": 9.963837183419332e-06,
|
| 14770 |
+
"loss": 10.2045,
|
| 14771 |
+
"step": 20860
|
| 14772 |
+
},
|
| 14773 |
+
{
|
| 14774 |
+
"epoch": 0.9262125473618809,
|
| 14775 |
+
"grad_norm": 60.97832107543945,
|
| 14776 |
+
"learning_rate": 9.963819847457404e-06,
|
| 14777 |
+
"loss": 10.176,
|
| 14778 |
+
"step": 20870
|
| 14779 |
+
},
|
| 14780 |
+
{
|
| 14781 |
+
"epoch": 0.9266563482949723,
|
| 14782 |
+
"grad_norm": 66.70062255859375,
|
| 14783 |
+
"learning_rate": 9.963802511495477e-06,
|
| 14784 |
+
"loss": 10.8102,
|
| 14785 |
+
"step": 20880
|
| 14786 |
+
},
|
| 14787 |
+
{
|
| 14788 |
+
"epoch": 0.9271001492280637,
|
| 14789 |
+
"grad_norm": 59.54788589477539,
|
| 14790 |
+
"learning_rate": 9.96378517553355e-06,
|
| 14791 |
+
"loss": 9.8927,
|
| 14792 |
+
"step": 20890
|
| 14793 |
+
},
|
| 14794 |
+
{
|
| 14795 |
+
"epoch": 0.9275439501611552,
|
| 14796 |
+
"grad_norm": 56.45012283325195,
|
| 14797 |
+
"learning_rate": 9.963767839571621e-06,
|
| 14798 |
+
"loss": 10.2811,
|
| 14799 |
+
"step": 20900
|
| 14800 |
+
},
|
| 14801 |
+
{
|
| 14802 |
+
"epoch": 0.9279877510942467,
|
| 14803 |
+
"grad_norm": 63.22991943359375,
|
| 14804 |
+
"learning_rate": 9.963750503609694e-06,
|
| 14805 |
+
"loss": 10.0369,
|
| 14806 |
+
"step": 20910
|
| 14807 |
+
},
|
| 14808 |
+
{
|
| 14809 |
+
"epoch": 0.9284315520273382,
|
| 14810 |
+
"grad_norm": 66.25374603271484,
|
| 14811 |
+
"learning_rate": 9.963733167647767e-06,
|
| 14812 |
+
"loss": 10.2809,
|
| 14813 |
+
"step": 20920
|
| 14814 |
+
},
|
| 14815 |
+
{
|
| 14816 |
+
"epoch": 0.9288753529604296,
|
| 14817 |
+
"grad_norm": 65.86581420898438,
|
| 14818 |
+
"learning_rate": 9.96371583168584e-06,
|
| 14819 |
+
"loss": 10.2681,
|
| 14820 |
+
"step": 20930
|
| 14821 |
+
},
|
| 14822 |
+
{
|
| 14823 |
+
"epoch": 0.929319153893521,
|
| 14824 |
+
"grad_norm": 68.9689712524414,
|
| 14825 |
+
"learning_rate": 9.963698495723912e-06,
|
| 14826 |
+
"loss": 10.6552,
|
| 14827 |
+
"step": 20940
|
| 14828 |
+
},
|
| 14829 |
+
{
|
| 14830 |
+
"epoch": 0.9297629548266125,
|
| 14831 |
+
"grad_norm": 71.05926513671875,
|
| 14832 |
+
"learning_rate": 9.963681159761985e-06,
|
| 14833 |
+
"loss": 9.7715,
|
| 14834 |
+
"step": 20950
|
| 14835 |
+
},
|
| 14836 |
+
{
|
| 14837 |
+
"epoch": 0.930206755759704,
|
| 14838 |
+
"grad_norm": 76.48685455322266,
|
| 14839 |
+
"learning_rate": 9.963663823800058e-06,
|
| 14840 |
+
"loss": 10.4682,
|
| 14841 |
+
"step": 20960
|
| 14842 |
+
},
|
| 14843 |
+
{
|
| 14844 |
+
"epoch": 0.9306505566927954,
|
| 14845 |
+
"grad_norm": 63.782257080078125,
|
| 14846 |
+
"learning_rate": 9.96364648783813e-06,
|
| 14847 |
+
"loss": 10.3633,
|
| 14848 |
+
"step": 20970
|
| 14849 |
+
},
|
| 14850 |
+
{
|
| 14851 |
+
"epoch": 0.9310943576258869,
|
| 14852 |
+
"grad_norm": 71.73765563964844,
|
| 14853 |
+
"learning_rate": 9.963629151876202e-06,
|
| 14854 |
+
"loss": 10.465,
|
| 14855 |
+
"step": 20980
|
| 14856 |
+
},
|
| 14857 |
+
{
|
| 14858 |
+
"epoch": 0.9315381585589784,
|
| 14859 |
+
"grad_norm": 60.337677001953125,
|
| 14860 |
+
"learning_rate": 9.963611815914275e-06,
|
| 14861 |
+
"loss": 9.9345,
|
| 14862 |
+
"step": 20990
|
| 14863 |
+
},
|
| 14864 |
+
{
|
| 14865 |
+
"epoch": 0.9319819594920699,
|
| 14866 |
+
"grad_norm": 58.7932243347168,
|
| 14867 |
+
"learning_rate": 9.963594479952347e-06,
|
| 14868 |
+
"loss": 9.7854,
|
| 14869 |
+
"step": 21000
|
| 14870 |
+
},
|
| 14871 |
+
{
|
| 14872 |
+
"epoch": 0.9319819594920699,
|
| 14873 |
+
"eval_loss": 0.32051748037338257,
|
| 14874 |
+
"eval_runtime": 673.8843,
|
| 14875 |
+
"eval_samples_per_second": 1802.076,
|
| 14876 |
+
"eval_steps_per_second": 56.315,
|
| 14877 |
+
"step": 21000
|
| 14878 |
+
},
|
| 14879 |
+
{
|
| 14880 |
+
"epoch": 0.9324257604251613,
|
| 14881 |
+
"grad_norm": 61.75080871582031,
|
| 14882 |
+
"learning_rate": 9.96357714399042e-06,
|
| 14883 |
+
"loss": 10.7108,
|
| 14884 |
+
"step": 21010
|
| 14885 |
+
},
|
| 14886 |
+
{
|
| 14887 |
+
"epoch": 0.9328695613582527,
|
| 14888 |
+
"grad_norm": 66.45942687988281,
|
| 14889 |
+
"learning_rate": 9.963559808028493e-06,
|
| 14890 |
+
"loss": 9.7115,
|
| 14891 |
+
"step": 21020
|
| 14892 |
+
},
|
| 14893 |
+
{
|
| 14894 |
+
"epoch": 0.9333133622913442,
|
| 14895 |
+
"grad_norm": 66.86365509033203,
|
| 14896 |
+
"learning_rate": 9.963542472066564e-06,
|
| 14897 |
+
"loss": 10.4903,
|
| 14898 |
+
"step": 21030
|
| 14899 |
+
},
|
| 14900 |
+
{
|
| 14901 |
+
"epoch": 0.9337571632244357,
|
| 14902 |
+
"grad_norm": 59.50422668457031,
|
| 14903 |
+
"learning_rate": 9.963525136104637e-06,
|
| 14904 |
+
"loss": 10.2654,
|
| 14905 |
+
"step": 21040
|
| 14906 |
+
},
|
| 14907 |
+
{
|
| 14908 |
+
"epoch": 0.9342009641575272,
|
| 14909 |
+
"grad_norm": 65.55677032470703,
|
| 14910 |
+
"learning_rate": 9.96350780014271e-06,
|
| 14911 |
+
"loss": 10.2086,
|
| 14912 |
+
"step": 21050
|
| 14913 |
+
},
|
| 14914 |
+
{
|
| 14915 |
+
"epoch": 0.9346447650906186,
|
| 14916 |
+
"grad_norm": 60.787967681884766,
|
| 14917 |
+
"learning_rate": 9.963490464180784e-06,
|
| 14918 |
+
"loss": 10.4747,
|
| 14919 |
+
"step": 21060
|
| 14920 |
+
},
|
| 14921 |
+
{
|
| 14922 |
+
"epoch": 0.93508856602371,
|
| 14923 |
+
"grad_norm": 58.43632507324219,
|
| 14924 |
+
"learning_rate": 9.963473128218855e-06,
|
| 14925 |
+
"loss": 10.1393,
|
| 14926 |
+
"step": 21070
|
| 14927 |
+
},
|
| 14928 |
+
{
|
| 14929 |
+
"epoch": 0.9355323669568015,
|
| 14930 |
+
"grad_norm": 80.03214263916016,
|
| 14931 |
+
"learning_rate": 9.963455792256928e-06,
|
| 14932 |
+
"loss": 10.1442,
|
| 14933 |
+
"step": 21080
|
| 14934 |
+
},
|
| 14935 |
+
{
|
| 14936 |
+
"epoch": 0.935976167889893,
|
| 14937 |
+
"grad_norm": 58.377357482910156,
|
| 14938 |
+
"learning_rate": 9.963438456295001e-06,
|
| 14939 |
+
"loss": 10.3524,
|
| 14940 |
+
"step": 21090
|
| 14941 |
+
},
|
| 14942 |
+
{
|
| 14943 |
+
"epoch": 0.9364199688229845,
|
| 14944 |
+
"grad_norm": 62.32807159423828,
|
| 14945 |
+
"learning_rate": 9.963421120333073e-06,
|
| 14946 |
+
"loss": 10.1192,
|
| 14947 |
+
"step": 21100
|
| 14948 |
+
},
|
| 14949 |
+
{
|
| 14950 |
+
"epoch": 0.9368637697560759,
|
| 14951 |
+
"grad_norm": 62.65857696533203,
|
| 14952 |
+
"learning_rate": 9.963403784371146e-06,
|
| 14953 |
+
"loss": 10.2389,
|
| 14954 |
+
"step": 21110
|
| 14955 |
+
},
|
| 14956 |
+
{
|
| 14957 |
+
"epoch": 0.9373075706891674,
|
| 14958 |
+
"grad_norm": 57.880252838134766,
|
| 14959 |
+
"learning_rate": 9.963386448409219e-06,
|
| 14960 |
+
"loss": 10.244,
|
| 14961 |
+
"step": 21120
|
| 14962 |
+
},
|
| 14963 |
+
{
|
| 14964 |
+
"epoch": 0.9377513716222589,
|
| 14965 |
+
"grad_norm": 65.94375610351562,
|
| 14966 |
+
"learning_rate": 9.96336911244729e-06,
|
| 14967 |
+
"loss": 10.4006,
|
| 14968 |
+
"step": 21130
|
| 14969 |
+
},
|
| 14970 |
+
{
|
| 14971 |
+
"epoch": 0.9381951725553503,
|
| 14972 |
+
"grad_norm": 57.031944274902344,
|
| 14973 |
+
"learning_rate": 9.963351776485363e-06,
|
| 14974 |
+
"loss": 10.1134,
|
| 14975 |
+
"step": 21140
|
| 14976 |
+
},
|
| 14977 |
+
{
|
| 14978 |
+
"epoch": 0.9386389734884417,
|
| 14979 |
+
"grad_norm": 54.85254669189453,
|
| 14980 |
+
"learning_rate": 9.963334440523436e-06,
|
| 14981 |
+
"loss": 10.1874,
|
| 14982 |
+
"step": 21150
|
| 14983 |
+
},
|
| 14984 |
+
{
|
| 14985 |
+
"epoch": 0.9390827744215332,
|
| 14986 |
+
"grad_norm": 61.38536834716797,
|
| 14987 |
+
"learning_rate": 9.963317104561508e-06,
|
| 14988 |
+
"loss": 10.2983,
|
| 14989 |
+
"step": 21160
|
| 14990 |
+
},
|
| 14991 |
+
{
|
| 14992 |
+
"epoch": 0.9395265753546247,
|
| 14993 |
+
"grad_norm": 63.140010833740234,
|
| 14994 |
+
"learning_rate": 9.96329976859958e-06,
|
| 14995 |
+
"loss": 10.518,
|
| 14996 |
+
"step": 21170
|
| 14997 |
+
},
|
| 14998 |
+
{
|
| 14999 |
+
"epoch": 0.9399703762877162,
|
| 15000 |
+
"grad_norm": 64.6523208618164,
|
| 15001 |
+
"learning_rate": 9.963282432637654e-06,
|
| 15002 |
+
"loss": 9.8054,
|
| 15003 |
+
"step": 21180
|
| 15004 |
+
},
|
| 15005 |
+
{
|
| 15006 |
+
"epoch": 0.9404141772208076,
|
| 15007 |
+
"grad_norm": 64.04774475097656,
|
| 15008 |
+
"learning_rate": 9.963265096675727e-06,
|
| 15009 |
+
"loss": 10.1823,
|
| 15010 |
+
"step": 21190
|
| 15011 |
+
},
|
| 15012 |
+
{
|
| 15013 |
+
"epoch": 0.940857978153899,
|
| 15014 |
+
"grad_norm": 61.016780853271484,
|
| 15015 |
+
"learning_rate": 9.963247760713798e-06,
|
| 15016 |
+
"loss": 10.0684,
|
| 15017 |
+
"step": 21200
|
| 15018 |
+
},
|
| 15019 |
+
{
|
| 15020 |
+
"epoch": 0.9413017790869905,
|
| 15021 |
+
"grad_norm": 55.790435791015625,
|
| 15022 |
+
"learning_rate": 9.963230424751871e-06,
|
| 15023 |
+
"loss": 9.9463,
|
| 15024 |
+
"step": 21210
|
| 15025 |
+
},
|
| 15026 |
+
{
|
| 15027 |
+
"epoch": 0.941745580020082,
|
| 15028 |
+
"grad_norm": 58.64189910888672,
|
| 15029 |
+
"learning_rate": 9.963213088789944e-06,
|
| 15030 |
+
"loss": 9.7533,
|
| 15031 |
+
"step": 21220
|
| 15032 |
+
},
|
| 15033 |
+
{
|
| 15034 |
+
"epoch": 0.9421893809531735,
|
| 15035 |
+
"grad_norm": 62.45125961303711,
|
| 15036 |
+
"learning_rate": 9.963195752828016e-06,
|
| 15037 |
+
"loss": 10.2871,
|
| 15038 |
+
"step": 21230
|
| 15039 |
+
},
|
| 15040 |
+
{
|
| 15041 |
+
"epoch": 0.9426331818862649,
|
| 15042 |
+
"grad_norm": 57.653106689453125,
|
| 15043 |
+
"learning_rate": 9.963178416866089e-06,
|
| 15044 |
+
"loss": 9.6557,
|
| 15045 |
+
"step": 21240
|
| 15046 |
+
},
|
| 15047 |
+
{
|
| 15048 |
+
"epoch": 0.9430769828193564,
|
| 15049 |
+
"grad_norm": 70.4138412475586,
|
| 15050 |
+
"learning_rate": 9.963161080904162e-06,
|
| 15051 |
+
"loss": 9.947,
|
| 15052 |
+
"step": 21250
|
| 15053 |
+
},
|
| 15054 |
+
{
|
| 15055 |
+
"epoch": 0.9435207837524479,
|
| 15056 |
+
"grad_norm": 62.09977340698242,
|
| 15057 |
+
"learning_rate": 9.963143744942233e-06,
|
| 15058 |
+
"loss": 10.3042,
|
| 15059 |
+
"step": 21260
|
| 15060 |
+
},
|
| 15061 |
+
{
|
| 15062 |
+
"epoch": 0.9439645846855393,
|
| 15063 |
+
"grad_norm": 57.68659973144531,
|
| 15064 |
+
"learning_rate": 9.963126408980306e-06,
|
| 15065 |
+
"loss": 10.1486,
|
| 15066 |
+
"step": 21270
|
| 15067 |
+
},
|
| 15068 |
+
{
|
| 15069 |
+
"epoch": 0.9444083856186307,
|
| 15070 |
+
"grad_norm": 58.4940185546875,
|
| 15071 |
+
"learning_rate": 9.96310907301838e-06,
|
| 15072 |
+
"loss": 10.6721,
|
| 15073 |
+
"step": 21280
|
| 15074 |
+
},
|
| 15075 |
+
{
|
| 15076 |
+
"epoch": 0.9448521865517222,
|
| 15077 |
+
"grad_norm": 62.28818893432617,
|
| 15078 |
+
"learning_rate": 9.963091737056451e-06,
|
| 15079 |
+
"loss": 10.3957,
|
| 15080 |
+
"step": 21290
|
| 15081 |
+
},
|
| 15082 |
+
{
|
| 15083 |
+
"epoch": 0.9452959874848137,
|
| 15084 |
+
"grad_norm": 52.62106704711914,
|
| 15085 |
+
"learning_rate": 9.963074401094524e-06,
|
| 15086 |
+
"loss": 9.9156,
|
| 15087 |
+
"step": 21300
|
| 15088 |
+
},
|
| 15089 |
+
{
|
| 15090 |
+
"epoch": 0.9457397884179052,
|
| 15091 |
+
"grad_norm": 55.59827423095703,
|
| 15092 |
+
"learning_rate": 9.963057065132597e-06,
|
| 15093 |
+
"loss": 10.5472,
|
| 15094 |
+
"step": 21310
|
| 15095 |
+
},
|
| 15096 |
+
{
|
| 15097 |
+
"epoch": 0.9461835893509966,
|
| 15098 |
+
"grad_norm": 66.30583953857422,
|
| 15099 |
+
"learning_rate": 9.96303972917067e-06,
|
| 15100 |
+
"loss": 10.2904,
|
| 15101 |
+
"step": 21320
|
| 15102 |
+
},
|
| 15103 |
+
{
|
| 15104 |
+
"epoch": 0.946627390284088,
|
| 15105 |
+
"grad_norm": 61.947025299072266,
|
| 15106 |
+
"learning_rate": 9.963022393208741e-06,
|
| 15107 |
+
"loss": 10.238,
|
| 15108 |
+
"step": 21330
|
| 15109 |
+
},
|
| 15110 |
+
{
|
| 15111 |
+
"epoch": 0.9470711912171795,
|
| 15112 |
+
"grad_norm": 64.70133209228516,
|
| 15113 |
+
"learning_rate": 9.963005057246815e-06,
|
| 15114 |
+
"loss": 10.2199,
|
| 15115 |
+
"step": 21340
|
| 15116 |
+
},
|
| 15117 |
+
{
|
| 15118 |
+
"epoch": 0.947514992150271,
|
| 15119 |
+
"grad_norm": 60.59946823120117,
|
| 15120 |
+
"learning_rate": 9.962987721284888e-06,
|
| 15121 |
+
"loss": 10.3811,
|
| 15122 |
+
"step": 21350
|
| 15123 |
+
},
|
| 15124 |
+
{
|
| 15125 |
+
"epoch": 0.9479587930833625,
|
| 15126 |
+
"grad_norm": 57.58180618286133,
|
| 15127 |
+
"learning_rate": 9.962970385322959e-06,
|
| 15128 |
+
"loss": 10.4364,
|
| 15129 |
+
"step": 21360
|
| 15130 |
+
},
|
| 15131 |
+
{
|
| 15132 |
+
"epoch": 0.9484025940164539,
|
| 15133 |
+
"grad_norm": 63.964500427246094,
|
| 15134 |
+
"learning_rate": 9.962953049361032e-06,
|
| 15135 |
+
"loss": 10.1727,
|
| 15136 |
+
"step": 21370
|
| 15137 |
+
},
|
| 15138 |
+
{
|
| 15139 |
+
"epoch": 0.9488463949495454,
|
| 15140 |
+
"grad_norm": 66.83090209960938,
|
| 15141 |
+
"learning_rate": 9.962935713399105e-06,
|
| 15142 |
+
"loss": 10.4908,
|
| 15143 |
+
"step": 21380
|
| 15144 |
+
},
|
| 15145 |
+
{
|
| 15146 |
+
"epoch": 0.9492901958826369,
|
| 15147 |
+
"grad_norm": 58.45000457763672,
|
| 15148 |
+
"learning_rate": 9.962918377437177e-06,
|
| 15149 |
+
"loss": 10.3677,
|
| 15150 |
+
"step": 21390
|
| 15151 |
+
},
|
| 15152 |
+
{
|
| 15153 |
+
"epoch": 0.9497339968157283,
|
| 15154 |
+
"grad_norm": 62.023040771484375,
|
| 15155 |
+
"learning_rate": 9.96290104147525e-06,
|
| 15156 |
+
"loss": 10.0164,
|
| 15157 |
+
"step": 21400
|
| 15158 |
+
},
|
| 15159 |
+
{
|
| 15160 |
+
"epoch": 0.9501777977488197,
|
| 15161 |
+
"grad_norm": 58.94234848022461,
|
| 15162 |
+
"learning_rate": 9.962883705513323e-06,
|
| 15163 |
+
"loss": 10.4101,
|
| 15164 |
+
"step": 21410
|
| 15165 |
+
},
|
| 15166 |
+
{
|
| 15167 |
+
"epoch": 0.9506215986819112,
|
| 15168 |
+
"grad_norm": 58.807456970214844,
|
| 15169 |
+
"learning_rate": 9.962866369551396e-06,
|
| 15170 |
+
"loss": 9.882,
|
| 15171 |
+
"step": 21420
|
| 15172 |
+
},
|
| 15173 |
+
{
|
| 15174 |
+
"epoch": 0.9510653996150027,
|
| 15175 |
+
"grad_norm": 54.390098571777344,
|
| 15176 |
+
"learning_rate": 9.962849033589467e-06,
|
| 15177 |
+
"loss": 10.6928,
|
| 15178 |
+
"step": 21430
|
| 15179 |
+
},
|
| 15180 |
+
{
|
| 15181 |
+
"epoch": 0.9515092005480942,
|
| 15182 |
+
"grad_norm": 65.57588195800781,
|
| 15183 |
+
"learning_rate": 9.96283169762754e-06,
|
| 15184 |
+
"loss": 10.8391,
|
| 15185 |
+
"step": 21440
|
| 15186 |
+
},
|
| 15187 |
+
{
|
| 15188 |
+
"epoch": 0.9519530014811857,
|
| 15189 |
+
"grad_norm": 59.871700286865234,
|
| 15190 |
+
"learning_rate": 9.962814361665613e-06,
|
| 15191 |
+
"loss": 10.1765,
|
| 15192 |
+
"step": 21450
|
| 15193 |
+
},
|
| 15194 |
+
{
|
| 15195 |
+
"epoch": 0.952396802414277,
|
| 15196 |
+
"grad_norm": 61.5579948425293,
|
| 15197 |
+
"learning_rate": 9.962797025703685e-06,
|
| 15198 |
+
"loss": 10.1676,
|
| 15199 |
+
"step": 21460
|
| 15200 |
+
},
|
| 15201 |
+
{
|
| 15202 |
+
"epoch": 0.9528406033473685,
|
| 15203 |
+
"grad_norm": 50.21920394897461,
|
| 15204 |
+
"learning_rate": 9.962779689741758e-06,
|
| 15205 |
+
"loss": 10.3828,
|
| 15206 |
+
"step": 21470
|
| 15207 |
+
},
|
| 15208 |
+
{
|
| 15209 |
+
"epoch": 0.95328440428046,
|
| 15210 |
+
"grad_norm": 59.22177505493164,
|
| 15211 |
+
"learning_rate": 9.962762353779831e-06,
|
| 15212 |
+
"loss": 10.3205,
|
| 15213 |
+
"step": 21480
|
| 15214 |
+
},
|
| 15215 |
+
{
|
| 15216 |
+
"epoch": 0.9537282052135515,
|
| 15217 |
+
"grad_norm": 66.43260955810547,
|
| 15218 |
+
"learning_rate": 9.962745017817902e-06,
|
| 15219 |
+
"loss": 9.9529,
|
| 15220 |
+
"step": 21490
|
| 15221 |
+
},
|
| 15222 |
+
{
|
| 15223 |
+
"epoch": 0.9541720061466429,
|
| 15224 |
+
"grad_norm": 52.945499420166016,
|
| 15225 |
+
"learning_rate": 9.962727681855975e-06,
|
| 15226 |
+
"loss": 10.0895,
|
| 15227 |
+
"step": 21500
|
| 15228 |
+
},
|
| 15229 |
+
{
|
| 15230 |
+
"epoch": 0.9546158070797344,
|
| 15231 |
+
"grad_norm": 65.87628173828125,
|
| 15232 |
+
"learning_rate": 9.962710345894048e-06,
|
| 15233 |
+
"loss": 9.9705,
|
| 15234 |
+
"step": 21510
|
| 15235 |
+
},
|
| 15236 |
+
{
|
| 15237 |
+
"epoch": 0.9550596080128259,
|
| 15238 |
+
"grad_norm": 60.30337142944336,
|
| 15239 |
+
"learning_rate": 9.96269300993212e-06,
|
| 15240 |
+
"loss": 10.2607,
|
| 15241 |
+
"step": 21520
|
| 15242 |
+
},
|
| 15243 |
+
{
|
| 15244 |
+
"epoch": 0.9555034089459173,
|
| 15245 |
+
"grad_norm": 69.21615600585938,
|
| 15246 |
+
"learning_rate": 9.962675673970193e-06,
|
| 15247 |
+
"loss": 9.8337,
|
| 15248 |
+
"step": 21530
|
| 15249 |
+
},
|
| 15250 |
+
{
|
| 15251 |
+
"epoch": 0.9559472098790087,
|
| 15252 |
+
"grad_norm": 60.485984802246094,
|
| 15253 |
+
"learning_rate": 9.962658338008266e-06,
|
| 15254 |
+
"loss": 10.2629,
|
| 15255 |
+
"step": 21540
|
| 15256 |
+
},
|
| 15257 |
+
{
|
| 15258 |
+
"epoch": 0.9563910108121002,
|
| 15259 |
+
"grad_norm": 57.54688262939453,
|
| 15260 |
+
"learning_rate": 9.962641002046339e-06,
|
| 15261 |
+
"loss": 10.5513,
|
| 15262 |
+
"step": 21550
|
| 15263 |
+
},
|
| 15264 |
+
{
|
| 15265 |
+
"epoch": 0.9568348117451917,
|
| 15266 |
+
"grad_norm": 56.43334197998047,
|
| 15267 |
+
"learning_rate": 9.96262366608441e-06,
|
| 15268 |
+
"loss": 10.5496,
|
| 15269 |
+
"step": 21560
|
| 15270 |
+
},
|
| 15271 |
+
{
|
| 15272 |
+
"epoch": 0.9572786126782832,
|
| 15273 |
+
"grad_norm": 69.3460464477539,
|
| 15274 |
+
"learning_rate": 9.962606330122484e-06,
|
| 15275 |
+
"loss": 10.0796,
|
| 15276 |
+
"step": 21570
|
| 15277 |
+
},
|
| 15278 |
+
{
|
| 15279 |
+
"epoch": 0.9577224136113747,
|
| 15280 |
+
"grad_norm": 59.36885070800781,
|
| 15281 |
+
"learning_rate": 9.962588994160557e-06,
|
| 15282 |
+
"loss": 9.7917,
|
| 15283 |
+
"step": 21580
|
| 15284 |
+
},
|
| 15285 |
+
{
|
| 15286 |
+
"epoch": 0.958166214544466,
|
| 15287 |
+
"grad_norm": 50.145694732666016,
|
| 15288 |
+
"learning_rate": 9.962571658198628e-06,
|
| 15289 |
+
"loss": 10.1281,
|
| 15290 |
+
"step": 21590
|
| 15291 |
+
},
|
| 15292 |
+
{
|
| 15293 |
+
"epoch": 0.9586100154775575,
|
| 15294 |
+
"grad_norm": 63.263710021972656,
|
| 15295 |
+
"learning_rate": 9.962554322236701e-06,
|
| 15296 |
+
"loss": 10.6324,
|
| 15297 |
+
"step": 21600
|
| 15298 |
+
},
|
| 15299 |
+
{
|
| 15300 |
+
"epoch": 0.959053816410649,
|
| 15301 |
+
"grad_norm": 64.55142211914062,
|
| 15302 |
+
"learning_rate": 9.962536986274774e-06,
|
| 15303 |
+
"loss": 10.3349,
|
| 15304 |
+
"step": 21610
|
| 15305 |
+
},
|
| 15306 |
+
{
|
| 15307 |
+
"epoch": 0.9594976173437405,
|
| 15308 |
+
"grad_norm": 69.35453796386719,
|
| 15309 |
+
"learning_rate": 9.962519650312846e-06,
|
| 15310 |
+
"loss": 10.1361,
|
| 15311 |
+
"step": 21620
|
| 15312 |
+
},
|
| 15313 |
+
{
|
| 15314 |
+
"epoch": 0.9599414182768319,
|
| 15315 |
+
"grad_norm": 54.69525909423828,
|
| 15316 |
+
"learning_rate": 9.962502314350919e-06,
|
| 15317 |
+
"loss": 10.561,
|
| 15318 |
+
"step": 21630
|
| 15319 |
+
},
|
| 15320 |
+
{
|
| 15321 |
+
"epoch": 0.9603852192099234,
|
| 15322 |
+
"grad_norm": 58.769649505615234,
|
| 15323 |
+
"learning_rate": 9.962484978388992e-06,
|
| 15324 |
+
"loss": 9.6632,
|
| 15325 |
+
"step": 21640
|
| 15326 |
+
},
|
| 15327 |
+
{
|
| 15328 |
+
"epoch": 0.9608290201430149,
|
| 15329 |
+
"grad_norm": 62.73846435546875,
|
| 15330 |
+
"learning_rate": 9.962467642427063e-06,
|
| 15331 |
+
"loss": 10.144,
|
| 15332 |
+
"step": 21650
|
| 15333 |
+
},
|
| 15334 |
+
{
|
| 15335 |
+
"epoch": 0.9612728210761063,
|
| 15336 |
+
"grad_norm": 56.944557189941406,
|
| 15337 |
+
"learning_rate": 9.962450306465136e-06,
|
| 15338 |
+
"loss": 10.048,
|
| 15339 |
+
"step": 21660
|
| 15340 |
+
},
|
| 15341 |
+
{
|
| 15342 |
+
"epoch": 0.9617166220091977,
|
| 15343 |
+
"grad_norm": 51.97823715209961,
|
| 15344 |
+
"learning_rate": 9.96243297050321e-06,
|
| 15345 |
+
"loss": 9.9427,
|
| 15346 |
+
"step": 21670
|
| 15347 |
+
},
|
| 15348 |
+
{
|
| 15349 |
+
"epoch": 0.9621604229422892,
|
| 15350 |
+
"grad_norm": 60.300987243652344,
|
| 15351 |
+
"learning_rate": 9.96241563454128e-06,
|
| 15352 |
+
"loss": 10.6487,
|
| 15353 |
+
"step": 21680
|
| 15354 |
+
},
|
| 15355 |
+
{
|
| 15356 |
+
"epoch": 0.9626042238753807,
|
| 15357 |
+
"grad_norm": 61.2890739440918,
|
| 15358 |
+
"learning_rate": 9.962398298579354e-06,
|
| 15359 |
+
"loss": 10.0074,
|
| 15360 |
+
"step": 21690
|
| 15361 |
+
},
|
| 15362 |
+
{
|
| 15363 |
+
"epoch": 0.9630480248084722,
|
| 15364 |
+
"grad_norm": 53.57798767089844,
|
| 15365 |
+
"learning_rate": 9.962380962617427e-06,
|
| 15366 |
+
"loss": 10.3964,
|
| 15367 |
+
"step": 21700
|
| 15368 |
+
},
|
| 15369 |
+
{
|
| 15370 |
+
"epoch": 0.9634918257415637,
|
| 15371 |
+
"grad_norm": 63.061988830566406,
|
| 15372 |
+
"learning_rate": 9.9623636266555e-06,
|
| 15373 |
+
"loss": 10.4485,
|
| 15374 |
+
"step": 21710
|
| 15375 |
+
},
|
| 15376 |
+
{
|
| 15377 |
+
"epoch": 0.963935626674655,
|
| 15378 |
+
"grad_norm": 60.63272476196289,
|
| 15379 |
+
"learning_rate": 9.962346290693571e-06,
|
| 15380 |
+
"loss": 10.3628,
|
| 15381 |
+
"step": 21720
|
| 15382 |
+
},
|
| 15383 |
+
{
|
| 15384 |
+
"epoch": 0.9643794276077465,
|
| 15385 |
+
"grad_norm": 69.05794525146484,
|
| 15386 |
+
"learning_rate": 9.962328954731644e-06,
|
| 15387 |
+
"loss": 10.0831,
|
| 15388 |
+
"step": 21730
|
| 15389 |
+
},
|
| 15390 |
+
{
|
| 15391 |
+
"epoch": 0.964823228540838,
|
| 15392 |
+
"grad_norm": 63.956844329833984,
|
| 15393 |
+
"learning_rate": 9.962311618769717e-06,
|
| 15394 |
+
"loss": 10.7723,
|
| 15395 |
+
"step": 21740
|
| 15396 |
+
},
|
| 15397 |
+
{
|
| 15398 |
+
"epoch": 0.9652670294739295,
|
| 15399 |
+
"grad_norm": 58.707271575927734,
|
| 15400 |
+
"learning_rate": 9.962294282807789e-06,
|
| 15401 |
+
"loss": 9.9793,
|
| 15402 |
+
"step": 21750
|
| 15403 |
+
},
|
| 15404 |
+
{
|
| 15405 |
+
"epoch": 0.9657108304070209,
|
| 15406 |
+
"grad_norm": 60.982521057128906,
|
| 15407 |
+
"learning_rate": 9.962276946845862e-06,
|
| 15408 |
+
"loss": 10.037,
|
| 15409 |
+
"step": 21760
|
| 15410 |
+
},
|
| 15411 |
+
{
|
| 15412 |
+
"epoch": 0.9661546313401124,
|
| 15413 |
+
"grad_norm": 58.97859191894531,
|
| 15414 |
+
"learning_rate": 9.962259610883935e-06,
|
| 15415 |
+
"loss": 10.2802,
|
| 15416 |
+
"step": 21770
|
| 15417 |
+
},
|
| 15418 |
+
{
|
| 15419 |
+
"epoch": 0.9665984322732039,
|
| 15420 |
+
"grad_norm": 59.34490966796875,
|
| 15421 |
+
"learning_rate": 9.962242274922006e-06,
|
| 15422 |
+
"loss": 10.1092,
|
| 15423 |
+
"step": 21780
|
| 15424 |
+
},
|
| 15425 |
+
{
|
| 15426 |
+
"epoch": 0.9670422332062953,
|
| 15427 |
+
"grad_norm": 72.03850555419922,
|
| 15428 |
+
"learning_rate": 9.96222493896008e-06,
|
| 15429 |
+
"loss": 10.4551,
|
| 15430 |
+
"step": 21790
|
| 15431 |
+
},
|
| 15432 |
+
{
|
| 15433 |
+
"epoch": 0.9674860341393868,
|
| 15434 |
+
"grad_norm": 64.43966674804688,
|
| 15435 |
+
"learning_rate": 9.962207602998152e-06,
|
| 15436 |
+
"loss": 10.2547,
|
| 15437 |
+
"step": 21800
|
| 15438 |
+
},
|
| 15439 |
+
{
|
| 15440 |
+
"epoch": 0.9679298350724782,
|
| 15441 |
+
"grad_norm": 58.295806884765625,
|
| 15442 |
+
"learning_rate": 9.962190267036224e-06,
|
| 15443 |
+
"loss": 10.3897,
|
| 15444 |
+
"step": 21810
|
| 15445 |
+
},
|
| 15446 |
+
{
|
| 15447 |
+
"epoch": 0.9683736360055697,
|
| 15448 |
+
"grad_norm": 64.32368469238281,
|
| 15449 |
+
"learning_rate": 9.962172931074297e-06,
|
| 15450 |
+
"loss": 10.4014,
|
| 15451 |
+
"step": 21820
|
| 15452 |
+
},
|
| 15453 |
+
{
|
| 15454 |
+
"epoch": 0.9688174369386612,
|
| 15455 |
+
"grad_norm": 61.49608612060547,
|
| 15456 |
+
"learning_rate": 9.96215559511237e-06,
|
| 15457 |
+
"loss": 10.3173,
|
| 15458 |
+
"step": 21830
|
| 15459 |
+
},
|
| 15460 |
+
{
|
| 15461 |
+
"epoch": 0.9692612378717527,
|
| 15462 |
+
"grad_norm": 66.80955505371094,
|
| 15463 |
+
"learning_rate": 9.962138259150441e-06,
|
| 15464 |
+
"loss": 10.3526,
|
| 15465 |
+
"step": 21840
|
| 15466 |
+
},
|
| 15467 |
+
{
|
| 15468 |
+
"epoch": 0.969705038804844,
|
| 15469 |
+
"grad_norm": 60.56246566772461,
|
| 15470 |
+
"learning_rate": 9.962120923188514e-06,
|
| 15471 |
+
"loss": 9.8569,
|
| 15472 |
+
"step": 21850
|
| 15473 |
+
},
|
| 15474 |
+
{
|
| 15475 |
+
"epoch": 0.9701488397379355,
|
| 15476 |
+
"grad_norm": 57.73928451538086,
|
| 15477 |
+
"learning_rate": 9.962103587226588e-06,
|
| 15478 |
+
"loss": 10.3068,
|
| 15479 |
+
"step": 21860
|
| 15480 |
+
},
|
| 15481 |
+
{
|
| 15482 |
+
"epoch": 0.970592640671027,
|
| 15483 |
+
"grad_norm": 54.70594024658203,
|
| 15484 |
+
"learning_rate": 9.962086251264659e-06,
|
| 15485 |
+
"loss": 10.054,
|
| 15486 |
+
"step": 21870
|
| 15487 |
+
},
|
| 15488 |
+
{
|
| 15489 |
+
"epoch": 0.9710364416041185,
|
| 15490 |
+
"grad_norm": 54.87747573852539,
|
| 15491 |
+
"learning_rate": 9.962068915302732e-06,
|
| 15492 |
+
"loss": 9.9986,
|
| 15493 |
+
"step": 21880
|
| 15494 |
+
},
|
| 15495 |
+
{
|
| 15496 |
+
"epoch": 0.9714802425372099,
|
| 15497 |
+
"grad_norm": 60.417457580566406,
|
| 15498 |
+
"learning_rate": 9.962051579340805e-06,
|
| 15499 |
+
"loss": 10.3474,
|
| 15500 |
+
"step": 21890
|
| 15501 |
+
},
|
| 15502 |
+
{
|
| 15503 |
+
"epoch": 0.9719240434703014,
|
| 15504 |
+
"grad_norm": 68.63028717041016,
|
| 15505 |
+
"learning_rate": 9.962034243378876e-06,
|
| 15506 |
+
"loss": 10.0733,
|
| 15507 |
+
"step": 21900
|
| 15508 |
+
},
|
| 15509 |
+
{
|
| 15510 |
+
"epoch": 0.9723678444033929,
|
| 15511 |
+
"grad_norm": 63.004581451416016,
|
| 15512 |
+
"learning_rate": 9.96201690741695e-06,
|
| 15513 |
+
"loss": 10.58,
|
| 15514 |
+
"step": 21910
|
| 15515 |
+
},
|
| 15516 |
+
{
|
| 15517 |
+
"epoch": 0.9728116453364843,
|
| 15518 |
+
"grad_norm": 72.34359741210938,
|
| 15519 |
+
"learning_rate": 9.961999571455023e-06,
|
| 15520 |
+
"loss": 10.2606,
|
| 15521 |
+
"step": 21920
|
| 15522 |
+
},
|
| 15523 |
+
{
|
| 15524 |
+
"epoch": 0.9732554462695758,
|
| 15525 |
+
"grad_norm": 66.6717758178711,
|
| 15526 |
+
"learning_rate": 9.961982235493096e-06,
|
| 15527 |
+
"loss": 10.5124,
|
| 15528 |
+
"step": 21930
|
| 15529 |
+
},
|
| 15530 |
+
{
|
| 15531 |
+
"epoch": 0.9736992472026672,
|
| 15532 |
+
"grad_norm": 68.4973373413086,
|
| 15533 |
+
"learning_rate": 9.961964899531167e-06,
|
| 15534 |
+
"loss": 10.3562,
|
| 15535 |
+
"step": 21940
|
| 15536 |
+
},
|
| 15537 |
+
{
|
| 15538 |
+
"epoch": 0.9741430481357587,
|
| 15539 |
+
"grad_norm": 67.8966064453125,
|
| 15540 |
+
"learning_rate": 9.96194756356924e-06,
|
| 15541 |
+
"loss": 9.8984,
|
| 15542 |
+
"step": 21950
|
| 15543 |
+
},
|
| 15544 |
+
{
|
| 15545 |
+
"epoch": 0.9745868490688502,
|
| 15546 |
+
"grad_norm": 64.77039337158203,
|
| 15547 |
+
"learning_rate": 9.961930227607313e-06,
|
| 15548 |
+
"loss": 10.4107,
|
| 15549 |
+
"step": 21960
|
| 15550 |
+
},
|
| 15551 |
+
{
|
| 15552 |
+
"epoch": 0.9750306500019417,
|
| 15553 |
+
"grad_norm": 56.87838363647461,
|
| 15554 |
+
"learning_rate": 9.961912891645385e-06,
|
| 15555 |
+
"loss": 10.3442,
|
| 15556 |
+
"step": 21970
|
| 15557 |
+
},
|
| 15558 |
+
{
|
| 15559 |
+
"epoch": 0.975474450935033,
|
| 15560 |
+
"grad_norm": 63.49540710449219,
|
| 15561 |
+
"learning_rate": 9.961895555683458e-06,
|
| 15562 |
+
"loss": 9.7954,
|
| 15563 |
+
"step": 21980
|
| 15564 |
+
},
|
| 15565 |
+
{
|
| 15566 |
+
"epoch": 0.9759182518681245,
|
| 15567 |
+
"grad_norm": 66.3160171508789,
|
| 15568 |
+
"learning_rate": 9.96187821972153e-06,
|
| 15569 |
+
"loss": 9.961,
|
| 15570 |
+
"step": 21990
|
| 15571 |
+
},
|
| 15572 |
+
{
|
| 15573 |
+
"epoch": 0.976362052801216,
|
| 15574 |
+
"grad_norm": 59.505393981933594,
|
| 15575 |
+
"learning_rate": 9.961860883759602e-06,
|
| 15576 |
+
"loss": 10.3546,
|
| 15577 |
+
"step": 22000
|
| 15578 |
+
},
|
| 15579 |
+
{
|
| 15580 |
+
"epoch": 0.976362052801216,
|
| 15581 |
+
"eval_loss": 0.3203989863395691,
|
| 15582 |
+
"eval_runtime": 674.395,
|
| 15583 |
+
"eval_samples_per_second": 1800.712,
|
| 15584 |
+
"eval_steps_per_second": 56.273,
|
| 15585 |
+
"step": 22000
|
| 15586 |
+
},
|
| 15587 |
+
{
|
| 15588 |
+
"epoch": 0.9768058537343075,
|
| 15589 |
+
"grad_norm": 65.98320770263672,
|
| 15590 |
+
"learning_rate": 9.961843547797675e-06,
|
| 15591 |
+
"loss": 10.2495,
|
| 15592 |
+
"step": 22010
|
| 15593 |
+
},
|
| 15594 |
+
{
|
| 15595 |
+
"epoch": 0.977249654667399,
|
| 15596 |
+
"grad_norm": 60.26272964477539,
|
| 15597 |
+
"learning_rate": 9.961826211835748e-06,
|
| 15598 |
+
"loss": 10.185,
|
| 15599 |
+
"step": 22020
|
| 15600 |
+
},
|
| 15601 |
+
{
|
| 15602 |
+
"epoch": 0.9776934556004904,
|
| 15603 |
+
"grad_norm": 77.41650390625,
|
| 15604 |
+
"learning_rate": 9.96180887587382e-06,
|
| 15605 |
+
"loss": 10.2332,
|
| 15606 |
+
"step": 22030
|
| 15607 |
+
},
|
| 15608 |
+
{
|
| 15609 |
+
"epoch": 0.9781372565335819,
|
| 15610 |
+
"grad_norm": 67.0610580444336,
|
| 15611 |
+
"learning_rate": 9.961791539911893e-06,
|
| 15612 |
+
"loss": 9.9651,
|
| 15613 |
+
"step": 22040
|
| 15614 |
+
},
|
| 15615 |
+
{
|
| 15616 |
+
"epoch": 0.9785810574666733,
|
| 15617 |
+
"grad_norm": 62.729793548583984,
|
| 15618 |
+
"learning_rate": 9.961774203949966e-06,
|
| 15619 |
+
"loss": 10.2186,
|
| 15620 |
+
"step": 22050
|
| 15621 |
+
},
|
| 15622 |
+
{
|
| 15623 |
+
"epoch": 0.9790248583997648,
|
| 15624 |
+
"grad_norm": 59.35409927368164,
|
| 15625 |
+
"learning_rate": 9.961756867988037e-06,
|
| 15626 |
+
"loss": 9.6738,
|
| 15627 |
+
"step": 22060
|
| 15628 |
+
},
|
| 15629 |
+
{
|
| 15630 |
+
"epoch": 0.9794686593328562,
|
| 15631 |
+
"grad_norm": 63.04311752319336,
|
| 15632 |
+
"learning_rate": 9.96173953202611e-06,
|
| 15633 |
+
"loss": 10.0731,
|
| 15634 |
+
"step": 22070
|
| 15635 |
+
},
|
| 15636 |
+
{
|
| 15637 |
+
"epoch": 0.9799124602659477,
|
| 15638 |
+
"grad_norm": 69.83229064941406,
|
| 15639 |
+
"learning_rate": 9.961722196064183e-06,
|
| 15640 |
+
"loss": 10.3592,
|
| 15641 |
+
"step": 22080
|
| 15642 |
+
},
|
| 15643 |
+
{
|
| 15644 |
+
"epoch": 0.9803562611990392,
|
| 15645 |
+
"grad_norm": 71.35539245605469,
|
| 15646 |
+
"learning_rate": 9.961704860102255e-06,
|
| 15647 |
+
"loss": 10.2029,
|
| 15648 |
+
"step": 22090
|
| 15649 |
+
},
|
| 15650 |
+
{
|
| 15651 |
+
"epoch": 0.9808000621321307,
|
| 15652 |
+
"grad_norm": 57.54240798950195,
|
| 15653 |
+
"learning_rate": 9.961687524140328e-06,
|
| 15654 |
+
"loss": 10.6619,
|
| 15655 |
+
"step": 22100
|
| 15656 |
+
},
|
| 15657 |
+
{
|
| 15658 |
+
"epoch": 0.981243863065222,
|
| 15659 |
+
"grad_norm": 62.44277572631836,
|
| 15660 |
+
"learning_rate": 9.961670188178401e-06,
|
| 15661 |
+
"loss": 10.35,
|
| 15662 |
+
"step": 22110
|
| 15663 |
+
},
|
| 15664 |
+
{
|
| 15665 |
+
"epoch": 0.9816876639983135,
|
| 15666 |
+
"grad_norm": 61.99805450439453,
|
| 15667 |
+
"learning_rate": 9.961652852216472e-06,
|
| 15668 |
+
"loss": 10.4932,
|
| 15669 |
+
"step": 22120
|
| 15670 |
+
},
|
| 15671 |
+
{
|
| 15672 |
+
"epoch": 0.982131464931405,
|
| 15673 |
+
"grad_norm": 63.21669387817383,
|
| 15674 |
+
"learning_rate": 9.961635516254545e-06,
|
| 15675 |
+
"loss": 10.4017,
|
| 15676 |
+
"step": 22130
|
| 15677 |
+
},
|
| 15678 |
+
{
|
| 15679 |
+
"epoch": 0.9825752658644965,
|
| 15680 |
+
"grad_norm": 59.47304916381836,
|
| 15681 |
+
"learning_rate": 9.961618180292618e-06,
|
| 15682 |
+
"loss": 10.439,
|
| 15683 |
+
"step": 22140
|
| 15684 |
+
},
|
| 15685 |
+
{
|
| 15686 |
+
"epoch": 0.983019066797588,
|
| 15687 |
+
"grad_norm": 63.6852912902832,
|
| 15688 |
+
"learning_rate": 9.961600844330692e-06,
|
| 15689 |
+
"loss": 10.2629,
|
| 15690 |
+
"step": 22150
|
| 15691 |
+
},
|
| 15692 |
+
{
|
| 15693 |
+
"epoch": 0.9834628677306794,
|
| 15694 |
+
"grad_norm": 58.97916793823242,
|
| 15695 |
+
"learning_rate": 9.961583508368763e-06,
|
| 15696 |
+
"loss": 10.1938,
|
| 15697 |
+
"step": 22160
|
| 15698 |
+
},
|
| 15699 |
+
{
|
| 15700 |
+
"epoch": 0.9839066686637709,
|
| 15701 |
+
"grad_norm": 64.60242462158203,
|
| 15702 |
+
"learning_rate": 9.961566172406836e-06,
|
| 15703 |
+
"loss": 10.5744,
|
| 15704 |
+
"step": 22170
|
| 15705 |
+
},
|
| 15706 |
+
{
|
| 15707 |
+
"epoch": 0.9843504695968623,
|
| 15708 |
+
"grad_norm": 61.11840057373047,
|
| 15709 |
+
"learning_rate": 9.961548836444909e-06,
|
| 15710 |
+
"loss": 9.7976,
|
| 15711 |
+
"step": 22180
|
| 15712 |
+
},
|
| 15713 |
+
{
|
| 15714 |
+
"epoch": 0.9847942705299538,
|
| 15715 |
+
"grad_norm": 64.354248046875,
|
| 15716 |
+
"learning_rate": 9.96153150048298e-06,
|
| 15717 |
+
"loss": 10.3659,
|
| 15718 |
+
"step": 22190
|
| 15719 |
+
},
|
| 15720 |
+
{
|
| 15721 |
+
"epoch": 0.9852380714630452,
|
| 15722 |
+
"grad_norm": 64.9032974243164,
|
| 15723 |
+
"learning_rate": 9.961514164521054e-06,
|
| 15724 |
+
"loss": 10.3499,
|
| 15725 |
+
"step": 22200
|
| 15726 |
+
},
|
| 15727 |
+
{
|
| 15728 |
+
"epoch": 0.9856818723961367,
|
| 15729 |
+
"grad_norm": 58.52092742919922,
|
| 15730 |
+
"learning_rate": 9.961496828559127e-06,
|
| 15731 |
+
"loss": 10.0338,
|
| 15732 |
+
"step": 22210
|
| 15733 |
+
},
|
| 15734 |
+
{
|
| 15735 |
+
"epoch": 0.9861256733292282,
|
| 15736 |
+
"grad_norm": 63.69118881225586,
|
| 15737 |
+
"learning_rate": 9.961479492597198e-06,
|
| 15738 |
+
"loss": 10.3962,
|
| 15739 |
+
"step": 22220
|
| 15740 |
+
},
|
| 15741 |
+
{
|
| 15742 |
+
"epoch": 0.9865694742623197,
|
| 15743 |
+
"grad_norm": 69.64185333251953,
|
| 15744 |
+
"learning_rate": 9.961462156635271e-06,
|
| 15745 |
+
"loss": 9.8446,
|
| 15746 |
+
"step": 22230
|
| 15747 |
+
},
|
| 15748 |
+
{
|
| 15749 |
+
"epoch": 0.987013275195411,
|
| 15750 |
+
"grad_norm": 64.73435974121094,
|
| 15751 |
+
"learning_rate": 9.961444820673344e-06,
|
| 15752 |
+
"loss": 9.8991,
|
| 15753 |
+
"step": 22240
|
| 15754 |
+
},
|
| 15755 |
+
{
|
| 15756 |
+
"epoch": 0.9874570761285025,
|
| 15757 |
+
"grad_norm": 55.16053771972656,
|
| 15758 |
+
"learning_rate": 9.961427484711416e-06,
|
| 15759 |
+
"loss": 10.1616,
|
| 15760 |
+
"step": 22250
|
| 15761 |
+
},
|
| 15762 |
+
{
|
| 15763 |
+
"epoch": 0.987900877061594,
|
| 15764 |
+
"grad_norm": 63.042667388916016,
|
| 15765 |
+
"learning_rate": 9.961410148749489e-06,
|
| 15766 |
+
"loss": 10.7821,
|
| 15767 |
+
"step": 22260
|
| 15768 |
+
},
|
| 15769 |
+
{
|
| 15770 |
+
"epoch": 0.9883446779946855,
|
| 15771 |
+
"grad_norm": 67.79585266113281,
|
| 15772 |
+
"learning_rate": 9.961392812787562e-06,
|
| 15773 |
+
"loss": 10.337,
|
| 15774 |
+
"step": 22270
|
| 15775 |
+
},
|
| 15776 |
+
{
|
| 15777 |
+
"epoch": 0.988788478927777,
|
| 15778 |
+
"grad_norm": 56.07440185546875,
|
| 15779 |
+
"learning_rate": 9.961375476825633e-06,
|
| 15780 |
+
"loss": 9.5085,
|
| 15781 |
+
"step": 22280
|
| 15782 |
+
},
|
| 15783 |
+
{
|
| 15784 |
+
"epoch": 0.9892322798608684,
|
| 15785 |
+
"grad_norm": 70.412841796875,
|
| 15786 |
+
"learning_rate": 9.961358140863706e-06,
|
| 15787 |
+
"loss": 10.1263,
|
| 15788 |
+
"step": 22290
|
| 15789 |
+
},
|
| 15790 |
+
{
|
| 15791 |
+
"epoch": 0.9896760807939599,
|
| 15792 |
+
"grad_norm": 52.69704818725586,
|
| 15793 |
+
"learning_rate": 9.96134080490178e-06,
|
| 15794 |
+
"loss": 10.1837,
|
| 15795 |
+
"step": 22300
|
| 15796 |
+
},
|
| 15797 |
+
{
|
| 15798 |
+
"epoch": 0.9901198817270513,
|
| 15799 |
+
"grad_norm": 61.2188606262207,
|
| 15800 |
+
"learning_rate": 9.96132346893985e-06,
|
| 15801 |
+
"loss": 10.7708,
|
| 15802 |
+
"step": 22310
|
| 15803 |
+
},
|
| 15804 |
+
{
|
| 15805 |
+
"epoch": 0.9905636826601428,
|
| 15806 |
+
"grad_norm": 54.49897766113281,
|
| 15807 |
+
"learning_rate": 9.961306132977924e-06,
|
| 15808 |
+
"loss": 10.7213,
|
| 15809 |
+
"step": 22320
|
| 15810 |
+
},
|
| 15811 |
+
{
|
| 15812 |
+
"epoch": 0.9910074835932342,
|
| 15813 |
+
"grad_norm": 62.42861557006836,
|
| 15814 |
+
"learning_rate": 9.961288797015997e-06,
|
| 15815 |
+
"loss": 10.1813,
|
| 15816 |
+
"step": 22330
|
| 15817 |
+
},
|
| 15818 |
+
{
|
| 15819 |
+
"epoch": 0.9914512845263257,
|
| 15820 |
+
"grad_norm": 60.572418212890625,
|
| 15821 |
+
"learning_rate": 9.961271461054068e-06,
|
| 15822 |
+
"loss": 9.7815,
|
| 15823 |
+
"step": 22340
|
| 15824 |
+
},
|
| 15825 |
+
{
|
| 15826 |
+
"epoch": 0.9918950854594172,
|
| 15827 |
+
"grad_norm": 66.12911224365234,
|
| 15828 |
+
"learning_rate": 9.961254125092141e-06,
|
| 15829 |
+
"loss": 10.526,
|
| 15830 |
+
"step": 22350
|
| 15831 |
+
},
|
| 15832 |
+
{
|
| 15833 |
+
"epoch": 0.9923388863925087,
|
| 15834 |
+
"grad_norm": 68.29117584228516,
|
| 15835 |
+
"learning_rate": 9.961236789130214e-06,
|
| 15836 |
+
"loss": 10.4441,
|
| 15837 |
+
"step": 22360
|
| 15838 |
+
},
|
| 15839 |
+
{
|
| 15840 |
+
"epoch": 0.9927826873256002,
|
| 15841 |
+
"grad_norm": 62.79308319091797,
|
| 15842 |
+
"learning_rate": 9.961219453168287e-06,
|
| 15843 |
+
"loss": 9.7355,
|
| 15844 |
+
"step": 22370
|
| 15845 |
+
},
|
| 15846 |
+
{
|
| 15847 |
+
"epoch": 0.9932264882586915,
|
| 15848 |
+
"grad_norm": 69.58606719970703,
|
| 15849 |
+
"learning_rate": 9.961202117206359e-06,
|
| 15850 |
+
"loss": 10.3635,
|
| 15851 |
+
"step": 22380
|
| 15852 |
+
},
|
| 15853 |
+
{
|
| 15854 |
+
"epoch": 0.993670289191783,
|
| 15855 |
+
"grad_norm": 56.06214904785156,
|
| 15856 |
+
"learning_rate": 9.961184781244432e-06,
|
| 15857 |
+
"loss": 10.2798,
|
| 15858 |
+
"step": 22390
|
| 15859 |
+
},
|
| 15860 |
+
{
|
| 15861 |
+
"epoch": 0.9941140901248745,
|
| 15862 |
+
"grad_norm": 68.97488403320312,
|
| 15863 |
+
"learning_rate": 9.961167445282505e-06,
|
| 15864 |
+
"loss": 10.7732,
|
| 15865 |
+
"step": 22400
|
| 15866 |
+
},
|
| 15867 |
+
{
|
| 15868 |
+
"epoch": 0.994557891057966,
|
| 15869 |
+
"grad_norm": 56.97425079345703,
|
| 15870 |
+
"learning_rate": 9.961150109320576e-06,
|
| 15871 |
+
"loss": 10.1022,
|
| 15872 |
+
"step": 22410
|
| 15873 |
+
},
|
| 15874 |
+
{
|
| 15875 |
+
"epoch": 0.9950016919910574,
|
| 15876 |
+
"grad_norm": 56.741455078125,
|
| 15877 |
+
"learning_rate": 9.96113277335865e-06,
|
| 15878 |
+
"loss": 10.2925,
|
| 15879 |
+
"step": 22420
|
| 15880 |
+
},
|
| 15881 |
+
{
|
| 15882 |
+
"epoch": 0.9954454929241489,
|
| 15883 |
+
"grad_norm": 69.6789321899414,
|
| 15884 |
+
"learning_rate": 9.961115437396722e-06,
|
| 15885 |
+
"loss": 9.8957,
|
| 15886 |
+
"step": 22430
|
| 15887 |
+
},
|
| 15888 |
+
{
|
| 15889 |
+
"epoch": 0.9958892938572403,
|
| 15890 |
+
"grad_norm": 58.19102478027344,
|
| 15891 |
+
"learning_rate": 9.961098101434794e-06,
|
| 15892 |
+
"loss": 9.5713,
|
| 15893 |
+
"step": 22440
|
| 15894 |
+
},
|
| 15895 |
+
{
|
| 15896 |
+
"epoch": 0.9963330947903318,
|
| 15897 |
+
"grad_norm": 56.53800964355469,
|
| 15898 |
+
"learning_rate": 9.961080765472867e-06,
|
| 15899 |
+
"loss": 9.8579,
|
| 15900 |
+
"step": 22450
|
| 15901 |
+
},
|
| 15902 |
+
{
|
| 15903 |
+
"epoch": 0.9967768957234232,
|
| 15904 |
+
"grad_norm": 58.388755798339844,
|
| 15905 |
+
"learning_rate": 9.96106342951094e-06,
|
| 15906 |
+
"loss": 10.0617,
|
| 15907 |
+
"step": 22460
|
| 15908 |
+
},
|
| 15909 |
+
{
|
| 15910 |
+
"epoch": 0.9972206966565147,
|
| 15911 |
+
"grad_norm": 57.43764877319336,
|
| 15912 |
+
"learning_rate": 9.961046093549011e-06,
|
| 15913 |
+
"loss": 10.0687,
|
| 15914 |
+
"step": 22470
|
| 15915 |
+
},
|
| 15916 |
+
{
|
| 15917 |
+
"epoch": 0.9976644975896062,
|
| 15918 |
+
"grad_norm": 64.95555877685547,
|
| 15919 |
+
"learning_rate": 9.961028757587084e-06,
|
| 15920 |
+
"loss": 10.092,
|
| 15921 |
+
"step": 22480
|
| 15922 |
+
},
|
| 15923 |
+
{
|
| 15924 |
+
"epoch": 0.9981082985226977,
|
| 15925 |
+
"grad_norm": 57.88254928588867,
|
| 15926 |
+
"learning_rate": 9.961011421625158e-06,
|
| 15927 |
+
"loss": 10.4509,
|
| 15928 |
+
"step": 22490
|
| 15929 |
+
},
|
| 15930 |
+
{
|
| 15931 |
+
"epoch": 0.9985520994557892,
|
| 15932 |
+
"grad_norm": 63.7093391418457,
|
| 15933 |
+
"learning_rate": 9.960994085663229e-06,
|
| 15934 |
+
"loss": 10.5474,
|
| 15935 |
+
"step": 22500
|
| 15936 |
}
|
| 15937 |
],
|
| 15938 |
"logging_steps": 10,
|
|
|
|
| 15952 |
"attributes": {}
|
| 15953 |
}
|
| 15954 |
},
|
| 15955 |
+
"total_flos": 7.851877211308032e+18,
|
| 15956 |
"train_batch_size": 4,
|
| 15957 |
"trial_name": null,
|
| 15958 |
"trial_params": null
|