Training in progress, step 17500, checkpoint
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1769 -3
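These are the files a transformers Trainer writes for an intermediate checkpoint (here global step 17500): the model weights, optimizer and LR-scheduler state, one RNG state per process (8 processes here), and the trainer state with the logged metrics. Passing this directory to `Trainer.train(resume_from_checkpoint="last-checkpoint")` restores all of this state and continues training from step 17500. Below is a minimal sketch for inspecting the checkpoint contents; it assumes the repository has been cloned locally so that `last-checkpoint/` exists on disk, and that the `safetensors` package is installed.

```python
import json

from safetensors.torch import load_file

ckpt_dir = "last-checkpoint"  # assumes a local clone of this repo

# Model weights: the ~738 MB safetensors file updated in this commit.
weights = load_file(f"{ckpt_dir}/model.safetensors")
n_params = sum(t.numel() for t in weights.values())
print(f"{len(weights)} tensors, {n_params / 1e6:.1f}M parameters")

# Trainer state: step counter, epoch, and the logged metrics.
with open(f"{ckpt_dir}/trainer_state.json") as f:
    state = json.load(f)
print("global_step:", state["global_step"], "epoch:", state["epoch"])
```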
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7f780a9a78fdadce0c173bf611a5da60db156d63194a2e6a49f1f18c27d761ce
 size 737632172
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:15d1e92749084ac9dde10d7d65367e2e60f9c34a59a6069753dd8472f0fc8a13
 size 1475354682
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1754ce1fea08e0a1abf50b88b05ad2235accf247d46d7ee2f8c08c6670f73f31
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ae26017d4550577988f9e10089ab5b71db8da5c695439c0a0fea91d6a1fd0704
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5d1f128b23b661bf875e117cc47a5648d99e77550cfacf4588ce64a1dd7dbde3
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:aaf41da8bd40bcccaff03238fa84745187c3a9d568a9b5f691e9996625af1de6
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a50ddb223b7bd2b99f1b2554cda38ae044aac0f187628b6ded5c4d407979e294
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c6bf0de30b7a6e43c74608e8f1fa3b7d38bb356d58e402c397bc6ad56aa95795
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fd016a3c3e3ba2a5ae38a6d0f24920c1961e6c3882d668aaebde5a2d6e1459fb
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f4e5c265f62dd45b87e17d9c102ed3afb1ecc9d2d1466b032139f4181be9bfb9
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c4aa89b4c5d338501a2c77924372d3acbefc23cb2b700c704822eb4c4c76c5fb
 size 1000
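The trainer_state.json diff below grows the log history by roughly 1,766 entries: one entry every 10 optimizer steps (training loss, gradient norm, learning rate) plus periodic evaluation entries, covering steps 15010 through 17500. A short sketch of turning those entries into a loss curve is shown here; it assumes a local clone of the repo and that matplotlib is installed, and it relies on `log_history` being the Trainer's standard field name for these entries.

```python
import json

import matplotlib.pyplot as plt

with open("last-checkpoint/trainer_state.json") as f:  # assumes a local clone
    state = json.load(f)

# "log_history" holds the per-step entries shown in the diff below.
train = [e for e in state["log_history"] if "loss" in e]
evals = [e for e in state["log_history"] if "eval_loss" in e]

plt.plot([e["step"] for e in train], [e["loss"] for e in train], label="train loss")
plt.plot([e["step"] for e in evals], [e["eval_loss"] for e in evals], label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curve.png")
```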
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.
 "eval_steps": 1000,
-"global_step":
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -10627,6 +10627,1772 @@
 "eval_samples_per_second": 1807.337,
 "eval_steps_per_second": 56.48,
 "step": 15000
 },
 ],
 "logging_steps": 10,
@@ -10646,7 +12412,7 @@
 "attributes": {}
 }
 },
-"total_flos":
 "train_batch_size": 4,
 "trial_name": null,
 "trial_params": null
 {
 "best_metric": null,
 "best_model_checkpoint": null,
+"epoch": 0.7766516329100582,
 "eval_steps": 1000,
+"global_step": 17500,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "eval_samples_per_second": 1807.337,
 "eval_steps_per_second": 56.48,
 "step": 15000
| 10630 |
+
},
|
| 10631 |
+
{
|
| 10632 |
+
"epoch": 0.6661452005702841,
|
| 10633 |
+
"grad_norm": 70.91802215576172,
|
| 10634 |
+
"learning_rate": 9.973978721146893e-06,
|
| 10635 |
+
"loss": 10.7977,
|
| 10636 |
+
"step": 15010
|
| 10637 |
+
},
|
| 10638 |
+
{
|
| 10639 |
+
"epoch": 0.6665890015033756,
|
| 10640 |
+
"grad_norm": 73.55622100830078,
|
| 10641 |
+
"learning_rate": 9.973961385184965e-06,
|
| 10642 |
+
"loss": 10.6874,
|
| 10643 |
+
"step": 15020
|
| 10644 |
+
},
|
| 10645 |
+
{
|
| 10646 |
+
"epoch": 0.6670328024364671,
|
| 10647 |
+
"grad_norm": 83.61444854736328,
|
| 10648 |
+
"learning_rate": 9.973944049223038e-06,
|
| 10649 |
+
"loss": 10.7632,
|
| 10650 |
+
"step": 15030
|
| 10651 |
+
},
|
| 10652 |
+
{
|
| 10653 |
+
"epoch": 0.6674766033695586,
|
| 10654 |
+
"grad_norm": 67.87864685058594,
|
| 10655 |
+
"learning_rate": 9.97392671326111e-06,
|
| 10656 |
+
"loss": 10.6371,
|
| 10657 |
+
"step": 15040
|
| 10658 |
+
},
|
| 10659 |
+
{
|
| 10660 |
+
"epoch": 0.6679204043026501,
|
| 10661 |
+
"grad_norm": 67.57317352294922,
|
| 10662 |
+
"learning_rate": 9.973909377299182e-06,
|
| 10663 |
+
"loss": 10.4995,
|
| 10664 |
+
"step": 15050
|
| 10665 |
+
},
|
| 10666 |
+
{
|
| 10667 |
+
"epoch": 0.6683642052357415,
|
| 10668 |
+
"grad_norm": 75.98468780517578,
|
| 10669 |
+
"learning_rate": 9.973892041337255e-06,
|
| 10670 |
+
"loss": 10.5515,
|
| 10671 |
+
"step": 15060
|
| 10672 |
+
},
|
| 10673 |
+
{
|
| 10674 |
+
"epoch": 0.668808006168833,
|
| 10675 |
+
"grad_norm": 70.60570526123047,
|
| 10676 |
+
"learning_rate": 9.973874705375328e-06,
|
| 10677 |
+
"loss": 10.7693,
|
| 10678 |
+
"step": 15070
|
| 10679 |
+
},
|
| 10680 |
+
{
|
| 10681 |
+
"epoch": 0.6692518071019244,
|
| 10682 |
+
"grad_norm": 64.22474670410156,
|
| 10683 |
+
"learning_rate": 9.9738573694134e-06,
|
| 10684 |
+
"loss": 10.5848,
|
| 10685 |
+
"step": 15080
|
| 10686 |
+
},
|
| 10687 |
+
{
|
| 10688 |
+
"epoch": 0.6696956080350159,
|
| 10689 |
+
"grad_norm": 70.5452880859375,
|
| 10690 |
+
"learning_rate": 9.973840033451473e-06,
|
| 10691 |
+
"loss": 10.1573,
|
| 10692 |
+
"step": 15090
|
| 10693 |
+
},
|
| 10694 |
+
{
|
| 10695 |
+
"epoch": 0.6701394089681073,
|
| 10696 |
+
"grad_norm": 65.60162353515625,
|
| 10697 |
+
"learning_rate": 9.973822697489546e-06,
|
| 10698 |
+
"loss": 10.7798,
|
| 10699 |
+
"step": 15100
|
| 10700 |
+
},
|
| 10701 |
+
{
|
| 10702 |
+
"epoch": 0.6705832099011988,
|
| 10703 |
+
"grad_norm": 66.75809478759766,
|
| 10704 |
+
"learning_rate": 9.973805361527617e-06,
|
| 10705 |
+
"loss": 10.576,
|
| 10706 |
+
"step": 15110
|
| 10707 |
+
},
|
| 10708 |
+
{
|
| 10709 |
+
"epoch": 0.6710270108342903,
|
| 10710 |
+
"grad_norm": 71.2154312133789,
|
| 10711 |
+
"learning_rate": 9.97378802556569e-06,
|
| 10712 |
+
"loss": 11.1499,
|
| 10713 |
+
"step": 15120
|
| 10714 |
+
},
|
| 10715 |
+
{
|
| 10716 |
+
"epoch": 0.6714708117673818,
|
| 10717 |
+
"grad_norm": 77.17507934570312,
|
| 10718 |
+
"learning_rate": 9.973770689603763e-06,
|
| 10719 |
+
"loss": 10.311,
|
| 10720 |
+
"step": 15130
|
| 10721 |
+
},
|
| 10722 |
+
{
|
| 10723 |
+
"epoch": 0.6719146127004733,
|
| 10724 |
+
"grad_norm": 76.38935852050781,
|
| 10725 |
+
"learning_rate": 9.973753353641835e-06,
|
| 10726 |
+
"loss": 10.6521,
|
| 10727 |
+
"step": 15140
|
| 10728 |
+
},
|
| 10729 |
+
{
|
| 10730 |
+
"epoch": 0.6723584136335646,
|
| 10731 |
+
"grad_norm": 65.29672241210938,
|
| 10732 |
+
"learning_rate": 9.973736017679908e-06,
|
| 10733 |
+
"loss": 10.6403,
|
| 10734 |
+
"step": 15150
|
| 10735 |
+
},
|
| 10736 |
+
{
|
| 10737 |
+
"epoch": 0.6728022145666561,
|
| 10738 |
+
"grad_norm": 63.178077697753906,
|
| 10739 |
+
"learning_rate": 9.973718681717981e-06,
|
| 10740 |
+
"loss": 10.3128,
|
| 10741 |
+
"step": 15160
|
| 10742 |
+
},
|
| 10743 |
+
{
|
| 10744 |
+
"epoch": 0.6732460154997476,
|
| 10745 |
+
"grad_norm": 65.84847259521484,
|
| 10746 |
+
"learning_rate": 9.973701345756054e-06,
|
| 10747 |
+
"loss": 10.3361,
|
| 10748 |
+
"step": 15170
|
| 10749 |
+
},
|
| 10750 |
+
{
|
| 10751 |
+
"epoch": 0.6736898164328391,
|
| 10752 |
+
"grad_norm": 72.8542251586914,
|
| 10753 |
+
"learning_rate": 9.973684009794125e-06,
|
| 10754 |
+
"loss": 10.3667,
|
| 10755 |
+
"step": 15180
|
| 10756 |
+
},
|
| 10757 |
+
{
|
| 10758 |
+
"epoch": 0.6741336173659305,
|
| 10759 |
+
"grad_norm": 72.04983520507812,
|
| 10760 |
+
"learning_rate": 9.973666673832198e-06,
|
| 10761 |
+
"loss": 11.1101,
|
| 10762 |
+
"step": 15190
|
| 10763 |
+
},
|
| 10764 |
+
{
|
| 10765 |
+
"epoch": 0.674577418299022,
|
| 10766 |
+
"grad_norm": 63.43279266357422,
|
| 10767 |
+
"learning_rate": 9.973649337870272e-06,
|
| 10768 |
+
"loss": 10.7974,
|
| 10769 |
+
"step": 15200
|
| 10770 |
+
},
|
| 10771 |
+
{
|
| 10772 |
+
"epoch": 0.6750212192321134,
|
| 10773 |
+
"grad_norm": 76.92269897460938,
|
| 10774 |
+
"learning_rate": 9.973632001908343e-06,
|
| 10775 |
+
"loss": 10.4673,
|
| 10776 |
+
"step": 15210
|
| 10777 |
+
},
|
| 10778 |
+
{
|
| 10779 |
+
"epoch": 0.6754650201652049,
|
| 10780 |
+
"grad_norm": 78.26722717285156,
|
| 10781 |
+
"learning_rate": 9.973614665946416e-06,
|
| 10782 |
+
"loss": 10.9101,
|
| 10783 |
+
"step": 15220
|
| 10784 |
+
},
|
| 10785 |
+
{
|
| 10786 |
+
"epoch": 0.6759088210982963,
|
| 10787 |
+
"grad_norm": 67.4601058959961,
|
| 10788 |
+
"learning_rate": 9.973597329984489e-06,
|
| 10789 |
+
"loss": 10.7099,
|
| 10790 |
+
"step": 15230
|
| 10791 |
+
},
|
| 10792 |
+
{
|
| 10793 |
+
"epoch": 0.6763526220313878,
|
| 10794 |
+
"grad_norm": 67.75270080566406,
|
| 10795 |
+
"learning_rate": 9.97357999402256e-06,
|
| 10796 |
+
"loss": 10.8756,
|
| 10797 |
+
"step": 15240
|
| 10798 |
+
},
|
| 10799 |
+
{
|
| 10800 |
+
"epoch": 0.6767964229644793,
|
| 10801 |
+
"grad_norm": 65.31672668457031,
|
| 10802 |
+
"learning_rate": 9.973562658060634e-06,
|
| 10803 |
+
"loss": 10.5466,
|
| 10804 |
+
"step": 15250
|
| 10805 |
+
},
|
| 10806 |
+
{
|
| 10807 |
+
"epoch": 0.6772402238975708,
|
| 10808 |
+
"grad_norm": 84.87113952636719,
|
| 10809 |
+
"learning_rate": 9.973545322098707e-06,
|
| 10810 |
+
"loss": 10.5983,
|
| 10811 |
+
"step": 15260
|
| 10812 |
+
},
|
| 10813 |
+
{
|
| 10814 |
+
"epoch": 0.6776840248306623,
|
| 10815 |
+
"grad_norm": 64.18128204345703,
|
| 10816 |
+
"learning_rate": 9.973527986136778e-06,
|
| 10817 |
+
"loss": 10.241,
|
| 10818 |
+
"step": 15270
|
| 10819 |
+
},
|
| 10820 |
+
{
|
| 10821 |
+
"epoch": 0.6781278257637536,
|
| 10822 |
+
"grad_norm": 86.96048736572266,
|
| 10823 |
+
"learning_rate": 9.973510650174851e-06,
|
| 10824 |
+
"loss": 10.4494,
|
| 10825 |
+
"step": 15280
|
| 10826 |
+
},
|
| 10827 |
+
{
|
| 10828 |
+
"epoch": 0.6785716266968451,
|
| 10829 |
+
"grad_norm": 77.51726531982422,
|
| 10830 |
+
"learning_rate": 9.973493314212924e-06,
|
| 10831 |
+
"loss": 10.4231,
|
| 10832 |
+
"step": 15290
|
| 10833 |
+
},
|
| 10834 |
+
{
|
| 10835 |
+
"epoch": 0.6790154276299366,
|
| 10836 |
+
"grad_norm": 74.92723083496094,
|
| 10837 |
+
"learning_rate": 9.973475978250996e-06,
|
| 10838 |
+
"loss": 10.1194,
|
| 10839 |
+
"step": 15300
|
| 10840 |
+
},
|
| 10841 |
+
{
|
| 10842 |
+
"epoch": 0.6794592285630281,
|
| 10843 |
+
"grad_norm": 66.97340393066406,
|
| 10844 |
+
"learning_rate": 9.973458642289069e-06,
|
| 10845 |
+
"loss": 10.7708,
|
| 10846 |
+
"step": 15310
|
| 10847 |
+
},
|
| 10848 |
+
{
|
| 10849 |
+
"epoch": 0.6799030294961195,
|
| 10850 |
+
"grad_norm": 79.47786712646484,
|
| 10851 |
+
"learning_rate": 9.973441306327142e-06,
|
| 10852 |
+
"loss": 10.2036,
|
| 10853 |
+
"step": 15320
|
| 10854 |
+
},
|
| 10855 |
+
{
|
| 10856 |
+
"epoch": 0.680346830429211,
|
| 10857 |
+
"grad_norm": 85.9738540649414,
|
| 10858 |
+
"learning_rate": 9.973423970365213e-06,
|
| 10859 |
+
"loss": 10.7741,
|
| 10860 |
+
"step": 15330
|
| 10861 |
+
},
|
| 10862 |
+
{
|
| 10863 |
+
"epoch": 0.6807906313623024,
|
| 10864 |
+
"grad_norm": 83.63733673095703,
|
| 10865 |
+
"learning_rate": 9.973406634403286e-06,
|
| 10866 |
+
"loss": 10.5102,
|
| 10867 |
+
"step": 15340
|
| 10868 |
+
},
|
| 10869 |
+
{
|
| 10870 |
+
"epoch": 0.6812344322953939,
|
| 10871 |
+
"grad_norm": 68.50708770751953,
|
| 10872 |
+
"learning_rate": 9.97338929844136e-06,
|
| 10873 |
+
"loss": 10.3726,
|
| 10874 |
+
"step": 15350
|
| 10875 |
+
},
|
| 10876 |
+
{
|
| 10877 |
+
"epoch": 0.6816782332284853,
|
| 10878 |
+
"grad_norm": 74.40569305419922,
|
| 10879 |
+
"learning_rate": 9.97337196247943e-06,
|
| 10880 |
+
"loss": 11.1048,
|
| 10881 |
+
"step": 15360
|
| 10882 |
+
},
|
| 10883 |
+
{
|
| 10884 |
+
"epoch": 0.6821220341615768,
|
| 10885 |
+
"grad_norm": 81.3375473022461,
|
| 10886 |
+
"learning_rate": 9.973354626517504e-06,
|
| 10887 |
+
"loss": 9.9319,
|
| 10888 |
+
"step": 15370
|
| 10889 |
+
},
|
| 10890 |
+
{
|
| 10891 |
+
"epoch": 0.6825658350946683,
|
| 10892 |
+
"grad_norm": 74.2603988647461,
|
| 10893 |
+
"learning_rate": 9.973337290555577e-06,
|
| 10894 |
+
"loss": 10.8757,
|
| 10895 |
+
"step": 15380
|
| 10896 |
+
},
|
| 10897 |
+
{
|
| 10898 |
+
"epoch": 0.6830096360277598,
|
| 10899 |
+
"grad_norm": 71.74883270263672,
|
| 10900 |
+
"learning_rate": 9.97331995459365e-06,
|
| 10901 |
+
"loss": 10.64,
|
| 10902 |
+
"step": 15390
|
| 10903 |
+
},
|
| 10904 |
+
{
|
| 10905 |
+
"epoch": 0.6834534369608513,
|
| 10906 |
+
"grad_norm": 68.61750030517578,
|
| 10907 |
+
"learning_rate": 9.973302618631721e-06,
|
| 10908 |
+
"loss": 10.7962,
|
| 10909 |
+
"step": 15400
|
| 10910 |
+
},
|
| 10911 |
+
{
|
| 10912 |
+
"epoch": 0.6838972378939426,
|
| 10913 |
+
"grad_norm": 59.030921936035156,
|
| 10914 |
+
"learning_rate": 9.973285282669794e-06,
|
| 10915 |
+
"loss": 10.5287,
|
| 10916 |
+
"step": 15410
|
| 10917 |
+
},
|
| 10918 |
+
{
|
| 10919 |
+
"epoch": 0.6843410388270341,
|
| 10920 |
+
"grad_norm": 76.87126922607422,
|
| 10921 |
+
"learning_rate": 9.973267946707867e-06,
|
| 10922 |
+
"loss": 10.624,
|
| 10923 |
+
"step": 15420
|
| 10924 |
+
},
|
| 10925 |
+
{
|
| 10926 |
+
"epoch": 0.6847848397601256,
|
| 10927 |
+
"grad_norm": 78.29729461669922,
|
| 10928 |
+
"learning_rate": 9.973250610745939e-06,
|
| 10929 |
+
"loss": 10.6998,
|
| 10930 |
+
"step": 15430
|
| 10931 |
+
},
|
| 10932 |
+
{
|
| 10933 |
+
"epoch": 0.6852286406932171,
|
| 10934 |
+
"grad_norm": 70.97583770751953,
|
| 10935 |
+
"learning_rate": 9.973233274784012e-06,
|
| 10936 |
+
"loss": 10.6444,
|
| 10937 |
+
"step": 15440
|
| 10938 |
+
},
|
| 10939 |
+
{
|
| 10940 |
+
"epoch": 0.6856724416263085,
|
| 10941 |
+
"grad_norm": 65.24356842041016,
|
| 10942 |
+
"learning_rate": 9.973215938822085e-06,
|
| 10943 |
+
"loss": 10.6682,
|
| 10944 |
+
"step": 15450
|
| 10945 |
+
},
|
| 10946 |
+
{
|
| 10947 |
+
"epoch": 0.6861162425594,
|
| 10948 |
+
"grad_norm": 71.17062377929688,
|
| 10949 |
+
"learning_rate": 9.973198602860156e-06,
|
| 10950 |
+
"loss": 11.0228,
|
| 10951 |
+
"step": 15460
|
| 10952 |
+
},
|
| 10953 |
+
{
|
| 10954 |
+
"epoch": 0.6865600434924914,
|
| 10955 |
+
"grad_norm": 87.5817642211914,
|
| 10956 |
+
"learning_rate": 9.97318126689823e-06,
|
| 10957 |
+
"loss": 10.3963,
|
| 10958 |
+
"step": 15470
|
| 10959 |
+
},
|
| 10960 |
+
{
|
| 10961 |
+
"epoch": 0.6870038444255829,
|
| 10962 |
+
"grad_norm": 62.55752182006836,
|
| 10963 |
+
"learning_rate": 9.973163930936302e-06,
|
| 10964 |
+
"loss": 10.5304,
|
| 10965 |
+
"step": 15480
|
| 10966 |
+
},
|
| 10967 |
+
{
|
| 10968 |
+
"epoch": 0.6874476453586744,
|
| 10969 |
+
"grad_norm": 72.95471954345703,
|
| 10970 |
+
"learning_rate": 9.973146594974374e-06,
|
| 10971 |
+
"loss": 10.7116,
|
| 10972 |
+
"step": 15490
|
| 10973 |
+
},
|
| 10974 |
+
{
|
| 10975 |
+
"epoch": 0.6878914462917658,
|
| 10976 |
+
"grad_norm": 63.889129638671875,
|
| 10977 |
+
"learning_rate": 9.973129259012447e-06,
|
| 10978 |
+
"loss": 10.2496,
|
| 10979 |
+
"step": 15500
|
| 10980 |
+
},
|
| 10981 |
+
{
|
| 10982 |
+
"epoch": 0.6883352472248573,
|
| 10983 |
+
"grad_norm": 70.15679168701172,
|
| 10984 |
+
"learning_rate": 9.97311192305052e-06,
|
| 10985 |
+
"loss": 10.0643,
|
| 10986 |
+
"step": 15510
|
| 10987 |
+
},
|
| 10988 |
+
{
|
| 10989 |
+
"epoch": 0.6887790481579488,
|
| 10990 |
+
"grad_norm": 69.72803497314453,
|
| 10991 |
+
"learning_rate": 9.973094587088591e-06,
|
| 10992 |
+
"loss": 10.5465,
|
| 10993 |
+
"step": 15520
|
| 10994 |
+
},
|
| 10995 |
+
{
|
| 10996 |
+
"epoch": 0.6892228490910403,
|
| 10997 |
+
"grad_norm": 64.5792236328125,
|
| 10998 |
+
"learning_rate": 9.973077251126664e-06,
|
| 10999 |
+
"loss": 10.7322,
|
| 11000 |
+
"step": 15530
|
| 11001 |
+
},
|
| 11002 |
+
{
|
| 11003 |
+
"epoch": 0.6896666500241316,
|
| 11004 |
+
"grad_norm": 82.74343872070312,
|
| 11005 |
+
"learning_rate": 9.973059915164738e-06,
|
| 11006 |
+
"loss": 10.3258,
|
| 11007 |
+
"step": 15540
|
| 11008 |
+
},
|
| 11009 |
+
{
|
| 11010 |
+
"epoch": 0.6901104509572231,
|
| 11011 |
+
"grad_norm": 73.86137390136719,
|
| 11012 |
+
"learning_rate": 9.973042579202809e-06,
|
| 11013 |
+
"loss": 10.4541,
|
| 11014 |
+
"step": 15550
|
| 11015 |
+
},
|
| 11016 |
+
{
|
| 11017 |
+
"epoch": 0.6905542518903146,
|
| 11018 |
+
"grad_norm": 81.49348449707031,
|
| 11019 |
+
"learning_rate": 9.973025243240882e-06,
|
| 11020 |
+
"loss": 10.4592,
|
| 11021 |
+
"step": 15560
|
| 11022 |
+
},
|
| 11023 |
+
{
|
| 11024 |
+
"epoch": 0.6909980528234061,
|
| 11025 |
+
"grad_norm": 70.07816314697266,
|
| 11026 |
+
"learning_rate": 9.973007907278955e-06,
|
| 11027 |
+
"loss": 10.6212,
|
| 11028 |
+
"step": 15570
|
| 11029 |
+
},
|
| 11030 |
+
{
|
| 11031 |
+
"epoch": 0.6914418537564975,
|
| 11032 |
+
"grad_norm": 75.75015258789062,
|
| 11033 |
+
"learning_rate": 9.972990571317026e-06,
|
| 11034 |
+
"loss": 10.459,
|
| 11035 |
+
"step": 15580
|
| 11036 |
+
},
|
| 11037 |
+
{
|
| 11038 |
+
"epoch": 0.691885654689589,
|
| 11039 |
+
"grad_norm": 69.51868438720703,
|
| 11040 |
+
"learning_rate": 9.9729732353551e-06,
|
| 11041 |
+
"loss": 10.6566,
|
| 11042 |
+
"step": 15590
|
| 11043 |
+
},
|
| 11044 |
+
{
|
| 11045 |
+
"epoch": 0.6923294556226804,
|
| 11046 |
+
"grad_norm": 72.89574432373047,
|
| 11047 |
+
"learning_rate": 9.972955899393173e-06,
|
| 11048 |
+
"loss": 10.8038,
|
| 11049 |
+
"step": 15600
|
| 11050 |
+
},
|
| 11051 |
+
{
|
| 11052 |
+
"epoch": 0.6927732565557719,
|
| 11053 |
+
"grad_norm": 70.88813018798828,
|
| 11054 |
+
"learning_rate": 9.972938563431246e-06,
|
| 11055 |
+
"loss": 10.7221,
|
| 11056 |
+
"step": 15610
|
| 11057 |
+
},
|
| 11058 |
+
{
|
| 11059 |
+
"epoch": 0.6932170574888634,
|
| 11060 |
+
"grad_norm": 84.63404083251953,
|
| 11061 |
+
"learning_rate": 9.972921227469317e-06,
|
| 11062 |
+
"loss": 10.8532,
|
| 11063 |
+
"step": 15620
|
| 11064 |
+
},
|
| 11065 |
+
{
|
| 11066 |
+
"epoch": 0.6936608584219548,
|
| 11067 |
+
"grad_norm": 68.11902618408203,
|
| 11068 |
+
"learning_rate": 9.97290389150739e-06,
|
| 11069 |
+
"loss": 10.7919,
|
| 11070 |
+
"step": 15630
|
| 11071 |
+
},
|
| 11072 |
+
{
|
| 11073 |
+
"epoch": 0.6941046593550463,
|
| 11074 |
+
"grad_norm": 88.98064422607422,
|
| 11075 |
+
"learning_rate": 9.972886555545463e-06,
|
| 11076 |
+
"loss": 10.5843,
|
| 11077 |
+
"step": 15640
|
| 11078 |
+
},
|
| 11079 |
+
{
|
| 11080 |
+
"epoch": 0.6945484602881378,
|
| 11081 |
+
"grad_norm": 72.20332336425781,
|
| 11082 |
+
"learning_rate": 9.972869219583535e-06,
|
| 11083 |
+
"loss": 10.7185,
|
| 11084 |
+
"step": 15650
|
| 11085 |
+
},
|
| 11086 |
+
{
|
| 11087 |
+
"epoch": 0.6949922612212293,
|
| 11088 |
+
"grad_norm": 61.422576904296875,
|
| 11089 |
+
"learning_rate": 9.972851883621608e-06,
|
| 11090 |
+
"loss": 10.5118,
|
| 11091 |
+
"step": 15660
|
| 11092 |
+
},
|
| 11093 |
+
{
|
| 11094 |
+
"epoch": 0.6954360621543206,
|
| 11095 |
+
"grad_norm": 62.006317138671875,
|
| 11096 |
+
"learning_rate": 9.97283454765968e-06,
|
| 11097 |
+
"loss": 10.2841,
|
| 11098 |
+
"step": 15670
|
| 11099 |
+
},
|
| 11100 |
+
{
|
| 11101 |
+
"epoch": 0.6958798630874121,
|
| 11102 |
+
"grad_norm": 69.10589599609375,
|
| 11103 |
+
"learning_rate": 9.972817211697752e-06,
|
| 11104 |
+
"loss": 10.6618,
|
| 11105 |
+
"step": 15680
|
| 11106 |
+
},
|
| 11107 |
+
{
|
| 11108 |
+
"epoch": 0.6963236640205036,
|
| 11109 |
+
"grad_norm": 79.93278503417969,
|
| 11110 |
+
"learning_rate": 9.972799875735825e-06,
|
| 11111 |
+
"loss": 10.4638,
|
| 11112 |
+
"step": 15690
|
| 11113 |
+
},
|
| 11114 |
+
{
|
| 11115 |
+
"epoch": 0.6967674649535951,
|
| 11116 |
+
"grad_norm": 71.16869354248047,
|
| 11117 |
+
"learning_rate": 9.972782539773898e-06,
|
| 11118 |
+
"loss": 10.4969,
|
| 11119 |
+
"step": 15700
|
| 11120 |
+
},
|
| 11121 |
+
{
|
| 11122 |
+
"epoch": 0.6972112658866866,
|
| 11123 |
+
"grad_norm": 76.26361846923828,
|
| 11124 |
+
"learning_rate": 9.97276520381197e-06,
|
| 11125 |
+
"loss": 10.5915,
|
| 11126 |
+
"step": 15710
|
| 11127 |
+
},
|
| 11128 |
+
{
|
| 11129 |
+
"epoch": 0.697655066819778,
|
| 11130 |
+
"grad_norm": 80.85043334960938,
|
| 11131 |
+
"learning_rate": 9.972747867850043e-06,
|
| 11132 |
+
"loss": 11.0115,
|
| 11133 |
+
"step": 15720
|
| 11134 |
+
},
|
| 11135 |
+
{
|
| 11136 |
+
"epoch": 0.6980988677528694,
|
| 11137 |
+
"grad_norm": 65.5965576171875,
|
| 11138 |
+
"learning_rate": 9.972730531888116e-06,
|
| 11139 |
+
"loss": 10.756,
|
| 11140 |
+
"step": 15730
|
| 11141 |
+
},
|
| 11142 |
+
{
|
| 11143 |
+
"epoch": 0.6985426686859609,
|
| 11144 |
+
"grad_norm": 67.6102066040039,
|
| 11145 |
+
"learning_rate": 9.972713195926187e-06,
|
| 11146 |
+
"loss": 10.5331,
|
| 11147 |
+
"step": 15740
|
| 11148 |
+
},
|
| 11149 |
+
{
|
| 11150 |
+
"epoch": 0.6989864696190524,
|
| 11151 |
+
"grad_norm": 69.96884155273438,
|
| 11152 |
+
"learning_rate": 9.97269585996426e-06,
|
| 11153 |
+
"loss": 10.6435,
|
| 11154 |
+
"step": 15750
|
| 11155 |
+
},
|
| 11156 |
+
{
|
| 11157 |
+
"epoch": 0.6994302705521438,
|
| 11158 |
+
"grad_norm": 58.43931198120117,
|
| 11159 |
+
"learning_rate": 9.972678524002333e-06,
|
| 11160 |
+
"loss": 10.2629,
|
| 11161 |
+
"step": 15760
|
| 11162 |
+
},
|
| 11163 |
+
{
|
| 11164 |
+
"epoch": 0.6998740714852353,
|
| 11165 |
+
"grad_norm": 89.6247329711914,
|
| 11166 |
+
"learning_rate": 9.972661188040405e-06,
|
| 11167 |
+
"loss": 11.0823,
|
| 11168 |
+
"step": 15770
|
| 11169 |
+
},
|
| 11170 |
+
{
|
| 11171 |
+
"epoch": 0.7003178724183268,
|
| 11172 |
+
"grad_norm": 86.3462142944336,
|
| 11173 |
+
"learning_rate": 9.972643852078478e-06,
|
| 11174 |
+
"loss": 11.0013,
|
| 11175 |
+
"step": 15780
|
| 11176 |
+
},
|
| 11177 |
+
{
|
| 11178 |
+
"epoch": 0.7007616733514183,
|
| 11179 |
+
"grad_norm": 67.15304565429688,
|
| 11180 |
+
"learning_rate": 9.972626516116551e-06,
|
| 11181 |
+
"loss": 10.5305,
|
| 11182 |
+
"step": 15790
|
| 11183 |
+
},
|
| 11184 |
+
{
|
| 11185 |
+
"epoch": 0.7012054742845096,
|
| 11186 |
+
"grad_norm": 70.22341918945312,
|
| 11187 |
+
"learning_rate": 9.972609180154624e-06,
|
| 11188 |
+
"loss": 10.2173,
|
| 11189 |
+
"step": 15800
|
| 11190 |
+
},
|
| 11191 |
+
{
|
| 11192 |
+
"epoch": 0.7016492752176011,
|
| 11193 |
+
"grad_norm": 67.83938598632812,
|
| 11194 |
+
"learning_rate": 9.972591844192695e-06,
|
| 11195 |
+
"loss": 10.1845,
|
| 11196 |
+
"step": 15810
|
| 11197 |
+
},
|
| 11198 |
+
{
|
| 11199 |
+
"epoch": 0.7020930761506926,
|
| 11200 |
+
"grad_norm": 73.8240966796875,
|
| 11201 |
+
"learning_rate": 9.972574508230769e-06,
|
| 11202 |
+
"loss": 10.6668,
|
| 11203 |
+
"step": 15820
|
| 11204 |
+
},
|
| 11205 |
+
{
|
| 11206 |
+
"epoch": 0.7025368770837841,
|
| 11207 |
+
"grad_norm": 74.54997253417969,
|
| 11208 |
+
"learning_rate": 9.972557172268842e-06,
|
| 11209 |
+
"loss": 10.4764,
|
| 11210 |
+
"step": 15830
|
| 11211 |
+
},
|
| 11212 |
+
{
|
| 11213 |
+
"epoch": 0.7029806780168756,
|
| 11214 |
+
"grad_norm": 77.97396087646484,
|
| 11215 |
+
"learning_rate": 9.972539836306913e-06,
|
| 11216 |
+
"loss": 10.8094,
|
| 11217 |
+
"step": 15840
|
| 11218 |
+
},
|
| 11219 |
+
{
|
| 11220 |
+
"epoch": 0.703424478949967,
|
| 11221 |
+
"grad_norm": 69.54945373535156,
|
| 11222 |
+
"learning_rate": 9.972522500344986e-06,
|
| 11223 |
+
"loss": 10.1263,
|
| 11224 |
+
"step": 15850
|
| 11225 |
+
},
|
| 11226 |
+
{
|
| 11227 |
+
"epoch": 0.7038682798830584,
|
| 11228 |
+
"grad_norm": 74.77880859375,
|
| 11229 |
+
"learning_rate": 9.972505164383059e-06,
|
| 11230 |
+
"loss": 10.8418,
|
| 11231 |
+
"step": 15860
|
| 11232 |
+
},
|
| 11233 |
+
{
|
| 11234 |
+
"epoch": 0.7043120808161499,
|
| 11235 |
+
"grad_norm": 73.15492248535156,
|
| 11236 |
+
"learning_rate": 9.97248782842113e-06,
|
| 11237 |
+
"loss": 10.6459,
|
| 11238 |
+
"step": 15870
|
| 11239 |
+
},
|
| 11240 |
+
{
|
| 11241 |
+
"epoch": 0.7047558817492414,
|
| 11242 |
+
"grad_norm": 75.93856048583984,
|
| 11243 |
+
"learning_rate": 9.972470492459204e-06,
|
| 11244 |
+
"loss": 10.5095,
|
| 11245 |
+
"step": 15880
|
| 11246 |
+
},
|
| 11247 |
+
{
|
| 11248 |
+
"epoch": 0.7051996826823328,
|
| 11249 |
+
"grad_norm": 71.69056701660156,
|
| 11250 |
+
"learning_rate": 9.972453156497277e-06,
|
| 11251 |
+
"loss": 10.5275,
|
| 11252 |
+
"step": 15890
|
| 11253 |
+
},
|
| 11254 |
+
{
|
| 11255 |
+
"epoch": 0.7056434836154243,
|
| 11256 |
+
"grad_norm": 70.82780456542969,
|
| 11257 |
+
"learning_rate": 9.972435820535348e-06,
|
| 11258 |
+
"loss": 10.1364,
|
| 11259 |
+
"step": 15900
|
| 11260 |
+
},
|
| 11261 |
+
{
|
| 11262 |
+
"epoch": 0.7060872845485158,
|
| 11263 |
+
"grad_norm": 75.50528717041016,
|
| 11264 |
+
"learning_rate": 9.972418484573421e-06,
|
| 11265 |
+
"loss": 10.4329,
|
| 11266 |
+
"step": 15910
|
| 11267 |
+
},
|
| 11268 |
+
{
|
| 11269 |
+
"epoch": 0.7065310854816073,
|
| 11270 |
+
"grad_norm": 78.77973937988281,
|
| 11271 |
+
"learning_rate": 9.972401148611494e-06,
|
| 11272 |
+
"loss": 10.6305,
|
| 11273 |
+
"step": 15920
|
| 11274 |
+
},
|
| 11275 |
+
{
|
| 11276 |
+
"epoch": 0.7069748864146986,
|
| 11277 |
+
"grad_norm": 70.21820068359375,
|
| 11278 |
+
"learning_rate": 9.972383812649567e-06,
|
| 11279 |
+
"loss": 10.6905,
|
| 11280 |
+
"step": 15930
|
| 11281 |
+
},
|
| 11282 |
+
{
|
| 11283 |
+
"epoch": 0.7074186873477901,
|
| 11284 |
+
"grad_norm": 88.8916015625,
|
| 11285 |
+
"learning_rate": 9.972366476687639e-06,
|
| 11286 |
+
"loss": 10.3912,
|
| 11287 |
+
"step": 15940
|
| 11288 |
+
},
|
| 11289 |
+
{
|
| 11290 |
+
"epoch": 0.7078624882808816,
|
| 11291 |
+
"grad_norm": 84.31858825683594,
|
| 11292 |
+
"learning_rate": 9.972349140725712e-06,
|
| 11293 |
+
"loss": 10.5527,
|
| 11294 |
+
"step": 15950
|
| 11295 |
+
},
|
| 11296 |
+
{
|
| 11297 |
+
"epoch": 0.7083062892139731,
|
| 11298 |
+
"grad_norm": 71.62870788574219,
|
| 11299 |
+
"learning_rate": 9.972331804763785e-06,
|
| 11300 |
+
"loss": 10.6158,
|
| 11301 |
+
"step": 15960
|
| 11302 |
+
},
|
| 11303 |
+
{
|
| 11304 |
+
"epoch": 0.7087500901470646,
|
| 11305 |
+
"grad_norm": 64.43877410888672,
|
| 11306 |
+
"learning_rate": 9.972314468801856e-06,
|
| 11307 |
+
"loss": 10.8082,
|
| 11308 |
+
"step": 15970
|
| 11309 |
+
},
|
| 11310 |
+
{
|
| 11311 |
+
"epoch": 0.709193891080156,
|
| 11312 |
+
"grad_norm": 63.440391540527344,
|
| 11313 |
+
"learning_rate": 9.97229713283993e-06,
|
| 11314 |
+
"loss": 10.5228,
|
| 11315 |
+
"step": 15980
|
| 11316 |
+
},
|
| 11317 |
+
{
|
| 11318 |
+
"epoch": 0.7096376920132474,
|
| 11319 |
+
"grad_norm": 68.9405746459961,
|
| 11320 |
+
"learning_rate": 9.972279796878002e-06,
|
| 11321 |
+
"loss": 10.631,
|
| 11322 |
+
"step": 15990
|
| 11323 |
+
},
|
| 11324 |
+
{
|
| 11325 |
+
"epoch": 0.7100814929463389,
|
| 11326 |
+
"grad_norm": 78.99846649169922,
|
| 11327 |
+
"learning_rate": 9.972262460916074e-06,
|
| 11328 |
+
"loss": 10.7683,
|
| 11329 |
+
"step": 16000
|
| 11330 |
+
},
|
| 11331 |
+
{
|
| 11332 |
+
"epoch": 0.7100814929463389,
|
| 11333 |
+
"eval_loss": 0.3311347961425781,
|
| 11334 |
+
"eval_runtime": 678.7995,
|
| 11335 |
+
"eval_samples_per_second": 1789.028,
|
| 11336 |
+
"eval_steps_per_second": 55.908,
|
| 11337 |
+
"step": 16000
|
| 11338 |
+
},
|
| 11339 |
+
{
|
| 11340 |
+
"epoch": 0.7105252938794304,
|
| 11341 |
+
"grad_norm": 69.47785949707031,
|
| 11342 |
+
"learning_rate": 9.972245124954147e-06,
|
| 11343 |
+
"loss": 10.1387,
|
| 11344 |
+
"step": 16010
|
| 11345 |
+
},
|
| 11346 |
+
{
|
| 11347 |
+
"epoch": 0.7109690948125218,
|
| 11348 |
+
"grad_norm": 69.83346557617188,
|
| 11349 |
+
"learning_rate": 9.97222778899222e-06,
|
| 11350 |
+
"loss": 10.7619,
|
| 11351 |
+
"step": 16020
|
| 11352 |
+
},
|
| 11353 |
+
{
|
| 11354 |
+
"epoch": 0.7114128957456133,
|
| 11355 |
+
"grad_norm": 84.6226577758789,
|
| 11356 |
+
"learning_rate": 9.972210453030291e-06,
|
| 11357 |
+
"loss": 10.7781,
|
| 11358 |
+
"step": 16030
|
| 11359 |
+
},
|
| 11360 |
+
{
|
| 11361 |
+
"epoch": 0.7118566966787048,
|
| 11362 |
+
"grad_norm": 68.47029876708984,
|
| 11363 |
+
"learning_rate": 9.972193117068364e-06,
|
| 11364 |
+
"loss": 10.684,
|
| 11365 |
+
"step": 16040
|
| 11366 |
+
},
|
| 11367 |
+
{
|
| 11368 |
+
"epoch": 0.7123004976117963,
|
| 11369 |
+
"grad_norm": 78.78176879882812,
|
| 11370 |
+
"learning_rate": 9.972175781106437e-06,
|
| 11371 |
+
"loss": 10.7629,
|
| 11372 |
+
"step": 16050
|
| 11373 |
+
},
|
| 11374 |
+
{
|
| 11375 |
+
"epoch": 0.7127442985448877,
|
| 11376 |
+
"grad_norm": 70.59607696533203,
|
| 11377 |
+
"learning_rate": 9.97215844514451e-06,
|
| 11378 |
+
"loss": 10.7124,
|
| 11379 |
+
"step": 16060
|
| 11380 |
+
},
|
| 11381 |
+
{
|
| 11382 |
+
"epoch": 0.7131880994779791,
|
| 11383 |
+
"grad_norm": 69.93449401855469,
|
| 11384 |
+
"learning_rate": 9.972141109182582e-06,
|
| 11385 |
+
"loss": 11.4954,
|
| 11386 |
+
"step": 16070
|
| 11387 |
+
},
|
| 11388 |
+
{
|
| 11389 |
+
"epoch": 0.7136319004110706,
|
| 11390 |
+
"grad_norm": 67.49927520751953,
|
| 11391 |
+
"learning_rate": 9.972123773220655e-06,
|
| 11392 |
+
"loss": 10.9881,
|
| 11393 |
+
"step": 16080
|
| 11394 |
+
},
|
| 11395 |
+
{
|
| 11396 |
+
"epoch": 0.7140757013441621,
|
| 11397 |
+
"grad_norm": 66.40914916992188,
|
| 11398 |
+
"learning_rate": 9.972106437258728e-06,
|
| 11399 |
+
"loss": 10.8346,
|
| 11400 |
+
"step": 16090
|
| 11401 |
+
},
|
| 11402 |
+
{
|
| 11403 |
+
"epoch": 0.7145195022772536,
|
| 11404 |
+
"grad_norm": 66.59365844726562,
|
| 11405 |
+
"learning_rate": 9.9720891012968e-06,
|
| 11406 |
+
"loss": 10.703,
|
| 11407 |
+
"step": 16100
|
| 11408 |
+
},
|
| 11409 |
+
{
|
| 11410 |
+
"epoch": 0.714963303210345,
|
| 11411 |
+
"grad_norm": 61.00757598876953,
|
| 11412 |
+
"learning_rate": 9.972071765334873e-06,
|
| 11413 |
+
"loss": 10.3785,
|
| 11414 |
+
"step": 16110
|
| 11415 |
+
},
|
| 11416 |
+
{
|
| 11417 |
+
"epoch": 0.7154071041434364,
|
| 11418 |
+
"grad_norm": 78.33125305175781,
|
| 11419 |
+
"learning_rate": 9.972054429372946e-06,
|
| 11420 |
+
"loss": 10.8235,
|
| 11421 |
+
"step": 16120
|
| 11422 |
+
},
|
| 11423 |
+
{
|
| 11424 |
+
"epoch": 0.7158509050765279,
|
| 11425 |
+
"grad_norm": 63.80758285522461,
|
| 11426 |
+
"learning_rate": 9.972037093411017e-06,
|
| 11427 |
+
"loss": 10.8951,
|
| 11428 |
+
"step": 16130
|
| 11429 |
+
},
|
| 11430 |
+
{
|
| 11431 |
+
"epoch": 0.7162947060096194,
|
| 11432 |
+
"grad_norm": 76.70967864990234,
|
| 11433 |
+
"learning_rate": 9.97201975744909e-06,
|
| 11434 |
+
"loss": 10.9641,
|
| 11435 |
+
"step": 16140
|
| 11436 |
+
},
|
| 11437 |
+
{
|
| 11438 |
+
"epoch": 0.7167385069427108,
|
| 11439 |
+
"grad_norm": 71.60503387451172,
|
| 11440 |
+
"learning_rate": 9.972002421487163e-06,
|
| 11441 |
+
"loss": 10.5517,
|
| 11442 |
+
"step": 16150
|
| 11443 |
+
},
|
| 11444 |
+
{
|
| 11445 |
+
"epoch": 0.7171823078758023,
|
| 11446 |
+
"grad_norm": 74.2575912475586,
|
| 11447 |
+
"learning_rate": 9.971985085525235e-06,
|
| 11448 |
+
"loss": 10.161,
|
| 11449 |
+
"step": 16160
|
| 11450 |
+
},
|
| 11451 |
+
{
|
| 11452 |
+
"epoch": 0.7176261088088938,
|
| 11453 |
+
"grad_norm": 71.10789489746094,
|
| 11454 |
+
"learning_rate": 9.971967749563308e-06,
|
| 11455 |
+
"loss": 10.6625,
|
| 11456 |
+
"step": 16170
|
| 11457 |
+
},
|
| 11458 |
+
{
|
| 11459 |
+
"epoch": 0.7180699097419853,
|
| 11460 |
+
"grad_norm": 71.27813720703125,
|
| 11461 |
+
"learning_rate": 9.97195041360138e-06,
|
| 11462 |
+
"loss": 10.9964,
|
| 11463 |
+
"step": 16180
|
| 11464 |
+
},
|
| 11465 |
+
{
|
| 11466 |
+
"epoch": 0.7185137106750767,
|
| 11467 |
+
"grad_norm": 63.57979965209961,
|
| 11468 |
+
"learning_rate": 9.971933077639454e-06,
|
| 11469 |
+
"loss": 10.1936,
|
| 11470 |
+
"step": 16190
|
| 11471 |
+
},
|
| 11472 |
+
{
|
| 11473 |
+
"epoch": 0.7189575116081681,
|
| 11474 |
+
"grad_norm": 74.10035705566406,
|
| 11475 |
+
"learning_rate": 9.971915741677525e-06,
|
| 11476 |
+
"loss": 10.8882,
|
| 11477 |
+
"step": 16200
|
| 11478 |
+
},
|
| 11479 |
+
{
|
| 11480 |
+
"epoch": 0.7194013125412596,
|
| 11481 |
+
"grad_norm": 73.26065826416016,
|
| 11482 |
+
"learning_rate": 9.971898405715598e-06,
|
| 11483 |
+
"loss": 10.5628,
|
| 11484 |
+
"step": 16210
|
| 11485 |
+
},
|
| 11486 |
+
{
|
| 11487 |
+
"epoch": 0.7198451134743511,
|
| 11488 |
+
"grad_norm": 68.37783813476562,
|
| 11489 |
+
"learning_rate": 9.971881069753671e-06,
|
| 11490 |
+
"loss": 10.6096,
|
| 11491 |
+
"step": 16220
|
| 11492 |
+
},
|
| 11493 |
+
{
|
| 11494 |
+
"epoch": 0.7202889144074426,
|
| 11495 |
+
"grad_norm": 84.5615234375,
|
| 11496 |
+
"learning_rate": 9.971863733791743e-06,
|
| 11497 |
+
"loss": 10.8151,
|
| 11498 |
+
"step": 16230
|
| 11499 |
+
},
|
| 11500 |
+
{
|
| 11501 |
+
"epoch": 0.720732715340534,
|
| 11502 |
+
"grad_norm": 63.7740478515625,
|
| 11503 |
+
"learning_rate": 9.971846397829816e-06,
|
| 11504 |
+
"loss": 10.403,
|
| 11505 |
+
"step": 16240
|
| 11506 |
+
},
|
| 11507 |
+
{
|
| 11508 |
+
"epoch": 0.7211765162736254,
|
| 11509 |
+
"grad_norm": 70.54612731933594,
|
| 11510 |
+
"learning_rate": 9.971829061867889e-06,
|
| 11511 |
+
"loss": 10.0854,
|
| 11512 |
+
"step": 16250
|
| 11513 |
+
},
|
| 11514 |
+
{
|
| 11515 |
+
"epoch": 0.7216203172067169,
|
| 11516 |
+
"grad_norm": 63.2757682800293,
|
| 11517 |
+
"learning_rate": 9.97181172590596e-06,
|
| 11518 |
+
"loss": 10.6528,
|
| 11519 |
+
"step": 16260
|
| 11520 |
+
},
|
| 11521 |
+
{
|
| 11522 |
+
"epoch": 0.7220641181398084,
|
| 11523 |
+
"grad_norm": 71.35702514648438,
|
| 11524 |
+
"learning_rate": 9.971794389944033e-06,
|
| 11525 |
+
"loss": 10.4698,
|
| 11526 |
+
"step": 16270
|
| 11527 |
+
},
|
| 11528 |
+
{
|
| 11529 |
+
"epoch": 0.7225079190728998,
|
| 11530 |
+
"grad_norm": 66.68602752685547,
|
| 11531 |
+
"learning_rate": 9.971777053982106e-06,
|
| 11532 |
+
"loss": 10.593,
|
| 11533 |
+
"step": 16280
|
| 11534 |
+
},
|
| 11535 |
+
{
|
| 11536 |
+
"epoch": 0.7229517200059913,
|
| 11537 |
+
"grad_norm": 67.9871597290039,
|
| 11538 |
+
"learning_rate": 9.971759718020178e-06,
|
| 11539 |
+
"loss": 10.6789,
|
| 11540 |
+
"step": 16290
|
| 11541 |
+
},
|
| 11542 |
+
{
|
| 11543 |
+
"epoch": 0.7233955209390828,
|
| 11544 |
+
"grad_norm": 78.72682189941406,
|
| 11545 |
+
"learning_rate": 9.97174238205825e-06,
|
| 11546 |
+
"loss": 10.5447,
|
| 11547 |
+
"step": 16300
|
| 11548 |
+
},
|
| 11549 |
+
{
|
| 11550 |
+
"epoch": 0.7238393218721743,
|
| 11551 |
+
"grad_norm": 74.193359375,
|
| 11552 |
+
"learning_rate": 9.971725046096324e-06,
|
| 11553 |
+
"loss": 10.7683,
|
| 11554 |
+
"step": 16310
|
| 11555 |
+
},
|
| 11556 |
+
{
|
| 11557 |
+
"epoch": 0.7242831228052657,
|
| 11558 |
+
"grad_norm": 73.49239349365234,
|
| 11559 |
+
"learning_rate": 9.971707710134397e-06,
|
| 11560 |
+
"loss": 10.0631,
|
| 11561 |
+
"step": 16320
|
| 11562 |
+
},
|
| 11563 |
+
{
|
| 11564 |
+
"epoch": 0.7247269237383571,
|
| 11565 |
+
"grad_norm": 85.48190307617188,
|
| 11566 |
+
"learning_rate": 9.971690374172468e-06,
|
| 11567 |
+
"loss": 10.7823,
|
| 11568 |
+
"step": 16330
|
| 11569 |
+
},
|
| 11570 |
+
{
|
| 11571 |
+
"epoch": 0.7251707246714486,
|
| 11572 |
+
"grad_norm": 62.24036407470703,
|
| 11573 |
+
"learning_rate": 9.971673038210541e-06,
|
| 11574 |
+
"loss": 9.9273,
|
| 11575 |
+
"step": 16340
|
| 11576 |
+
},
|
| 11577 |
+
{
|
| 11578 |
+
"epoch": 0.7256145256045401,
|
| 11579 |
+
"grad_norm": 64.34358978271484,
|
| 11580 |
+
"learning_rate": 9.971655702248615e-06,
|
| 11581 |
+
"loss": 10.3995,
|
| 11582 |
+
"step": 16350
|
| 11583 |
+
},
|
| 11584 |
+
{
|
| 11585 |
+
"epoch": 0.7260583265376316,
|
| 11586 |
+
"grad_norm": 66.04435729980469,
|
| 11587 |
+
"learning_rate": 9.971638366286686e-06,
|
| 11588 |
+
"loss": 10.2451,
|
| 11589 |
+
"step": 16360
|
| 11590 |
+
},
|
| 11591 |
+
{
|
| 11592 |
+
"epoch": 0.726502127470723,
|
| 11593 |
+
"grad_norm": 64.41586303710938,
|
| 11594 |
+
"learning_rate": 9.971621030324759e-06,
|
| 11595 |
+
"loss": 10.8199,
|
| 11596 |
+
"step": 16370
|
| 11597 |
+
},
|
| 11598 |
+
{
|
| 11599 |
+
"epoch": 0.7269459284038144,
|
| 11600 |
+
"grad_norm": 69.1751937866211,
|
| 11601 |
+
"learning_rate": 9.971603694362832e-06,
|
| 11602 |
+
"loss": 10.4755,
|
| 11603 |
+
"step": 16380
|
| 11604 |
+
},
|
| 11605 |
+
{
|
| 11606 |
+
"epoch": 0.7273897293369059,
|
| 11607 |
+
"grad_norm": 66.9188003540039,
|
| 11608 |
+
"learning_rate": 9.971586358400903e-06,
|
| 11609 |
+
"loss": 10.3615,
|
| 11610 |
+
"step": 16390
|
| 11611 |
+
},
|
| 11612 |
+
{
|
| 11613 |
+
"epoch": 0.7278335302699974,
|
| 11614 |
+
"grad_norm": 75.37189483642578,
|
| 11615 |
+
"learning_rate": 9.971569022438977e-06,
|
| 11616 |
+
"loss": 11.1675,
|
| 11617 |
+
"step": 16400
|
| 11618 |
+
},
|
| 11619 |
+
{
|
| 11620 |
+
"epoch": 0.7282773312030889,
|
| 11621 |
+
"grad_norm": 64.27860260009766,
|
| 11622 |
+
"learning_rate": 9.97155168647705e-06,
|
| 11623 |
+
"loss": 10.897,
|
| 11624 |
+
"step": 16410
|
| 11625 |
+
},
|
| 11626 |
+
{
|
| 11627 |
+
"epoch": 0.7287211321361803,
|
| 11628 |
+
"grad_norm": 71.226806640625,
|
| 11629 |
+
"learning_rate": 9.971534350515123e-06,
|
| 11630 |
+
"loss": 10.5914,
|
| 11631 |
+
"step": 16420
|
| 11632 |
+
},
|
| 11633 |
+
{
|
| 11634 |
+
"epoch": 0.7291649330692718,
|
| 11635 |
+
"grad_norm": 65.22527313232422,
|
| 11636 |
+
"learning_rate": 9.971517014553194e-06,
|
| 11637 |
+
"loss": 10.7522,
|
| 11638 |
+
"step": 16430
|
| 11639 |
+
},
|
| 11640 |
+
{
|
| 11641 |
+
"epoch": 0.7296087340023633,
|
| 11642 |
+
"grad_norm": 70.96646118164062,
|
| 11643 |
+
"learning_rate": 9.971499678591267e-06,
|
| 11644 |
+
"loss": 10.519,
|
| 11645 |
+
"step": 16440
|
| 11646 |
+
},
|
| 11647 |
+
{
|
| 11648 |
+
"epoch": 0.7300525349354547,
|
| 11649 |
+
"grad_norm": 78.15453338623047,
|
| 11650 |
+
"learning_rate": 9.97148234262934e-06,
|
| 11651 |
+
"loss": 10.4566,
|
| 11652 |
+
"step": 16450
|
| 11653 |
+
},
|
| 11654 |
+
{
|
| 11655 |
+
"epoch": 0.7304963358685461,
|
| 11656 |
+
"grad_norm": 66.62492370605469,
|
| 11657 |
+
"learning_rate": 9.971465006667412e-06,
|
| 11658 |
+
"loss": 11.0907,
|
| 11659 |
+
"step": 16460
|
| 11660 |
+
},
|
| 11661 |
+
{
|
| 11662 |
+
"epoch": 0.7309401368016376,
|
| 11663 |
+
"grad_norm": 76.4091796875,
|
| 11664 |
+
"learning_rate": 9.971447670705485e-06,
|
| 11665 |
+
"loss": 10.673,
|
| 11666 |
+
"step": 16470
|
| 11667 |
+
},
|
| 11668 |
+
{
|
| 11669 |
+
"epoch": 0.7313839377347291,
|
| 11670 |
+
"grad_norm": 68.68971252441406,
|
| 11671 |
+
"learning_rate": 9.971430334743558e-06,
|
| 11672 |
+
"loss": 10.1921,
|
| 11673 |
+
"step": 16480
|
| 11674 |
+
},
|
| 11675 |
+
{
|
| 11676 |
+
"epoch": 0.7318277386678206,
|
| 11677 |
+
"grad_norm": 76.54867553710938,
|
| 11678 |
+
"learning_rate": 9.97141299878163e-06,
|
| 11679 |
+
"loss": 10.1928,
|
| 11680 |
+
"step": 16490
|
| 11681 |
+
},
|
| 11682 |
+
{
|
| 11683 |
+
"epoch": 0.732271539600912,
|
| 11684 |
+
"grad_norm": 72.19425964355469,
|
| 11685 |
+
"learning_rate": 9.971395662819702e-06,
|
| 11686 |
+
"loss": 10.8393,
|
| 11687 |
+
"step": 16500
|
| 11688 |
+
},
|
| 11689 |
+
{
|
| 11690 |
+
"epoch": 0.7327153405340034,
|
| 11691 |
+
"grad_norm": 68.68523406982422,
|
| 11692 |
+
"learning_rate": 9.971378326857775e-06,
|
| 11693 |
+
"loss": 10.8051,
|
| 11694 |
+
"step": 16510
|
| 11695 |
+
},
|
| 11696 |
+
{
|
| 11697 |
+
"epoch": 0.7331591414670949,
|
| 11698 |
+
"grad_norm": 73.1769027709961,
|
| 11699 |
+
"learning_rate": 9.971360990895847e-06,
|
| 11700 |
+
"loss": 10.6388,
|
| 11701 |
+
"step": 16520
|
| 11702 |
+
},
|
| 11703 |
+
{
|
| 11704 |
+
"epoch": 0.7336029424001864,
|
| 11705 |
+
"grad_norm": 61.5041618347168,
|
| 11706 |
+
"learning_rate": 9.97134365493392e-06,
|
| 11707 |
+
"loss": 10.3324,
|
| 11708 |
+
"step": 16530
|
| 11709 |
+
},
|
| 11710 |
+
{
|
| 11711 |
+
"epoch": 0.7340467433332779,
|
| 11712 |
+
"grad_norm": 64.92015838623047,
|
| 11713 |
+
"learning_rate": 9.971326318971993e-06,
|
| 11714 |
+
"loss": 10.3545,
|
| 11715 |
+
"step": 16540
|
| 11716 |
+
},
|
| 11717 |
+
{
|
| 11718 |
+
"epoch": 0.7344905442663693,
|
| 11719 |
+
"grad_norm": 70.95281982421875,
|
| 11720 |
+
"learning_rate": 9.971308983010066e-06,
|
| 11721 |
+
"loss": 10.4945,
|
| 11722 |
+
"step": 16550
|
| 11723 |
+
},
|
| 11724 |
+
{
|
| 11725 |
+
"epoch": 0.7349343451994608,
|
| 11726 |
+
"grad_norm": 62.154808044433594,
|
| 11727 |
+
"learning_rate": 9.971291647048137e-06,
|
| 11728 |
+
"loss": 10.4194,
|
| 11729 |
+
"step": 16560
|
| 11730 |
+
},
|
| 11731 |
+
{
|
| 11732 |
+
"epoch": 0.7353781461325523,
|
| 11733 |
+
"grad_norm": 63.85173416137695,
|
| 11734 |
+
"learning_rate": 9.97127431108621e-06,
|
| 11735 |
+
"loss": 10.6319,
|
| 11736 |
+
"step": 16570
|
| 11737 |
+
},
|
| 11738 |
+
{
|
| 11739 |
+
"epoch": 0.7358219470656437,
|
| 11740 |
+
"grad_norm": 87.90074157714844,
|
| 11741 |
+
"learning_rate": 9.971256975124283e-06,
|
| 11742 |
+
"loss": 10.2764,
|
| 11743 |
+
"step": 16580
|
| 11744 |
+
},
|
| 11745 |
+
{
|
| 11746 |
+
"epoch": 0.7362657479987351,
|
| 11747 |
+
"grad_norm": 68.94241333007812,
|
| 11748 |
+
"learning_rate": 9.971239639162355e-06,
|
| 11749 |
+
"loss": 10.6806,
|
| 11750 |
+
"step": 16590
|
| 11751 |
+
},
|
| 11752 |
+
{
|
| 11753 |
+
"epoch": 0.7367095489318266,
|
| 11754 |
+
"grad_norm": 74.24497985839844,
|
| 11755 |
+
"learning_rate": 9.971222303200428e-06,
|
| 11756 |
+
"loss": 10.0719,
|
| 11757 |
+
"step": 16600
|
| 11758 |
+
},
|
| 11759 |
+
{
|
| 11760 |
+
"epoch": 0.7371533498649181,
|
| 11761 |
+
"grad_norm": 70.3670654296875,
|
| 11762 |
+
"learning_rate": 9.971204967238501e-06,
|
| 11763 |
+
"loss": 10.5849,
|
| 11764 |
+
"step": 16610
|
| 11765 |
+
},
|
| 11766 |
+
{
|
| 11767 |
+
"epoch": 0.7375971507980096,
|
| 11768 |
+
"grad_norm": 69.45691680908203,
|
| 11769 |
+
"learning_rate": 9.971187631276572e-06,
|
| 11770 |
+
"loss": 10.4697,
|
| 11771 |
+
"step": 16620
|
| 11772 |
+
},
|
| 11773 |
+
{
|
| 11774 |
+
"epoch": 0.7380409517311011,
|
| 11775 |
+
"grad_norm": 65.4955062866211,
|
| 11776 |
+
"learning_rate": 9.971170295314645e-06,
|
| 11777 |
+
"loss": 10.3592,
|
| 11778 |
+
"step": 16630
|
| 11779 |
+
},
|
| 11780 |
+
{
|
| 11781 |
+
"epoch": 0.7384847526641924,
|
| 11782 |
+
"grad_norm": 79.28197479248047,
|
| 11783 |
+
"learning_rate": 9.971152959352719e-06,
|
| 11784 |
+
"loss": 10.2718,
|
| 11785 |
+
"step": 16640
|
| 11786 |
+
},
|
| 11787 |
+
{
|
| 11788 |
+
"epoch": 0.7389285535972839,
|
| 11789 |
+
"grad_norm": 57.633644104003906,
|
| 11790 |
+
"learning_rate": 9.97113562339079e-06,
|
| 11791 |
+
"loss": 10.4478,
|
| 11792 |
+
"step": 16650
|
| 11793 |
+
},
|
| 11794 |
+
{
|
| 11795 |
+
"epoch": 0.7393723545303754,
|
| 11796 |
+
"grad_norm": 58.49510955810547,
|
| 11797 |
+
"learning_rate": 9.971118287428863e-06,
|
| 11798 |
+
"loss": 10.6408,
|
| 11799 |
+
"step": 16660
|
| 11800 |
+
},
|
| 11801 |
+
{
|
| 11802 |
+
"epoch": 0.7398161554634669,
|
| 11803 |
+
"grad_norm": 68.87074279785156,
|
| 11804 |
+
"learning_rate": 9.971100951466936e-06,
|
| 11805 |
+
"loss": 10.4892,
|
| 11806 |
+
"step": 16670
|
| 11807 |
+
},
|
| 11808 |
+
{
|
| 11809 |
+
"epoch": 0.7402599563965583,
|
| 11810 |
+
"grad_norm": 83.9663314819336,
|
| 11811 |
+
"learning_rate": 9.971083615505007e-06,
|
| 11812 |
+
"loss": 10.6967,
|
| 11813 |
+
"step": 16680
|
| 11814 |
+
},
|
| 11815 |
+
{
|
| 11816 |
+
"epoch": 0.7407037573296498,
|
| 11817 |
+
"grad_norm": 66.5399398803711,
|
| 11818 |
+
"learning_rate": 9.97106627954308e-06,
|
| 11819 |
+
"loss": 10.2476,
|
| 11820 |
+
"step": 16690
|
| 11821 |
+
},
|
| 11822 |
+
{
|
| 11823 |
+
"epoch": 0.7411475582627413,
|
| 11824 |
+
"grad_norm": 71.39287567138672,
|
| 11825 |
+
"learning_rate": 9.971048943581154e-06,
|
| 11826 |
+
"loss": 10.4425,
|
| 11827 |
+
"step": 16700
|
| 11828 |
+
},
|
| 11829 |
+
{
|
| 11830 |
+
"epoch": 0.7415913591958327,
|
| 11831 |
+
"grad_norm": 67.80374145507812,
|
| 11832 |
+
"learning_rate": 9.971031607619227e-06,
|
| 11833 |
+
"loss": 10.8299,
|
| 11834 |
+
"step": 16710
|
| 11835 |
+
},
|
| 11836 |
+
{
|
| 11837 |
+
"epoch": 0.7420351601289241,
|
| 11838 |
+
"grad_norm": 63.548011779785156,
|
| 11839 |
+
"learning_rate": 9.971014271657298e-06,
|
| 11840 |
+
"loss": 10.5723,
|
| 11841 |
+
"step": 16720
|
| 11842 |
+
},
|
| 11843 |
+
{
|
| 11844 |
+
"epoch": 0.7424789610620156,
|
| 11845 |
+
"grad_norm": 66.04682922363281,
|
| 11846 |
+
"learning_rate": 9.970996935695371e-06,
|
| 11847 |
+
"loss": 10.3872,
|
| 11848 |
+
"step": 16730
|
| 11849 |
+
},
|
| 11850 |
+
{
|
| 11851 |
+
"epoch": 0.7429227619951071,
|
| 11852 |
+
"grad_norm": 69.4638900756836,
|
| 11853 |
+
"learning_rate": 9.970979599733444e-06,
|
| 11854 |
+
"loss": 11.023,
|
| 11855 |
+
"step": 16740
|
| 11856 |
+
},
|
| 11857 |
+
{
|
| 11858 |
+
"epoch": 0.7433665629281986,
|
| 11859 |
+
"grad_norm": 61.98347473144531,
|
| 11860 |
+
"learning_rate": 9.970962263771516e-06,
|
| 11861 |
+
"loss": 10.1672,
|
| 11862 |
+
"step": 16750
|
| 11863 |
+
},
|
| 11864 |
+
{
|
| 11865 |
+
"epoch": 0.7438103638612901,
|
| 11866 |
+
"grad_norm": 75.08468627929688,
|
| 11867 |
+
"learning_rate": 9.970944927809589e-06,
|
| 11868 |
+
"loss": 10.4586,
|
| 11869 |
+
"step": 16760
|
| 11870 |
+
},
|
| 11871 |
+
{
|
| 11872 |
+
"epoch": 0.7442541647943814,
|
| 11873 |
+
"grad_norm": 58.552120208740234,
|
| 11874 |
+
"learning_rate": 9.970927591847662e-06,
|
| 11875 |
+
"loss": 10.6722,
|
| 11876 |
+
"step": 16770
|
| 11877 |
+
},
|
| 11878 |
+
{
|
| 11879 |
+
"epoch": 0.7446979657274729,
|
| 11880 |
+
"grad_norm": 70.47040557861328,
|
| 11881 |
+
"learning_rate": 9.970910255885733e-06,
|
| 11882 |
+
"loss": 10.1118,
|
| 11883 |
+
"step": 16780
|
| 11884 |
+
},
|
| 11885 |
+
{
|
| 11886 |
+
"epoch": 0.7451417666605644,
|
| 11887 |
+
"grad_norm": 75.66903686523438,
|
| 11888 |
+
"learning_rate": 9.970892919923806e-06,
|
| 11889 |
+
"loss": 10.5813,
|
| 11890 |
+
"step": 16790
|
| 11891 |
+
},
|
| 11892 |
+
{
|
| 11893 |
+
"epoch": 0.7455855675936559,
|
| 11894 |
+
"grad_norm": 79.39729309082031,
|
| 11895 |
+
"learning_rate": 9.97087558396188e-06,
|
| 11896 |
+
"loss": 10.3935,
|
| 11897 |
+
"step": 16800
|
| 11898 |
+
},
|
| 11899 |
+
{
|
| 11900 |
+
"epoch": 0.7460293685267473,
|
| 11901 |
+
"grad_norm": 69.61038970947266,
|
| 11902 |
+
"learning_rate": 9.97085824799995e-06,
|
| 11903 |
+
"loss": 10.3304,
|
| 11904 |
+
"step": 16810
|
| 11905 |
+
},
|
| 11906 |
+
{
|
| 11907 |
+
"epoch": 0.7464731694598388,
|
| 11908 |
+
"grad_norm": 70.38590240478516,
|
| 11909 |
+
"learning_rate": 9.970840912038024e-06,
|
| 11910 |
+
"loss": 10.5146,
|
| 11911 |
+
"step": 16820
|
| 11912 |
+
},
|
| 11913 |
+
{
|
| 11914 |
+
"epoch": 0.7469169703929303,
|
| 11915 |
+
"grad_norm": 83.62046813964844,
|
| 11916 |
+
"learning_rate": 9.970823576076097e-06,
|
| 11917 |
+
"loss": 10.7194,
|
| 11918 |
+
"step": 16830
|
| 11919 |
+
},
|
| 11920 |
+
{
|
| 11921 |
+
"epoch": 0.7473607713260217,
|
| 11922 |
+
"grad_norm": 69.001708984375,
|
| 11923 |
+
"learning_rate": 9.970806240114168e-06,
|
| 11924 |
+
"loss": 10.6674,
|
| 11925 |
+
"step": 16840
|
| 11926 |
+
},
|
| 11927 |
+
{
|
| 11928 |
+
"epoch": 0.7478045722591131,
|
| 11929 |
+
"grad_norm": 65.15716552734375,
|
| 11930 |
+
"learning_rate": 9.970788904152241e-06,
|
| 11931 |
+
"loss": 10.6923,
|
| 11932 |
+
"step": 16850
|
| 11933 |
+
},
|
| 11934 |
+
{
|
| 11935 |
+
"epoch": 0.7482483731922046,
|
| 11936 |
+
"grad_norm": 74.37089538574219,
|
| 11937 |
+
"learning_rate": 9.970771568190314e-06,
|
| 11938 |
+
"loss": 10.7676,
|
| 11939 |
+
"step": 16860
|
| 11940 |
+
},
|
| 11941 |
+
{
|
| 11942 |
+
"epoch": 0.7486921741252961,
|
| 11943 |
+
"grad_norm": 64.5125732421875,
|
| 11944 |
+
"learning_rate": 9.970754232228386e-06,
|
| 11945 |
+
"loss": 10.2827,
|
| 11946 |
+
"step": 16870
|
| 11947 |
+
},
|
| 11948 |
+
{
|
| 11949 |
+
"epoch": 0.7491359750583876,
|
| 11950 |
+
"grad_norm": 69.91007232666016,
|
| 11951 |
+
"learning_rate": 9.970736896266459e-06,
|
| 11952 |
+
"loss": 10.6307,
|
| 11953 |
+
"step": 16880
|
| 11954 |
+
},
|
| 11955 |
+
{
|
| 11956 |
+
"epoch": 0.7495797759914791,
|
| 11957 |
+
"grad_norm": 59.29727554321289,
|
| 11958 |
+
"learning_rate": 9.970719560304532e-06,
|
| 11959 |
+
"loss": 10.4378,
|
| 11960 |
+
"step": 16890
|
| 11961 |
+
},
|
| 11962 |
+
{
|
| 11963 |
+
"epoch": 0.7500235769245704,
|
| 11964 |
+
"grad_norm": 90.56709289550781,
|
| 11965 |
+
"learning_rate": 9.970702224342603e-06,
|
| 11966 |
+
"loss": 10.3348,
|
| 11967 |
+
"step": 16900
|
| 11968 |
+
},
|
| 11969 |
+
{
|
| 11970 |
+
"epoch": 0.7504673778576619,
|
| 11971 |
+
"grad_norm": 70.33431243896484,
|
| 11972 |
+
"learning_rate": 9.970684888380676e-06,
|
| 11973 |
+
"loss": 10.3737,
|
| 11974 |
+
"step": 16910
|
| 11975 |
+
},
|
| 11976 |
+
{
|
| 11977 |
+
"epoch": 0.7509111787907534,
|
| 11978 |
+
"grad_norm": 62.954490661621094,
|
| 11979 |
+
"learning_rate": 9.97066755241875e-06,
|
| 11980 |
+
"loss": 10.6825,
|
| 11981 |
+
"step": 16920
|
| 11982 |
+
},
|
| 11983 |
+
{
|
| 11984 |
+
"epoch": 0.7513549797238449,
|
| 11985 |
+
"grad_norm": 64.03510284423828,
|
| 11986 |
+
"learning_rate": 9.970650216456823e-06,
|
| 11987 |
+
"loss": 10.4315,
|
| 11988 |
+
"step": 16930
|
| 11989 |
+
},
|
| 11990 |
+
{
|
| 11991 |
+
"epoch": 0.7517987806569363,
|
| 11992 |
+
"grad_norm": 61.26763153076172,
|
| 11993 |
+
"learning_rate": 9.970632880494894e-06,
|
| 11994 |
+
"loss": 9.9209,
|
| 11995 |
+
"step": 16940
|
| 11996 |
+
},
|
| 11997 |
+
{
|
| 11998 |
+
"epoch": 0.7522425815900278,
|
| 11999 |
+
"grad_norm": 64.45995330810547,
|
| 12000 |
+
"learning_rate": 9.970615544532967e-06,
|
| 12001 |
+
"loss": 10.4647,
|
| 12002 |
+
"step": 16950
|
| 12003 |
+
},
|
| 12004 |
+
{
|
| 12005 |
+
"epoch": 0.7526863825231193,
|
| 12006 |
+
"grad_norm": 71.07040405273438,
|
| 12007 |
+
"learning_rate": 9.97059820857104e-06,
|
| 12008 |
+
"loss": 10.4505,
|
| 12009 |
+
"step": 16960
|
| 12010 |
+
},
|
| 12011 |
+
{
|
| 12012 |
+
"epoch": 0.7531301834562107,
|
| 12013 |
+
"grad_norm": 61.676551818847656,
|
| 12014 |
+
"learning_rate": 9.970580872609112e-06,
|
| 12015 |
+
"loss": 10.1759,
|
| 12016 |
+
"step": 16970
|
| 12017 |
+
},
|
| 12018 |
+
{
|
| 12019 |
+
"epoch": 0.7535739843893022,
|
| 12020 |
+
"grad_norm": 76.77957916259766,
|
| 12021 |
+
"learning_rate": 9.970563536647185e-06,
|
| 12022 |
+
"loss": 10.5416,
|
| 12023 |
+
"step": 16980
|
| 12024 |
+
},
|
| 12025 |
+
{
|
| 12026 |
+
"epoch": 0.7540177853223936,
|
| 12027 |
+
"grad_norm": 83.22810363769531,
|
| 12028 |
+
"learning_rate": 9.970546200685258e-06,
|
| 12029 |
+
"loss": 10.2076,
|
| 12030 |
+
"step": 16990
|
| 12031 |
+
},
|
| 12032 |
+
{
|
| 12033 |
+
"epoch": 0.7544615862554851,
|
| 12034 |
+
"grad_norm": 68.98297882080078,
|
| 12035 |
+
"learning_rate": 9.970528864723329e-06,
|
| 12036 |
+
"loss": 10.4059,
|
| 12037 |
+
"step": 17000
|
| 12038 |
+
},
|
| 12039 |
+
{
|
| 12040 |
+
"epoch": 0.7544615862554851,
|
| 12041 |
+
"eval_loss": 0.3286471664905548,
|
| 12042 |
+
"eval_runtime": 678.3037,
|
| 12043 |
+
"eval_samples_per_second": 1790.335,
|
| 12044 |
+
"eval_steps_per_second": 55.948,
|
| 12045 |
+
"step": 17000
|
| 12046 |
+
},
|
| 12047 |
+
{
|
| 12048 |
+
"epoch": 0.7549053871885766,
|
| 12049 |
+
"grad_norm": 61.05122756958008,
|
| 12050 |
+
"learning_rate": 9.970511528761402e-06,
|
| 12051 |
+
"loss": 10.329,
|
| 12052 |
+
"step": 17010
|
| 12053 |
+
},
|
| 12054 |
+
{
|
| 12055 |
+
"epoch": 0.7553491881216681,
|
| 12056 |
+
"grad_norm": 69.60665893554688,
|
| 12057 |
+
"learning_rate": 9.970494192799475e-06,
|
| 12058 |
+
"loss": 10.3436,
|
| 12059 |
+
"step": 17020
|
| 12060 |
+
},
|
| 12061 |
+
{
|
| 12062 |
+
"epoch": 0.7557929890547594,
|
| 12063 |
+
"grad_norm": 70.77852630615234,
|
| 12064 |
+
"learning_rate": 9.970476856837547e-06,
|
| 12065 |
+
"loss": 10.8488,
|
| 12066 |
+
"step": 17030
|
| 12067 |
+
},
|
| 12068 |
+
{
|
| 12069 |
+
"epoch": 0.7562367899878509,
|
| 12070 |
+
"grad_norm": 71.26077270507812,
|
| 12071 |
+
"learning_rate": 9.97045952087562e-06,
|
| 12072 |
+
"loss": 10.4241,
|
| 12073 |
+
"step": 17040
|
| 12074 |
+
},
|
| 12075 |
+
{
|
| 12076 |
+
"epoch": 0.7566805909209424,
|
| 12077 |
+
"grad_norm": 95.98668670654297,
|
| 12078 |
+
"learning_rate": 9.970442184913693e-06,
|
| 12079 |
+
"loss": 10.1583,
|
| 12080 |
+
"step": 17050
|
| 12081 |
+
},
|
| 12082 |
+
{
|
| 12083 |
+
"epoch": 0.7571243918540339,
|
| 12084 |
+
"grad_norm": 77.86578369140625,
|
| 12085 |
+
"learning_rate": 9.970424848951764e-06,
|
| 12086 |
+
"loss": 10.8454,
|
| 12087 |
+
"step": 17060
|
| 12088 |
+
},
|
| 12089 |
+
{
|
| 12090 |
+
"epoch": 0.7575681927871253,
|
| 12091 |
+
"grad_norm": 81.84298706054688,
|
| 12092 |
+
"learning_rate": 9.970407512989837e-06,
|
| 12093 |
+
"loss": 10.523,
|
| 12094 |
+
"step": 17070
|
| 12095 |
+
},
|
| 12096 |
+
{
|
| 12097 |
+
"epoch": 0.7580119937202168,
|
| 12098 |
+
"grad_norm": 71.15061950683594,
|
| 12099 |
+
"learning_rate": 9.97039017702791e-06,
|
| 12100 |
+
"loss": 10.206,
|
| 12101 |
+
"step": 17080
|
| 12102 |
+
},
|
| 12103 |
+
{
|
| 12104 |
+
"epoch": 0.7584557946533083,
|
| 12105 |
+
"grad_norm": 65.26417541503906,
|
| 12106 |
+
"learning_rate": 9.970372841065982e-06,
|
| 12107 |
+
"loss": 10.8191,
|
| 12108 |
+
"step": 17090
|
| 12109 |
+
},
|
| 12110 |
+
{
|
| 12111 |
+
"epoch": 0.7588995955863997,
|
| 12112 |
+
"grad_norm": 76.11380767822266,
|
| 12113 |
+
"learning_rate": 9.970355505104055e-06,
|
| 12114 |
+
"loss": 10.6828,
|
| 12115 |
+
"step": 17100
|
| 12116 |
+
},
|
| 12117 |
+
{
|
| 12118 |
+
"epoch": 0.7593433965194912,
|
| 12119 |
+
"grad_norm": 82.5249252319336,
|
| 12120 |
+
"learning_rate": 9.970338169142128e-06,
|
| 12121 |
+
"loss": 10.5496,
|
| 12122 |
+
"step": 17110
|
| 12123 |
+
},
|
| 12124 |
+
{
|
| 12125 |
+
"epoch": 0.7597871974525826,
|
| 12126 |
+
"grad_norm": 66.36536407470703,
|
| 12127 |
+
"learning_rate": 9.9703208331802e-06,
|
| 12128 |
+
"loss": 10.0697,
|
| 12129 |
+
"step": 17120
|
| 12130 |
+
},
|
| 12131 |
+
{
|
| 12132 |
+
"epoch": 0.7602309983856741,
|
| 12133 |
+
"grad_norm": 61.46723556518555,
|
| 12134 |
+
"learning_rate": 9.970303497218272e-06,
|
| 12135 |
+
"loss": 10.5154,
|
| 12136 |
+
"step": 17130
|
| 12137 |
+
},
|
| 12138 |
+
{
|
| 12139 |
+
"epoch": 0.7606747993187656,
|
| 12140 |
+
"grad_norm": 65.16950225830078,
|
| 12141 |
+
"learning_rate": 9.970286161256345e-06,
|
| 12142 |
+
"loss": 10.5957,
|
| 12143 |
+
"step": 17140
|
| 12144 |
+
},
|
| 12145 |
+
{
|
| 12146 |
+
"epoch": 0.7611186002518571,
|
| 12147 |
+
"grad_norm": 74.29840850830078,
|
| 12148 |
+
"learning_rate": 9.970268825294418e-06,
|
| 12149 |
+
"loss": 10.9534,
|
| 12150 |
+
"step": 17150
|
| 12151 |
+
},
|
| 12152 |
+
{
|
| 12153 |
+
"epoch": 0.7615624011849484,
|
| 12154 |
+
"grad_norm": 63.95246887207031,
|
| 12155 |
+
"learning_rate": 9.97025148933249e-06,
|
| 12156 |
+
"loss": 10.559,
|
| 12157 |
+
"step": 17160
|
| 12158 |
+
},
|
| 12159 |
+
{
|
| 12160 |
+
"epoch": 0.7620062021180399,
|
| 12161 |
+
"grad_norm": 70.78131866455078,
|
| 12162 |
+
"learning_rate": 9.970234153370563e-06,
|
| 12163 |
+
"loss": 10.4259,
|
| 12164 |
+
"step": 17170
|
| 12165 |
+
},
|
| 12166 |
+
{
|
| 12167 |
+
"epoch": 0.7624500030511314,
|
| 12168 |
+
"grad_norm": 68.86581420898438,
|
| 12169 |
+
"learning_rate": 9.970216817408636e-06,
|
| 12170 |
+
"loss": 10.5729,
|
| 12171 |
+
"step": 17180
|
| 12172 |
+
},
|
| 12173 |
+
{
|
| 12174 |
+
"epoch": 0.7628938039842229,
|
| 12175 |
+
"grad_norm": 69.18133544921875,
|
| 12176 |
+
"learning_rate": 9.970199481446707e-06,
|
| 12177 |
+
"loss": 10.4368,
|
| 12178 |
+
"step": 17190
|
| 12179 |
+
},
|
| 12180 |
+
{
|
| 12181 |
+
"epoch": 0.7633376049173144,
|
| 12182 |
+
"grad_norm": 63.52793502807617,
|
| 12183 |
+
"learning_rate": 9.97018214548478e-06,
|
| 12184 |
+
"loss": 10.4153,
|
| 12185 |
+
"step": 17200
|
| 12186 |
+
},
|
| 12187 |
+
{
|
| 12188 |
+
"epoch": 0.7637814058504058,
|
| 12189 |
+
"grad_norm": 72.20518493652344,
|
| 12190 |
+
"learning_rate": 9.970164809522854e-06,
|
| 12191 |
+
"loss": 10.7712,
|
| 12192 |
+
"step": 17210
|
| 12193 |
+
},
|
| 12194 |
+
{
|
| 12195 |
+
"epoch": 0.7642252067834973,
|
| 12196 |
+
"grad_norm": 71.09992218017578,
|
| 12197 |
+
"learning_rate": 9.970147473560925e-06,
|
| 12198 |
+
"loss": 10.4743,
|
| 12199 |
+
"step": 17220
|
| 12200 |
+
},
|
| 12201 |
+
{
|
| 12202 |
+
"epoch": 0.7646690077165887,
|
| 12203 |
+
"grad_norm": 73.4151611328125,
|
| 12204 |
+
"learning_rate": 9.970130137598998e-06,
|
| 12205 |
+
"loss": 10.7,
|
| 12206 |
+
"step": 17230
|
| 12207 |
+
},
|
| 12208 |
+
{
|
| 12209 |
+
"epoch": 0.7651128086496802,
|
| 12210 |
+
"grad_norm": 59.74842834472656,
|
| 12211 |
+
"learning_rate": 9.970112801637071e-06,
|
| 12212 |
+
"loss": 10.5612,
|
| 12213 |
+
"step": 17240
|
| 12214 |
+
},
|
| 12215 |
+
{
|
| 12216 |
+
"epoch": 0.7655566095827716,
|
| 12217 |
+
"grad_norm": 67.19086456298828,
|
| 12218 |
+
"learning_rate": 9.970095465675142e-06,
|
| 12219 |
+
"loss": 10.3241,
|
| 12220 |
+
"step": 17250
|
| 12221 |
+
},
|
| 12222 |
+
{
|
| 12223 |
+
"epoch": 0.7660004105158631,
|
| 12224 |
+
"grad_norm": 80.37249755859375,
|
| 12225 |
+
"learning_rate": 9.970078129713216e-06,
|
| 12226 |
+
"loss": 10.8193,
|
| 12227 |
+
"step": 17260
|
| 12228 |
+
},
|
| 12229 |
+
{
|
| 12230 |
+
"epoch": 0.7664442114489546,
|
| 12231 |
+
"grad_norm": 64.05519104003906,
|
| 12232 |
+
"learning_rate": 9.970060793751289e-06,
|
| 12233 |
+
"loss": 10.5312,
|
| 12234 |
+
"step": 17270
|
| 12235 |
+
},
|
| 12236 |
+
{
|
| 12237 |
+
"epoch": 0.7668880123820461,
|
| 12238 |
+
"grad_norm": 63.82759475708008,
|
| 12239 |
+
"learning_rate": 9.97004345778936e-06,
|
| 12240 |
+
"loss": 10.2693,
|
| 12241 |
+
"step": 17280
|
| 12242 |
+
},
|
| 12243 |
+
{
|
| 12244 |
+
"epoch": 0.7673318133151374,
|
| 12245 |
+
"grad_norm": 74.00315856933594,
|
| 12246 |
+
"learning_rate": 9.970026121827433e-06,
|
| 12247 |
+
"loss": 10.5823,
|
| 12248 |
+
"step": 17290
|
| 12249 |
+
},
|
| 12250 |
+
{
|
| 12251 |
+
"epoch": 0.7677756142482289,
|
| 12252 |
+
"grad_norm": 72.47602844238281,
|
| 12253 |
+
"learning_rate": 9.970008785865506e-06,
|
| 12254 |
+
"loss": 10.3111,
|
| 12255 |
+
"step": 17300
|
| 12256 |
+
},
|
| 12257 |
+
{
|
| 12258 |
+
"epoch": 0.7682194151813204,
|
| 12259 |
+
"grad_norm": 59.534305572509766,
|
| 12260 |
+
"learning_rate": 9.969991449903578e-06,
|
| 12261 |
+
"loss": 10.7712,
|
| 12262 |
+
"step": 17310
|
| 12263 |
+
},
|
| 12264 |
+
{
|
| 12265 |
+
"epoch": 0.7686632161144119,
|
| 12266 |
+
"grad_norm": 70.15426635742188,
|
| 12267 |
+
"learning_rate": 9.96997411394165e-06,
|
| 12268 |
+
"loss": 10.3361,
|
| 12269 |
+
"step": 17320
|
| 12270 |
+
},
|
| 12271 |
+
{
|
| 12272 |
+
"epoch": 0.7691070170475034,
|
| 12273 |
+
"grad_norm": 60.14602279663086,
|
| 12274 |
+
"learning_rate": 9.969956777979724e-06,
|
| 12275 |
+
"loss": 10.8061,
|
| 12276 |
+
"step": 17330
|
| 12277 |
+
},
|
| 12278 |
+
{
|
| 12279 |
+
"epoch": 0.7695508179805948,
|
| 12280 |
+
"grad_norm": 64.0710678100586,
|
| 12281 |
+
"learning_rate": 9.969939442017795e-06,
|
| 12282 |
+
"loss": 9.9819,
|
| 12283 |
+
"step": 17340
|
| 12284 |
+
},
|
| 12285 |
+
{
|
| 12286 |
+
"epoch": 0.7699946189136863,
|
| 12287 |
+
"grad_norm": 76.11770629882812,
|
| 12288 |
+
"learning_rate": 9.969922106055868e-06,
|
| 12289 |
+
"loss": 10.2555,
|
| 12290 |
+
"step": 17350
|
| 12291 |
+
},
|
| 12292 |
+
{
|
| 12293 |
+
"epoch": 0.7704384198467777,
|
| 12294 |
+
"grad_norm": 68.8619613647461,
|
| 12295 |
+
"learning_rate": 9.969904770093941e-06,
|
| 12296 |
+
"loss": 10.5506,
|
| 12297 |
+
"step": 17360
|
| 12298 |
+
},
|
| 12299 |
+
{
|
| 12300 |
+
"epoch": 0.7708822207798692,
|
| 12301 |
+
"grad_norm": 66.60945129394531,
|
| 12302 |
+
"learning_rate": 9.969887434132014e-06,
|
| 12303 |
+
"loss": 10.3023,
|
| 12304 |
+
"step": 17370
|
| 12305 |
+
},
|
| 12306 |
+
{
|
| 12307 |
+
"epoch": 0.7713260217129606,
|
| 12308 |
+
"grad_norm": 66.75738525390625,
|
| 12309 |
+
"learning_rate": 9.969870098170086e-06,
|
| 12310 |
+
"loss": 10.126,
|
| 12311 |
+
"step": 17380
|
| 12312 |
+
},
|
| 12313 |
+
{
|
| 12314 |
+
"epoch": 0.7717698226460521,
|
| 12315 |
+
"grad_norm": 65.49826049804688,
|
| 12316 |
+
"learning_rate": 9.969852762208159e-06,
|
| 12317 |
+
"loss": 10.6585,
|
| 12318 |
+
"step": 17390
|
| 12319 |
+
},
|
| 12320 |
+
{
|
| 12321 |
+
"epoch": 0.7722136235791436,
|
| 12322 |
+
"grad_norm": 65.2136001586914,
|
| 12323 |
+
"learning_rate": 9.969835426246232e-06,
|
| 12324 |
+
"loss": 10.4105,
|
| 12325 |
+
"step": 17400
|
| 12326 |
+
},
|
| 12327 |
+
{
|
| 12328 |
+
"epoch": 0.7726574245122351,
|
| 12329 |
+
"grad_norm": 62.805213928222656,
|
| 12330 |
+
"learning_rate": 9.969818090284303e-06,
|
| 12331 |
+
"loss": 10.1974,
|
| 12332 |
+
"step": 17410
|
| 12333 |
+
},
|
| 12334 |
+
{
|
| 12335 |
+
"epoch": 0.7731012254453264,
|
| 12336 |
+
"grad_norm": 66.39070892333984,
|
| 12337 |
+
"learning_rate": 9.969800754322376e-06,
|
| 12338 |
+
"loss": 10.7106,
|
| 12339 |
+
"step": 17420
|
| 12340 |
+
},
|
| 12341 |
+
{
|
| 12342 |
+
"epoch": 0.7735450263784179,
|
| 12343 |
+
"grad_norm": 70.36665344238281,
|
| 12344 |
+
"learning_rate": 9.96978341836045e-06,
|
| 12345 |
+
"loss": 10.2655,
|
| 12346 |
+
"step": 17430
|
| 12347 |
+
},
|
| 12348 |
+
{
|
| 12349 |
+
"epoch": 0.7739888273115094,
|
| 12350 |
+
"grad_norm": 62.32572937011719,
|
| 12351 |
+
"learning_rate": 9.96976608239852e-06,
|
| 12352 |
+
"loss": 10.2899,
|
| 12353 |
+
"step": 17440
|
| 12354 |
+
},
|
| 12355 |
+
{
|
| 12356 |
+
"epoch": 0.7744326282446009,
|
| 12357 |
+
"grad_norm": 64.87157440185547,
|
| 12358 |
+
"learning_rate": 9.969748746436594e-06,
|
| 12359 |
+
"loss": 10.2878,
|
| 12360 |
+
"step": 17450
|
| 12361 |
+
},
|
| 12362 |
+
{
|
| 12363 |
+
"epoch": 0.7748764291776924,
|
| 12364 |
+
"grad_norm": 65.008544921875,
|
| 12365 |
+
"learning_rate": 9.969731410474667e-06,
|
| 12366 |
+
"loss": 10.5488,
|
| 12367 |
+
"step": 17460
|
| 12368 |
+
},
|
| 12369 |
+
{
|
| 12370 |
+
"epoch": 0.7753202301107838,
|
| 12371 |
+
"grad_norm": 62.856143951416016,
|
| 12372 |
+
"learning_rate": 9.969714074512738e-06,
|
| 12373 |
+
"loss": 10.3436,
|
| 12374 |
+
"step": 17470
|
| 12375 |
+
},
|
| 12376 |
+
{
|
| 12377 |
+
"epoch": 0.7757640310438753,
|
| 12378 |
+
"grad_norm": 77.94728088378906,
|
| 12379 |
+
"learning_rate": 9.969696738550811e-06,
|
| 12380 |
+
"loss": 10.7276,
|
| 12381 |
+
"step": 17480
|
| 12382 |
+
},
|
| 12383 |
+
{
|
| 12384 |
+
"epoch": 0.7762078319769667,
|
| 12385 |
+
"grad_norm": 59.522884368896484,
|
| 12386 |
+
"learning_rate": 9.969679402588884e-06,
|
| 12387 |
+
"loss": 10.267,
|
| 12388 |
+
"step": 17490
|
| 12389 |
+
},
|
| 12390 |
+
{
|
| 12391 |
+
"epoch": 0.7766516329100582,
|
| 12392 |
+
"grad_norm": 65.48174285888672,
|
| 12393 |
+
"learning_rate": 9.969662066626956e-06,
|
| 12394 |
+
"loss": 10.1914,
|
| 12395 |
+
"step": 17500
|
| 12396 |
}
|
| 12397 |
],
|
| 12398 |
"logging_steps": 10,
|
| 12412 |
"attributes": {}
|
| 12413 |
}
|
| 12414 |
},
|
| 12415 |
+
"total_flos": 6.107015608795136e+18,
|
| 12416 |
"train_batch_size": 4,
|
| 12417 |
"trial_name": null,
|
| 12418 |
"trial_params": null
|
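
The entries added above are the periodic training logs that the Hugging Face Trainer writes into last-checkpoint/trainer_state.json (normally under its log_history list), alongside summary fields such as total_flos and train_batch_size. Below is a minimal, illustrative sketch of how such a file could be inspected; the file path is taken from this commit's file list, while the script itself is an assumption for illustration and not part of the commit.

```python
import json

# Minimal sketch (assumption: run from the repository root so that the
# checkpoint path below matches this commit's file list).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# log_history holds entries like those shown in the diff above: training
# records carry "loss", "grad_norm" and "learning_rate"; eval records
# carry "eval_loss" and throughput fields.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print("last logged train step:", train_logs[-1]["step"], "loss:", train_logs[-1]["loss"])
print("last eval step:", eval_logs[-1]["step"], "eval_loss:", eval_logs[-1]["eval_loss"])
print("total_flos:", state.get("total_flos"))
print("train_batch_size:", state.get("train_batch_size"))
```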