Training in progress, step 20000, checkpoint
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1777 -3
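The files in this checkpoint are the ones the Hugging Face Trainer writes for a resumable run (weights, optimizer and scheduler state, per-rank RNG state, and trainer_state.json). A minimal sketch of resuming from this directory is shown below; it assumes the same model and data pipeline as the original run, and the model/dataset names are placeholders, not taken from this repo.

# Minimal sketch (assumption, not from this repository): resume training
# from the uploaded "last-checkpoint" directory with transformers.Trainer.
from transformers import Trainer, TrainingArguments

# Placeholders: supply the same model and datasets used for the original run.
model = ...          # e.g. AutoModel loaded with the original config
train_ds = ...       # original training dataset
eval_ds = ...        # original evaluation dataset

args = TrainingArguments(
    output_dir="out",
    per_device_train_batch_size=4,  # matches "train_batch_size": 4 in trainer_state.json
    eval_steps=1000,                # matches "eval_steps": 1000 in trainer_state.json
)

trainer = Trainer(model=model, args=args, train_dataset=train_ds, eval_dataset=eval_ds)

# Restores model weights, optimizer.pt, scheduler.pt, rng_state_*.pth and
# trainer_state.json, then continues from global_step 20000.
trainer.train(resume_from_checkpoint="last-checkpoint")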
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0decb17e1576a2e87ecfcfd97d8e2ab8486eb9a2ec6ff00fa3b7efa6f74327ba
 size 737632172
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:018d14bd69f4e34f78162e646a75e937b89f1d651e49bb2da5fd566a3dc03363
 size 1475354682
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7a711ae47907423581a85380ad2222bf6eaf1af9c9ec45797d4f1a9fb127db2c
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1e8c873ca3f378713a8a07acffb82e5be966b4efb3815b7ddf04ac4a39c37a73
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b0fcb54b765d5b0c806961a1b8bdc3214f4fc0489fbe2c720c7312b23d2db5cf
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b2a30b2ad9b3632b41b5d2a70ad5aabce34a6f7a76a9e1e270a22f600a05ec22
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4ee9cd8fd6ff53fdc84fbb7925a1d22d7707021b0e4b45ae16328680d2405512
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2b60c5d4b71ffd198beb51d796fd8e27c367782bb1efc7c5f1065d3ed20df402
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:87c6f1afcb23fc820bb3d68d94d047f124b182adf1d874dcd0fa3a260a51bb2b
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7ebfc4481eb53675078ccf162293df1d6b7500f8ba0b2d00cad430e67f4a70a3
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:97ffafe779a971f149a59a73318cc7969252e85b03c3f756e6cdd7e796033658
 size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.8876018661829237,
   "eval_steps": 1000,
-  "global_step":
+  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12393,6 +12393,1780 @@
       "learning_rate": 9.969662066626956e-06,
       "loss": 10.1914,
       "step": 17500
     }
   ],
   "logging_steps": 10,
@@ -12412,7 +14186,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
@@ -12393,6 +12393,1780 @@
       "learning_rate": 9.969662066626956e-06,
       "loss": 10.1914,
       "step": 17500
| 12396 |
+
},
|
| 12397 |
+
{
|
| 12398 |
+
"epoch": 0.7770954338431496,
|
| 12399 |
+
"grad_norm": 66.09324645996094,
|
| 12400 |
+
"learning_rate": 9.969644730665029e-06,
|
| 12401 |
+
"loss": 10.7307,
|
| 12402 |
+
"step": 17510
|
| 12403 |
+
},
|
| 12404 |
+
{
|
| 12405 |
+
"epoch": 0.7775392347762411,
|
| 12406 |
+
"grad_norm": 64.93851470947266,
|
| 12407 |
+
"learning_rate": 9.969627394703102e-06,
|
| 12408 |
+
"loss": 10.0904,
|
| 12409 |
+
"step": 17520
|
| 12410 |
+
},
|
| 12411 |
+
{
|
| 12412 |
+
"epoch": 0.7779830357093326,
|
| 12413 |
+
"grad_norm": 72.18850708007812,
|
| 12414 |
+
"learning_rate": 9.969610058741173e-06,
|
| 12415 |
+
"loss": 10.9298,
|
| 12416 |
+
"step": 17530
|
| 12417 |
+
},
|
| 12418 |
+
{
|
| 12419 |
+
"epoch": 0.7784268366424241,
|
| 12420 |
+
"grad_norm": 74.83529663085938,
|
| 12421 |
+
"learning_rate": 9.969592722779246e-06,
|
| 12422 |
+
"loss": 10.3685,
|
| 12423 |
+
"step": 17540
|
| 12424 |
+
},
|
| 12425 |
+
{
|
| 12426 |
+
"epoch": 0.7788706375755156,
|
| 12427 |
+
"grad_norm": 69.4002456665039,
|
| 12428 |
+
"learning_rate": 9.96957538681732e-06,
|
| 12429 |
+
"loss": 10.3551,
|
| 12430 |
+
"step": 17550
|
| 12431 |
+
},
|
| 12432 |
+
{
|
| 12433 |
+
"epoch": 0.7793144385086069,
|
| 12434 |
+
"grad_norm": 76.83059692382812,
|
| 12435 |
+
"learning_rate": 9.969558050855391e-06,
|
| 12436 |
+
"loss": 10.6919,
|
| 12437 |
+
"step": 17560
|
| 12438 |
+
},
|
| 12439 |
+
{
|
| 12440 |
+
"epoch": 0.7797582394416984,
|
| 12441 |
+
"grad_norm": 67.19385528564453,
|
| 12442 |
+
"learning_rate": 9.969540714893464e-06,
|
| 12443 |
+
"loss": 10.5123,
|
| 12444 |
+
"step": 17570
|
| 12445 |
+
},
|
| 12446 |
+
{
|
| 12447 |
+
"epoch": 0.7802020403747899,
|
| 12448 |
+
"grad_norm": 80.98805236816406,
|
| 12449 |
+
"learning_rate": 9.969523378931537e-06,
|
| 12450 |
+
"loss": 10.7374,
|
| 12451 |
+
"step": 17580
|
| 12452 |
+
},
|
| 12453 |
+
{
|
| 12454 |
+
"epoch": 0.7806458413078814,
|
| 12455 |
+
"grad_norm": 67.6994400024414,
|
| 12456 |
+
"learning_rate": 9.96950604296961e-06,
|
| 12457 |
+
"loss": 10.768,
|
| 12458 |
+
"step": 17590
|
| 12459 |
+
},
|
| 12460 |
+
{
|
| 12461 |
+
"epoch": 0.7810896422409728,
|
| 12462 |
+
"grad_norm": 74.86246490478516,
|
| 12463 |
+
"learning_rate": 9.969488707007682e-06,
|
| 12464 |
+
"loss": 10.1819,
|
| 12465 |
+
"step": 17600
|
| 12466 |
+
},
|
| 12467 |
+
{
|
| 12468 |
+
"epoch": 0.7815334431740643,
|
| 12469 |
+
"grad_norm": 73.82647705078125,
|
| 12470 |
+
"learning_rate": 9.969471371045755e-06,
|
| 12471 |
+
"loss": 10.7857,
|
| 12472 |
+
"step": 17610
|
| 12473 |
+
},
|
| 12474 |
+
{
|
| 12475 |
+
"epoch": 0.7819772441071557,
|
| 12476 |
+
"grad_norm": 68.298095703125,
|
| 12477 |
+
"learning_rate": 9.969454035083828e-06,
|
| 12478 |
+
"loss": 10.6649,
|
| 12479 |
+
"step": 17620
|
| 12480 |
+
},
|
| 12481 |
+
{
|
| 12482 |
+
"epoch": 0.7824210450402472,
|
| 12483 |
+
"grad_norm": 68.30016326904297,
|
| 12484 |
+
"learning_rate": 9.969436699121899e-06,
|
| 12485 |
+
"loss": 10.3087,
|
| 12486 |
+
"step": 17630
|
| 12487 |
+
},
|
| 12488 |
+
{
|
| 12489 |
+
"epoch": 0.7828648459733386,
|
| 12490 |
+
"grad_norm": 67.62581634521484,
|
| 12491 |
+
"learning_rate": 9.969419363159972e-06,
|
| 12492 |
+
"loss": 9.9373,
|
| 12493 |
+
"step": 17640
|
| 12494 |
+
},
|
| 12495 |
+
{
|
| 12496 |
+
"epoch": 0.7833086469064301,
|
| 12497 |
+
"grad_norm": 78.02869415283203,
|
| 12498 |
+
"learning_rate": 9.969402027198045e-06,
|
| 12499 |
+
"loss": 10.7946,
|
| 12500 |
+
"step": 17650
|
| 12501 |
+
},
|
| 12502 |
+
{
|
| 12503 |
+
"epoch": 0.7837524478395216,
|
| 12504 |
+
"grad_norm": 64.94837951660156,
|
| 12505 |
+
"learning_rate": 9.969384691236117e-06,
|
| 12506 |
+
"loss": 10.1355,
|
| 12507 |
+
"step": 17660
|
| 12508 |
+
},
|
| 12509 |
+
{
|
| 12510 |
+
"epoch": 0.7841962487726131,
|
| 12511 |
+
"grad_norm": 65.18085479736328,
|
| 12512 |
+
"learning_rate": 9.96936735527419e-06,
|
| 12513 |
+
"loss": 10.4704,
|
| 12514 |
+
"step": 17670
|
| 12515 |
+
},
|
| 12516 |
+
{
|
| 12517 |
+
"epoch": 0.7846400497057046,
|
| 12518 |
+
"grad_norm": 70.01870727539062,
|
| 12519 |
+
"learning_rate": 9.969350019312263e-06,
|
| 12520 |
+
"loss": 10.6109,
|
| 12521 |
+
"step": 17680
|
| 12522 |
+
},
|
| 12523 |
+
{
|
| 12524 |
+
"epoch": 0.7850838506387959,
|
| 12525 |
+
"grad_norm": 65.34024047851562,
|
| 12526 |
+
"learning_rate": 9.969332683350334e-06,
|
| 12527 |
+
"loss": 10.3535,
|
| 12528 |
+
"step": 17690
|
| 12529 |
+
},
|
| 12530 |
+
{
|
| 12531 |
+
"epoch": 0.7855276515718874,
|
| 12532 |
+
"grad_norm": 79.30274200439453,
|
| 12533 |
+
"learning_rate": 9.969315347388407e-06,
|
| 12534 |
+
"loss": 10.3437,
|
| 12535 |
+
"step": 17700
|
| 12536 |
+
},
|
| 12537 |
+
{
|
| 12538 |
+
"epoch": 0.7859714525049789,
|
| 12539 |
+
"grad_norm": 67.2250747680664,
|
| 12540 |
+
"learning_rate": 9.96929801142648e-06,
|
| 12541 |
+
"loss": 10.4723,
|
| 12542 |
+
"step": 17710
|
| 12543 |
+
},
|
| 12544 |
+
{
|
| 12545 |
+
"epoch": 0.7864152534380704,
|
| 12546 |
+
"grad_norm": 68.58338928222656,
|
| 12547 |
+
"learning_rate": 9.969280675464552e-06,
|
| 12548 |
+
"loss": 10.1836,
|
| 12549 |
+
"step": 17720
|
| 12550 |
+
},
|
| 12551 |
+
{
|
| 12552 |
+
"epoch": 0.7868590543711618,
|
| 12553 |
+
"grad_norm": 70.7483901977539,
|
| 12554 |
+
"learning_rate": 9.969263339502625e-06,
|
| 12555 |
+
"loss": 11.1165,
|
| 12556 |
+
"step": 17730
|
| 12557 |
+
},
|
| 12558 |
+
{
|
| 12559 |
+
"epoch": 0.7873028553042533,
|
| 12560 |
+
"grad_norm": 62.17152404785156,
|
| 12561 |
+
"learning_rate": 9.969246003540698e-06,
|
| 12562 |
+
"loss": 10.2156,
|
| 12563 |
+
"step": 17740
|
| 12564 |
+
},
|
| 12565 |
+
{
|
| 12566 |
+
"epoch": 0.7877466562373447,
|
| 12567 |
+
"grad_norm": 69.29998016357422,
|
| 12568 |
+
"learning_rate": 9.96922866757877e-06,
|
| 12569 |
+
"loss": 10.6663,
|
| 12570 |
+
"step": 17750
|
| 12571 |
+
},
|
| 12572 |
+
{
|
| 12573 |
+
"epoch": 0.7881904571704362,
|
| 12574 |
+
"grad_norm": 64.87523651123047,
|
| 12575 |
+
"learning_rate": 9.969211331616842e-06,
|
| 12576 |
+
"loss": 10.9555,
|
| 12577 |
+
"step": 17760
|
| 12578 |
+
},
|
| 12579 |
+
{
|
| 12580 |
+
"epoch": 0.7886342581035276,
|
| 12581 |
+
"grad_norm": 66.5212173461914,
|
| 12582 |
+
"learning_rate": 9.969193995654915e-06,
|
| 12583 |
+
"loss": 10.5622,
|
| 12584 |
+
"step": 17770
|
| 12585 |
+
},
|
| 12586 |
+
{
|
| 12587 |
+
"epoch": 0.7890780590366191,
|
| 12588 |
+
"grad_norm": 68.40711975097656,
|
| 12589 |
+
"learning_rate": 9.969176659692987e-06,
|
| 12590 |
+
"loss": 10.3267,
|
| 12591 |
+
"step": 17780
|
| 12592 |
+
},
|
| 12593 |
+
{
|
| 12594 |
+
"epoch": 0.7895218599697106,
|
| 12595 |
+
"grad_norm": 70.49530029296875,
|
| 12596 |
+
"learning_rate": 9.96915932373106e-06,
|
| 12597 |
+
"loss": 10.6099,
|
| 12598 |
+
"step": 17790
|
| 12599 |
+
},
|
| 12600 |
+
{
|
| 12601 |
+
"epoch": 0.7899656609028021,
|
| 12602 |
+
"grad_norm": 68.68482971191406,
|
| 12603 |
+
"learning_rate": 9.969141987769133e-06,
|
| 12604 |
+
"loss": 10.5234,
|
| 12605 |
+
"step": 17800
|
| 12606 |
+
},
|
| 12607 |
+
{
|
| 12608 |
+
"epoch": 0.7904094618358936,
|
| 12609 |
+
"grad_norm": 73.87464904785156,
|
| 12610 |
+
"learning_rate": 9.969124651807206e-06,
|
| 12611 |
+
"loss": 10.7718,
|
| 12612 |
+
"step": 17810
|
| 12613 |
+
},
|
| 12614 |
+
{
|
| 12615 |
+
"epoch": 0.7908532627689849,
|
| 12616 |
+
"grad_norm": 65.32040405273438,
|
| 12617 |
+
"learning_rate": 9.969107315845277e-06,
|
| 12618 |
+
"loss": 10.347,
|
| 12619 |
+
"step": 17820
|
| 12620 |
+
},
|
| 12621 |
+
{
|
| 12622 |
+
"epoch": 0.7912970637020764,
|
| 12623 |
+
"grad_norm": 66.08610534667969,
|
| 12624 |
+
"learning_rate": 9.96908997988335e-06,
|
| 12625 |
+
"loss": 10.4261,
|
| 12626 |
+
"step": 17830
|
| 12627 |
+
},
|
| 12628 |
+
{
|
| 12629 |
+
"epoch": 0.7917408646351679,
|
| 12630 |
+
"grad_norm": 55.77153396606445,
|
| 12631 |
+
"learning_rate": 9.969072643921424e-06,
|
| 12632 |
+
"loss": 10.013,
|
| 12633 |
+
"step": 17840
|
| 12634 |
+
},
|
| 12635 |
+
{
|
| 12636 |
+
"epoch": 0.7921846655682594,
|
| 12637 |
+
"grad_norm": 72.32164764404297,
|
| 12638 |
+
"learning_rate": 9.969055307959495e-06,
|
| 12639 |
+
"loss": 10.5249,
|
| 12640 |
+
"step": 17850
|
| 12641 |
+
},
|
| 12642 |
+
{
|
| 12643 |
+
"epoch": 0.7926284665013508,
|
| 12644 |
+
"grad_norm": 73.15257263183594,
|
| 12645 |
+
"learning_rate": 9.969037971997568e-06,
|
| 12646 |
+
"loss": 10.86,
|
| 12647 |
+
"step": 17860
|
| 12648 |
+
},
|
| 12649 |
+
{
|
| 12650 |
+
"epoch": 0.7930722674344423,
|
| 12651 |
+
"grad_norm": 79.71673583984375,
|
| 12652 |
+
"learning_rate": 9.969020636035641e-06,
|
| 12653 |
+
"loss": 10.4343,
|
| 12654 |
+
"step": 17870
|
| 12655 |
+
},
|
| 12656 |
+
{
|
| 12657 |
+
"epoch": 0.7935160683675337,
|
| 12658 |
+
"grad_norm": 68.7105941772461,
|
| 12659 |
+
"learning_rate": 9.969003300073712e-06,
|
| 12660 |
+
"loss": 10.5593,
|
| 12661 |
+
"step": 17880
|
| 12662 |
+
},
|
| 12663 |
+
{
|
| 12664 |
+
"epoch": 0.7939598693006252,
|
| 12665 |
+
"grad_norm": 67.40699005126953,
|
| 12666 |
+
"learning_rate": 9.968985964111786e-06,
|
| 12667 |
+
"loss": 9.8434,
|
| 12668 |
+
"step": 17890
|
| 12669 |
+
},
|
| 12670 |
+
{
|
| 12671 |
+
"epoch": 0.7944036702337167,
|
| 12672 |
+
"grad_norm": 67.37322235107422,
|
| 12673 |
+
"learning_rate": 9.968968628149859e-06,
|
| 12674 |
+
"loss": 10.9941,
|
| 12675 |
+
"step": 17900
|
| 12676 |
+
},
|
| 12677 |
+
{
|
| 12678 |
+
"epoch": 0.7948474711668081,
|
| 12679 |
+
"grad_norm": 66.23359680175781,
|
| 12680 |
+
"learning_rate": 9.96895129218793e-06,
|
| 12681 |
+
"loss": 10.731,
|
| 12682 |
+
"step": 17910
|
| 12683 |
+
},
|
| 12684 |
+
{
|
| 12685 |
+
"epoch": 0.7952912720998996,
|
| 12686 |
+
"grad_norm": 65.20913696289062,
|
| 12687 |
+
"learning_rate": 9.968933956226003e-06,
|
| 12688 |
+
"loss": 10.537,
|
| 12689 |
+
"step": 17920
|
| 12690 |
+
},
|
| 12691 |
+
{
|
| 12692 |
+
"epoch": 0.7957350730329911,
|
| 12693 |
+
"grad_norm": 61.01829528808594,
|
| 12694 |
+
"learning_rate": 9.968916620264076e-06,
|
| 12695 |
+
"loss": 9.9038,
|
| 12696 |
+
"step": 17930
|
| 12697 |
+
},
|
| 12698 |
+
{
|
| 12699 |
+
"epoch": 0.7961788739660826,
|
| 12700 |
+
"grad_norm": 65.00994110107422,
|
| 12701 |
+
"learning_rate": 9.968899284302148e-06,
|
| 12702 |
+
"loss": 10.364,
|
| 12703 |
+
"step": 17940
|
| 12704 |
+
},
|
| 12705 |
+
{
|
| 12706 |
+
"epoch": 0.7966226748991739,
|
| 12707 |
+
"grad_norm": 72.74436950683594,
|
| 12708 |
+
"learning_rate": 9.96888194834022e-06,
|
| 12709 |
+
"loss": 10.5061,
|
| 12710 |
+
"step": 17950
|
| 12711 |
+
},
|
| 12712 |
+
{
|
| 12713 |
+
"epoch": 0.7970664758322654,
|
| 12714 |
+
"grad_norm": 75.44004821777344,
|
| 12715 |
+
"learning_rate": 9.968864612378294e-06,
|
| 12716 |
+
"loss": 10.646,
|
| 12717 |
+
"step": 17960
|
| 12718 |
+
},
|
| 12719 |
+
{
|
| 12720 |
+
"epoch": 0.7975102767653569,
|
| 12721 |
+
"grad_norm": 69.74024200439453,
|
| 12722 |
+
"learning_rate": 9.968847276416365e-06,
|
| 12723 |
+
"loss": 10.5158,
|
| 12724 |
+
"step": 17970
|
| 12725 |
+
},
|
| 12726 |
+
{
|
| 12727 |
+
"epoch": 0.7979540776984484,
|
| 12728 |
+
"grad_norm": 73.52366638183594,
|
| 12729 |
+
"learning_rate": 9.968829940454438e-06,
|
| 12730 |
+
"loss": 10.0728,
|
| 12731 |
+
"step": 17980
|
| 12732 |
+
},
|
| 12733 |
+
{
|
| 12734 |
+
"epoch": 0.7983978786315398,
|
| 12735 |
+
"grad_norm": 62.016883850097656,
|
| 12736 |
+
"learning_rate": 9.968812604492511e-06,
|
| 12737 |
+
"loss": 9.9823,
|
| 12738 |
+
"step": 17990
|
| 12739 |
+
},
|
| 12740 |
+
{
|
| 12741 |
+
"epoch": 0.7988416795646313,
|
| 12742 |
+
"grad_norm": 77.03231811523438,
|
| 12743 |
+
"learning_rate": 9.968795268530583e-06,
|
| 12744 |
+
"loss": 10.4617,
|
| 12745 |
+
"step": 18000
|
| 12746 |
+
},
|
| 12747 |
+
{
|
| 12748 |
+
"epoch": 0.7988416795646313,
|
| 12749 |
+
"eval_loss": 0.32583364844322205,
|
| 12750 |
+
"eval_runtime": 672.7495,
|
| 12751 |
+
"eval_samples_per_second": 1805.116,
|
| 12752 |
+
"eval_steps_per_second": 56.41,
|
| 12753 |
+
"step": 18000
|
| 12754 |
+
},
|
| 12755 |
+
{
|
| 12756 |
+
"epoch": 0.7992854804977227,
|
| 12757 |
+
"grad_norm": 62.19236755371094,
|
| 12758 |
+
"learning_rate": 9.968777932568656e-06,
|
| 12759 |
+
"loss": 10.465,
|
| 12760 |
+
"step": 18010
|
| 12761 |
+
},
|
| 12762 |
+
{
|
| 12763 |
+
"epoch": 0.7997292814308142,
|
| 12764 |
+
"grad_norm": 64.64230346679688,
|
| 12765 |
+
"learning_rate": 9.968760596606729e-06,
|
| 12766 |
+
"loss": 11.0006,
|
| 12767 |
+
"step": 18020
|
| 12768 |
+
},
|
| 12769 |
+
{
|
| 12770 |
+
"epoch": 0.8001730823639057,
|
| 12771 |
+
"grad_norm": 71.75637817382812,
|
| 12772 |
+
"learning_rate": 9.968743260644802e-06,
|
| 12773 |
+
"loss": 10.3129,
|
| 12774 |
+
"step": 18030
|
| 12775 |
+
},
|
| 12776 |
+
{
|
| 12777 |
+
"epoch": 0.8006168832969971,
|
| 12778 |
+
"grad_norm": 68.54603576660156,
|
| 12779 |
+
"learning_rate": 9.968725924682873e-06,
|
| 12780 |
+
"loss": 10.7563,
|
| 12781 |
+
"step": 18040
|
| 12782 |
+
},
|
| 12783 |
+
{
|
| 12784 |
+
"epoch": 0.8010606842300886,
|
| 12785 |
+
"grad_norm": 66.99270629882812,
|
| 12786 |
+
"learning_rate": 9.968708588720946e-06,
|
| 12787 |
+
"loss": 10.2859,
|
| 12788 |
+
"step": 18050
|
| 12789 |
+
},
|
| 12790 |
+
{
|
| 12791 |
+
"epoch": 0.8015044851631801,
|
| 12792 |
+
"grad_norm": 72.47330474853516,
|
| 12793 |
+
"learning_rate": 9.96869125275902e-06,
|
| 12794 |
+
"loss": 10.6183,
|
| 12795 |
+
"step": 18060
|
| 12796 |
+
},
|
| 12797 |
+
{
|
| 12798 |
+
"epoch": 0.8019482860962716,
|
| 12799 |
+
"grad_norm": 69.65715789794922,
|
| 12800 |
+
"learning_rate": 9.96867391679709e-06,
|
| 12801 |
+
"loss": 10.0611,
|
| 12802 |
+
"step": 18070
|
| 12803 |
+
},
|
| 12804 |
+
{
|
| 12805 |
+
"epoch": 0.8023920870293629,
|
| 12806 |
+
"grad_norm": 68.04208374023438,
|
| 12807 |
+
"learning_rate": 9.968656580835164e-06,
|
| 12808 |
+
"loss": 10.9295,
|
| 12809 |
+
"step": 18080
|
| 12810 |
+
},
|
| 12811 |
+
{
|
| 12812 |
+
"epoch": 0.8028358879624544,
|
| 12813 |
+
"grad_norm": 69.00373840332031,
|
| 12814 |
+
"learning_rate": 9.968639244873237e-06,
|
| 12815 |
+
"loss": 10.3371,
|
| 12816 |
+
"step": 18090
|
| 12817 |
+
},
|
| 12818 |
+
{
|
| 12819 |
+
"epoch": 0.8032796888955459,
|
| 12820 |
+
"grad_norm": 62.3974723815918,
|
| 12821 |
+
"learning_rate": 9.968621908911308e-06,
|
| 12822 |
+
"loss": 10.3889,
|
| 12823 |
+
"step": 18100
|
| 12824 |
+
},
|
| 12825 |
+
{
|
| 12826 |
+
"epoch": 0.8037234898286374,
|
| 12827 |
+
"grad_norm": 71.53374481201172,
|
| 12828 |
+
"learning_rate": 9.968604572949381e-06,
|
| 12829 |
+
"loss": 11.0522,
|
| 12830 |
+
"step": 18110
|
| 12831 |
+
},
|
| 12832 |
+
{
|
| 12833 |
+
"epoch": 0.8041672907617289,
|
| 12834 |
+
"grad_norm": 61.865089416503906,
|
| 12835 |
+
"learning_rate": 9.968587236987455e-06,
|
| 12836 |
+
"loss": 10.6471,
|
| 12837 |
+
"step": 18120
|
| 12838 |
+
},
|
| 12839 |
+
{
|
| 12840 |
+
"epoch": 0.8046110916948203,
|
| 12841 |
+
"grad_norm": 70.71613311767578,
|
| 12842 |
+
"learning_rate": 9.968569901025526e-06,
|
| 12843 |
+
"loss": 11.1749,
|
| 12844 |
+
"step": 18130
|
| 12845 |
+
},
|
| 12846 |
+
{
|
| 12847 |
+
"epoch": 0.8050548926279117,
|
| 12848 |
+
"grad_norm": 70.22577667236328,
|
| 12849 |
+
"learning_rate": 9.968552565063599e-06,
|
| 12850 |
+
"loss": 10.5659,
|
| 12851 |
+
"step": 18140
|
| 12852 |
+
},
|
| 12853 |
+
{
|
| 12854 |
+
"epoch": 0.8054986935610032,
|
| 12855 |
+
"grad_norm": 62.12384796142578,
|
| 12856 |
+
"learning_rate": 9.968535229101672e-06,
|
| 12857 |
+
"loss": 10.3108,
|
| 12858 |
+
"step": 18150
|
| 12859 |
+
},
|
| 12860 |
+
{
|
| 12861 |
+
"epoch": 0.8059424944940947,
|
| 12862 |
+
"grad_norm": 67.61980438232422,
|
| 12863 |
+
"learning_rate": 9.968517893139743e-06,
|
| 12864 |
+
"loss": 10.4519,
|
| 12865 |
+
"step": 18160
|
| 12866 |
+
},
|
| 12867 |
+
{
|
| 12868 |
+
"epoch": 0.8063862954271861,
|
| 12869 |
+
"grad_norm": 64.64510345458984,
|
| 12870 |
+
"learning_rate": 9.968500557177817e-06,
|
| 12871 |
+
"loss": 10.4827,
|
| 12872 |
+
"step": 18170
|
| 12873 |
+
},
|
| 12874 |
+
{
|
| 12875 |
+
"epoch": 0.8068300963602776,
|
| 12876 |
+
"grad_norm": 73.72003173828125,
|
| 12877 |
+
"learning_rate": 9.96848322121589e-06,
|
| 12878 |
+
"loss": 9.9235,
|
| 12879 |
+
"step": 18180
|
| 12880 |
+
},
|
| 12881 |
+
{
|
| 12882 |
+
"epoch": 0.8072738972933691,
|
| 12883 |
+
"grad_norm": 66.29298400878906,
|
| 12884 |
+
"learning_rate": 9.968465885253961e-06,
|
| 12885 |
+
"loss": 10.5438,
|
| 12886 |
+
"step": 18190
|
| 12887 |
+
},
|
| 12888 |
+
{
|
| 12889 |
+
"epoch": 0.8077176982264606,
|
| 12890 |
+
"grad_norm": 67.15369415283203,
|
| 12891 |
+
"learning_rate": 9.968448549292034e-06,
|
| 12892 |
+
"loss": 10.3155,
|
| 12893 |
+
"step": 18200
|
| 12894 |
+
},
|
| 12895 |
+
{
|
| 12896 |
+
"epoch": 0.8081614991595519,
|
| 12897 |
+
"grad_norm": 59.32017517089844,
|
| 12898 |
+
"learning_rate": 9.968431213330107e-06,
|
| 12899 |
+
"loss": 10.3011,
|
| 12900 |
+
"step": 18210
|
| 12901 |
+
},
|
| 12902 |
+
{
|
| 12903 |
+
"epoch": 0.8086053000926434,
|
| 12904 |
+
"grad_norm": 72.47640228271484,
|
| 12905 |
+
"learning_rate": 9.96841387736818e-06,
|
| 12906 |
+
"loss": 10.4339,
|
| 12907 |
+
"step": 18220
|
| 12908 |
+
},
|
| 12909 |
+
{
|
| 12910 |
+
"epoch": 0.8090491010257349,
|
| 12911 |
+
"grad_norm": 68.0174789428711,
|
| 12912 |
+
"learning_rate": 9.968396541406252e-06,
|
| 12913 |
+
"loss": 10.1946,
|
| 12914 |
+
"step": 18230
|
| 12915 |
+
},
|
| 12916 |
+
{
|
| 12917 |
+
"epoch": 0.8094929019588264,
|
| 12918 |
+
"grad_norm": 62.51362228393555,
|
| 12919 |
+
"learning_rate": 9.968379205444325e-06,
|
| 12920 |
+
"loss": 10.5557,
|
| 12921 |
+
"step": 18240
|
| 12922 |
+
},
|
| 12923 |
+
{
|
| 12924 |
+
"epoch": 0.8099367028919179,
|
| 12925 |
+
"grad_norm": 72.3086929321289,
|
| 12926 |
+
"learning_rate": 9.968361869482398e-06,
|
| 12927 |
+
"loss": 10.1581,
|
| 12928 |
+
"step": 18250
|
| 12929 |
+
},
|
| 12930 |
+
{
|
| 12931 |
+
"epoch": 0.8103805038250093,
|
| 12932 |
+
"grad_norm": 74.7848892211914,
|
| 12933 |
+
"learning_rate": 9.968344533520469e-06,
|
| 12934 |
+
"loss": 10.6374,
|
| 12935 |
+
"step": 18260
|
| 12936 |
+
},
|
| 12937 |
+
{
|
| 12938 |
+
"epoch": 0.8108243047581007,
|
| 12939 |
+
"grad_norm": 60.53010177612305,
|
| 12940 |
+
"learning_rate": 9.968327197558542e-06,
|
| 12941 |
+
"loss": 10.5352,
|
| 12942 |
+
"step": 18270
|
| 12943 |
+
},
|
| 12944 |
+
{
|
| 12945 |
+
"epoch": 0.8112681056911922,
|
| 12946 |
+
"grad_norm": 71.8178482055664,
|
| 12947 |
+
"learning_rate": 9.968309861596615e-06,
|
| 12948 |
+
"loss": 10.5353,
|
| 12949 |
+
"step": 18280
|
| 12950 |
+
},
|
| 12951 |
+
{
|
| 12952 |
+
"epoch": 0.8117119066242837,
|
| 12953 |
+
"grad_norm": 58.963165283203125,
|
| 12954 |
+
"learning_rate": 9.968292525634687e-06,
|
| 12955 |
+
"loss": 10.3527,
|
| 12956 |
+
"step": 18290
|
| 12957 |
+
},
|
| 12958 |
+
{
|
| 12959 |
+
"epoch": 0.8121557075573751,
|
| 12960 |
+
"grad_norm": 64.75,
|
| 12961 |
+
"learning_rate": 9.96827518967276e-06,
|
| 12962 |
+
"loss": 11.0742,
|
| 12963 |
+
"step": 18300
|
| 12964 |
+
},
|
| 12965 |
+
{
|
| 12966 |
+
"epoch": 0.8125995084904666,
|
| 12967 |
+
"grad_norm": 61.654296875,
|
| 12968 |
+
"learning_rate": 9.968257853710833e-06,
|
| 12969 |
+
"loss": 9.8275,
|
| 12970 |
+
"step": 18310
|
| 12971 |
+
},
|
| 12972 |
+
{
|
| 12973 |
+
"epoch": 0.8130433094235581,
|
| 12974 |
+
"grad_norm": 63.89625930786133,
|
| 12975 |
+
"learning_rate": 9.968240517748904e-06,
|
| 12976 |
+
"loss": 10.3494,
|
| 12977 |
+
"step": 18320
|
| 12978 |
+
},
|
| 12979 |
+
{
|
| 12980 |
+
"epoch": 0.8134871103566496,
|
| 12981 |
+
"grad_norm": 69.73605346679688,
|
| 12982 |
+
"learning_rate": 9.968223181786977e-06,
|
| 12983 |
+
"loss": 10.4358,
|
| 12984 |
+
"step": 18330
|
| 12985 |
+
},
|
| 12986 |
+
{
|
| 12987 |
+
"epoch": 0.8139309112897409,
|
| 12988 |
+
"grad_norm": 69.21589660644531,
|
| 12989 |
+
"learning_rate": 9.96820584582505e-06,
|
| 12990 |
+
"loss": 10.3777,
|
| 12991 |
+
"step": 18340
|
| 12992 |
+
},
|
| 12993 |
+
{
|
| 12994 |
+
"epoch": 0.8143747122228324,
|
| 12995 |
+
"grad_norm": 68.85872650146484,
|
| 12996 |
+
"learning_rate": 9.968188509863122e-06,
|
| 12997 |
+
"loss": 10.7136,
|
| 12998 |
+
"step": 18350
|
| 12999 |
+
},
|
| 13000 |
+
{
|
| 13001 |
+
"epoch": 0.8148185131559239,
|
| 13002 |
+
"grad_norm": 63.11106491088867,
|
| 13003 |
+
"learning_rate": 9.968171173901195e-06,
|
| 13004 |
+
"loss": 11.0215,
|
| 13005 |
+
"step": 18360
|
| 13006 |
+
},
|
| 13007 |
+
{
|
| 13008 |
+
"epoch": 0.8152623140890154,
|
| 13009 |
+
"grad_norm": 56.74385070800781,
|
| 13010 |
+
"learning_rate": 9.968153837939268e-06,
|
| 13011 |
+
"loss": 10.032,
|
| 13012 |
+
"step": 18370
|
| 13013 |
+
},
|
| 13014 |
+
{
|
| 13015 |
+
"epoch": 0.8157061150221069,
|
| 13016 |
+
"grad_norm": 65.63390350341797,
|
| 13017 |
+
"learning_rate": 9.968136501977341e-06,
|
| 13018 |
+
"loss": 10.615,
|
| 13019 |
+
"step": 18380
|
| 13020 |
+
},
|
| 13021 |
+
{
|
| 13022 |
+
"epoch": 0.8161499159551983,
|
| 13023 |
+
"grad_norm": 58.63720703125,
|
| 13024 |
+
"learning_rate": 9.968119166015412e-06,
|
| 13025 |
+
"loss": 10.3503,
|
| 13026 |
+
"step": 18390
|
| 13027 |
+
},
|
| 13028 |
+
{
|
| 13029 |
+
"epoch": 0.8165937168882897,
|
| 13030 |
+
"grad_norm": 60.3001708984375,
|
| 13031 |
+
"learning_rate": 9.968101830053485e-06,
|
| 13032 |
+
"loss": 10.2183,
|
| 13033 |
+
"step": 18400
|
| 13034 |
+
},
|
| 13035 |
+
{
|
| 13036 |
+
"epoch": 0.8170375178213812,
|
| 13037 |
+
"grad_norm": 68.03216552734375,
|
| 13038 |
+
"learning_rate": 9.968084494091559e-06,
|
| 13039 |
+
"loss": 10.5718,
|
| 13040 |
+
"step": 18410
|
| 13041 |
+
},
|
| 13042 |
+
{
|
| 13043 |
+
"epoch": 0.8174813187544727,
|
| 13044 |
+
"grad_norm": 71.72623443603516,
|
| 13045 |
+
"learning_rate": 9.96806715812963e-06,
|
| 13046 |
+
"loss": 10.7557,
|
| 13047 |
+
"step": 18420
|
| 13048 |
+
},
|
| 13049 |
+
{
|
| 13050 |
+
"epoch": 0.8179251196875641,
|
| 13051 |
+
"grad_norm": 69.74810791015625,
|
| 13052 |
+
"learning_rate": 9.968049822167703e-06,
|
| 13053 |
+
"loss": 10.1841,
|
| 13054 |
+
"step": 18430
|
| 13055 |
+
},
|
| 13056 |
+
{
|
| 13057 |
+
"epoch": 0.8183689206206556,
|
| 13058 |
+
"grad_norm": 58.47687530517578,
|
| 13059 |
+
"learning_rate": 9.968032486205776e-06,
|
| 13060 |
+
"loss": 10.0264,
|
| 13061 |
+
"step": 18440
|
| 13062 |
+
},
|
| 13063 |
+
{
|
| 13064 |
+
"epoch": 0.8188127215537471,
|
| 13065 |
+
"grad_norm": 67.85263061523438,
|
| 13066 |
+
"learning_rate": 9.968015150243847e-06,
|
| 13067 |
+
"loss": 10.3225,
|
| 13068 |
+
"step": 18450
|
| 13069 |
+
},
|
| 13070 |
+
{
|
| 13071 |
+
"epoch": 0.8192565224868386,
|
| 13072 |
+
"grad_norm": 67.8355712890625,
|
| 13073 |
+
"learning_rate": 9.96799781428192e-06,
|
| 13074 |
+
"loss": 9.9914,
|
| 13075 |
+
"step": 18460
|
| 13076 |
+
},
|
| 13077 |
+
{
|
| 13078 |
+
"epoch": 0.81970032341993,
|
| 13079 |
+
"grad_norm": 74.0328140258789,
|
| 13080 |
+
"learning_rate": 9.967980478319994e-06,
|
| 13081 |
+
"loss": 10.3044,
|
| 13082 |
+
"step": 18470
|
| 13083 |
+
},
|
| 13084 |
+
{
|
| 13085 |
+
"epoch": 0.8201441243530214,
|
| 13086 |
+
"grad_norm": 75.59931945800781,
|
| 13087 |
+
"learning_rate": 9.967963142358065e-06,
|
| 13088 |
+
"loss": 10.6398,
|
| 13089 |
+
"step": 18480
|
| 13090 |
+
},
|
| 13091 |
+
{
|
| 13092 |
+
"epoch": 0.8205879252861129,
|
| 13093 |
+
"grad_norm": 59.03470230102539,
|
| 13094 |
+
"learning_rate": 9.967945806396138e-06,
|
| 13095 |
+
"loss": 10.1309,
|
| 13096 |
+
"step": 18490
|
| 13097 |
+
},
|
| 13098 |
+
{
|
| 13099 |
+
"epoch": 0.8210317262192044,
|
| 13100 |
+
"grad_norm": 63.74763107299805,
|
| 13101 |
+
"learning_rate": 9.967928470434211e-06,
|
| 13102 |
+
"loss": 9.8492,
|
| 13103 |
+
"step": 18500
|
| 13104 |
+
},
|
| 13105 |
+
{
|
| 13106 |
+
"epoch": 0.8214755271522959,
|
| 13107 |
+
"grad_norm": 58.71684265136719,
|
| 13108 |
+
"learning_rate": 9.967911134472284e-06,
|
| 13109 |
+
"loss": 10.0112,
|
| 13110 |
+
"step": 18510
|
| 13111 |
+
},
|
| 13112 |
+
{
|
| 13113 |
+
"epoch": 0.8219193280853873,
|
| 13114 |
+
"grad_norm": 70.03022003173828,
|
| 13115 |
+
"learning_rate": 9.967893798510356e-06,
|
| 13116 |
+
"loss": 10.6968,
|
| 13117 |
+
"step": 18520
|
| 13118 |
+
},
|
| 13119 |
+
{
|
| 13120 |
+
"epoch": 0.8223631290184787,
|
| 13121 |
+
"grad_norm": 61.144004821777344,
|
| 13122 |
+
"learning_rate": 9.967876462548429e-06,
|
| 13123 |
+
"loss": 10.3381,
|
| 13124 |
+
"step": 18530
|
| 13125 |
+
},
|
| 13126 |
+
{
|
| 13127 |
+
"epoch": 0.8228069299515702,
|
| 13128 |
+
"grad_norm": 67.76824188232422,
|
| 13129 |
+
"learning_rate": 9.967859126586502e-06,
|
| 13130 |
+
"loss": 10.2819,
|
| 13131 |
+
"step": 18540
|
| 13132 |
+
},
|
| 13133 |
+
{
|
| 13134 |
+
"epoch": 0.8232507308846617,
|
| 13135 |
+
"grad_norm": 64.85346221923828,
|
| 13136 |
+
"learning_rate": 9.967841790624573e-06,
|
| 13137 |
+
"loss": 10.7358,
|
| 13138 |
+
"step": 18550
|
| 13139 |
+
},
|
| 13140 |
+
{
|
| 13141 |
+
"epoch": 0.8236945318177531,
|
| 13142 |
+
"grad_norm": 64.5184326171875,
|
| 13143 |
+
"learning_rate": 9.967824454662646e-06,
|
| 13144 |
+
"loss": 10.49,
|
| 13145 |
+
"step": 18560
|
| 13146 |
+
},
|
| 13147 |
+
{
|
| 13148 |
+
"epoch": 0.8241383327508446,
|
| 13149 |
+
"grad_norm": 69.41261291503906,
|
| 13150 |
+
"learning_rate": 9.96780711870072e-06,
|
| 13151 |
+
"loss": 10.6639,
|
| 13152 |
+
"step": 18570
|
| 13153 |
+
},
|
| 13154 |
+
{
|
| 13155 |
+
"epoch": 0.8245821336839361,
|
| 13156 |
+
"grad_norm": 67.25212097167969,
|
| 13157 |
+
"learning_rate": 9.96778978273879e-06,
|
| 13158 |
+
"loss": 10.5115,
|
| 13159 |
+
"step": 18580
|
| 13160 |
+
},
|
| 13161 |
+
{
|
| 13162 |
+
"epoch": 0.8250259346170276,
|
| 13163 |
+
"grad_norm": 62.52476501464844,
|
| 13164 |
+
"learning_rate": 9.967772446776864e-06,
|
| 13165 |
+
"loss": 10.0552,
|
| 13166 |
+
"step": 18590
|
| 13167 |
+
},
|
| 13168 |
+
{
|
| 13169 |
+
"epoch": 0.825469735550119,
|
| 13170 |
+
"grad_norm": 62.43718719482422,
|
| 13171 |
+
"learning_rate": 9.967755110814937e-06,
|
| 13172 |
+
"loss": 10.2405,
|
| 13173 |
+
"step": 18600
|
| 13174 |
+
},
|
| 13175 |
+
{
|
| 13176 |
+
"epoch": 0.8259135364832104,
|
| 13177 |
+
"grad_norm": 67.1116714477539,
|
| 13178 |
+
"learning_rate": 9.96773777485301e-06,
|
| 13179 |
+
"loss": 10.4917,
|
| 13180 |
+
"step": 18610
|
| 13181 |
+
},
|
| 13182 |
+
{
|
| 13183 |
+
"epoch": 0.8263573374163019,
|
| 13184 |
+
"grad_norm": 67.36260986328125,
|
| 13185 |
+
"learning_rate": 9.967720438891081e-06,
|
| 13186 |
+
"loss": 9.8809,
|
| 13187 |
+
"step": 18620
|
| 13188 |
+
},
|
| 13189 |
+
{
|
| 13190 |
+
"epoch": 0.8268011383493934,
|
| 13191 |
+
"grad_norm": 69.18153381347656,
|
| 13192 |
+
"learning_rate": 9.967703102929154e-06,
|
| 13193 |
+
"loss": 10.0411,
|
| 13194 |
+
"step": 18630
|
| 13195 |
+
},
|
| 13196 |
+
{
|
| 13197 |
+
"epoch": 0.8272449392824849,
|
| 13198 |
+
"grad_norm": 54.77642059326172,
|
| 13199 |
+
"learning_rate": 9.967685766967227e-06,
|
| 13200 |
+
"loss": 10.2812,
|
| 13201 |
+
"step": 18640
|
| 13202 |
+
},
|
| 13203 |
+
{
|
| 13204 |
+
"epoch": 0.8276887402155763,
|
| 13205 |
+
"grad_norm": 64.23429107666016,
|
| 13206 |
+
"learning_rate": 9.967668431005299e-06,
|
| 13207 |
+
"loss": 10.2792,
|
| 13208 |
+
"step": 18650
|
| 13209 |
+
},
|
| 13210 |
+
{
|
| 13211 |
+
"epoch": 0.8281325411486677,
|
| 13212 |
+
"grad_norm": 77.61302185058594,
|
| 13213 |
+
"learning_rate": 9.967651095043372e-06,
|
| 13214 |
+
"loss": 10.5122,
|
| 13215 |
+
"step": 18660
|
| 13216 |
+
},
|
| 13217 |
+
{
|
| 13218 |
+
"epoch": 0.8285763420817592,
|
| 13219 |
+
"grad_norm": 63.884666442871094,
|
| 13220 |
+
"learning_rate": 9.967633759081445e-06,
|
| 13221 |
+
"loss": 10.5592,
|
| 13222 |
+
"step": 18670
|
| 13223 |
+
},
|
| 13224 |
+
{
|
| 13225 |
+
"epoch": 0.8290201430148507,
|
| 13226 |
+
"grad_norm": 68.2164077758789,
|
| 13227 |
+
"learning_rate": 9.967616423119516e-06,
|
| 13228 |
+
"loss": 10.0031,
|
| 13229 |
+
"step": 18680
|
| 13230 |
+
},
|
| 13231 |
+
{
|
| 13232 |
+
"epoch": 0.8294639439479422,
|
| 13233 |
+
"grad_norm": 64.70232391357422,
|
| 13234 |
+
"learning_rate": 9.96759908715759e-06,
|
| 13235 |
+
"loss": 10.0672,
|
| 13236 |
+
"step": 18690
|
| 13237 |
+
},
|
| 13238 |
+
{
|
| 13239 |
+
"epoch": 0.8299077448810336,
|
| 13240 |
+
"grad_norm": 70.52904510498047,
|
| 13241 |
+
"learning_rate": 9.967581751195663e-06,
|
| 13242 |
+
"loss": 10.8457,
|
| 13243 |
+
"step": 18700
|
| 13244 |
+
},
|
| 13245 |
+
{
|
| 13246 |
+
"epoch": 0.8303515458141251,
|
| 13247 |
+
"grad_norm": 74.24815368652344,
|
| 13248 |
+
"learning_rate": 9.967564415233734e-06,
|
| 13249 |
+
"loss": 10.2618,
|
| 13250 |
+
"step": 18710
|
| 13251 |
+
},
|
| 13252 |
+
{
|
| 13253 |
+
"epoch": 0.8307953467472166,
|
| 13254 |
+
"grad_norm": 70.21379852294922,
|
| 13255 |
+
"learning_rate": 9.967547079271807e-06,
|
| 13256 |
+
"loss": 10.6413,
|
| 13257 |
+
"step": 18720
|
| 13258 |
+
},
|
| 13259 |
+
{
|
| 13260 |
+
"epoch": 0.831239147680308,
|
| 13261 |
+
"grad_norm": 59.27021408081055,
|
| 13262 |
+
"learning_rate": 9.96752974330988e-06,
|
| 13263 |
+
"loss": 10.0213,
|
| 13264 |
+
"step": 18730
|
| 13265 |
+
},
|
| 13266 |
+
{
|
| 13267 |
+
"epoch": 0.8316829486133994,
|
| 13268 |
+
"grad_norm": 68.8056869506836,
|
| 13269 |
+
"learning_rate": 9.967512407347953e-06,
|
| 13270 |
+
"loss": 10.6691,
|
| 13271 |
+
"step": 18740
|
| 13272 |
+
},
|
| 13273 |
+
{
|
| 13274 |
+
"epoch": 0.8321267495464909,
|
| 13275 |
+
"grad_norm": 59.90221405029297,
|
| 13276 |
+
"learning_rate": 9.967495071386025e-06,
|
| 13277 |
+
"loss": 10.2026,
|
| 13278 |
+
"step": 18750
|
| 13279 |
+
},
|
| 13280 |
+
{
|
| 13281 |
+
"epoch": 0.8325705504795824,
|
| 13282 |
+
"grad_norm": 59.39807891845703,
|
| 13283 |
+
"learning_rate": 9.967477735424098e-06,
|
| 13284 |
+
"loss": 10.296,
|
| 13285 |
+
"step": 18760
|
| 13286 |
+
},
|
| 13287 |
+
{
|
| 13288 |
+
"epoch": 0.8330143514126739,
|
| 13289 |
+
"grad_norm": 60.97962951660156,
|
| 13290 |
+
"learning_rate": 9.96746039946217e-06,
|
| 13291 |
+
"loss": 10.4608,
|
| 13292 |
+
"step": 18770
|
| 13293 |
+
},
|
| 13294 |
+
{
|
| 13295 |
+
"epoch": 0.8334581523457653,
|
| 13296 |
+
"grad_norm": 69.33479309082031,
|
| 13297 |
+
"learning_rate": 9.967443063500242e-06,
|
| 13298 |
+
"loss": 10.1023,
|
| 13299 |
+
"step": 18780
|
| 13300 |
+
},
|
| 13301 |
+
{
|
| 13302 |
+
"epoch": 0.8339019532788567,
|
| 13303 |
+
"grad_norm": 62.024993896484375,
|
| 13304 |
+
"learning_rate": 9.967425727538315e-06,
|
| 13305 |
+
"loss": 10.1188,
|
| 13306 |
+
"step": 18790
|
| 13307 |
+
},
|
| 13308 |
+
{
|
| 13309 |
+
"epoch": 0.8343457542119482,
|
| 13310 |
+
"grad_norm": 70.13167572021484,
|
| 13311 |
+
"learning_rate": 9.967408391576388e-06,
|
| 13312 |
+
"loss": 10.5289,
|
| 13313 |
+
"step": 18800
|
| 13314 |
+
},
|
| 13315 |
+
{
|
| 13316 |
+
"epoch": 0.8347895551450397,
|
| 13317 |
+
"grad_norm": 59.78411865234375,
|
| 13318 |
+
"learning_rate": 9.96739105561446e-06,
|
| 13319 |
+
"loss": 10.0232,
|
| 13320 |
+
"step": 18810
|
| 13321 |
+
},
|
| 13322 |
+
{
|
| 13323 |
+
"epoch": 0.8352333560781312,
|
| 13324 |
+
"grad_norm": 65.34579467773438,
|
| 13325 |
+
"learning_rate": 9.967373719652533e-06,
|
| 13326 |
+
"loss": 10.3009,
|
| 13327 |
+
"step": 18820
|
| 13328 |
+
},
|
| 13329 |
+
{
|
| 13330 |
+
"epoch": 0.8356771570112226,
|
| 13331 |
+
"grad_norm": 66.02912902832031,
|
| 13332 |
+
"learning_rate": 9.967356383690606e-06,
|
| 13333 |
+
"loss": 10.4396,
|
| 13334 |
+
"step": 18830
|
| 13335 |
+
},
|
| 13336 |
+
{
|
| 13337 |
+
"epoch": 0.8361209579443141,
|
| 13338 |
+
"grad_norm": 64.6055679321289,
|
| 13339 |
+
"learning_rate": 9.967339047728677e-06,
|
| 13340 |
+
"loss": 11.1638,
|
| 13341 |
+
"step": 18840
|
| 13342 |
+
},
|
| 13343 |
+
{
|
| 13344 |
+
"epoch": 0.8365647588774056,
|
| 13345 |
+
"grad_norm": 68.47040557861328,
|
| 13346 |
+
"learning_rate": 9.96732171176675e-06,
|
| 13347 |
+
"loss": 10.3824,
|
| 13348 |
+
"step": 18850
|
| 13349 |
+
},
|
| 13350 |
+
{
|
| 13351 |
+
"epoch": 0.837008559810497,
|
| 13352 |
+
"grad_norm": 70.76081848144531,
|
| 13353 |
+
"learning_rate": 9.967304375804823e-06,
|
| 13354 |
+
"loss": 10.4553,
|
| 13355 |
+
"step": 18860
|
| 13356 |
+
},
|
| 13357 |
+
{
|
| 13358 |
+
"epoch": 0.8374523607435884,
|
| 13359 |
+
"grad_norm": 69.01679229736328,
|
| 13360 |
+
"learning_rate": 9.967287039842896e-06,
|
| 13361 |
+
"loss": 10.7882,
|
| 13362 |
+
"step": 18870
|
| 13363 |
+
},
|
| 13364 |
+
{
|
| 13365 |
+
"epoch": 0.8378961616766799,
|
| 13366 |
+
"grad_norm": 72.1138687133789,
|
| 13367 |
+
"learning_rate": 9.967269703880968e-06,
|
| 13368 |
+
"loss": 10.7932,
|
| 13369 |
+
"step": 18880
|
| 13370 |
+
},
|
| 13371 |
+
{
|
| 13372 |
+
"epoch": 0.8383399626097714,
|
| 13373 |
+
"grad_norm": 63.26852035522461,
|
| 13374 |
+
"learning_rate": 9.967252367919041e-06,
|
| 13375 |
+
"loss": 9.8872,
|
| 13376 |
+
"step": 18890
|
| 13377 |
+
},
|
| 13378 |
+
{
|
| 13379 |
+
"epoch": 0.8387837635428629,
|
| 13380 |
+
"grad_norm": 74.27698516845703,
|
| 13381 |
+
"learning_rate": 9.967235031957114e-06,
|
| 13382 |
+
"loss": 10.3047,
|
| 13383 |
+
"step": 18900
|
| 13384 |
+
},
|
| 13385 |
+
{
|
| 13386 |
+
"epoch": 0.8392275644759543,
|
| 13387 |
+
"grad_norm": 69.13713073730469,
|
| 13388 |
+
"learning_rate": 9.967217695995185e-06,
|
| 13389 |
+
"loss": 10.4936,
|
| 13390 |
+
"step": 18910
|
| 13391 |
+
},
|
| 13392 |
+
{
|
| 13393 |
+
"epoch": 0.8396713654090457,
|
| 13394 |
+
"grad_norm": 66.47625732421875,
|
| 13395 |
+
"learning_rate": 9.967200360033258e-06,
|
| 13396 |
+
"loss": 10.6965,
|
| 13397 |
+
"step": 18920
|
| 13398 |
+
},
|
| 13399 |
+
{
|
| 13400 |
+
"epoch": 0.8401151663421372,
|
| 13401 |
+
"grad_norm": 62.18655776977539,
|
| 13402 |
+
"learning_rate": 9.967183024071331e-06,
|
| 13403 |
+
"loss": 10.3633,
|
| 13404 |
+
"step": 18930
|
| 13405 |
+
},
|
| 13406 |
+
{
|
| 13407 |
+
"epoch": 0.8405589672752287,
|
| 13408 |
+
"grad_norm": 60.379478454589844,
|
| 13409 |
+
"learning_rate": 9.967165688109403e-06,
|
| 13410 |
+
"loss": 10.4711,
|
| 13411 |
+
"step": 18940
|
| 13412 |
+
},
|
| 13413 |
+
{
|
| 13414 |
+
"epoch": 0.8410027682083202,
|
| 13415 |
+
"grad_norm": 62.18358612060547,
|
| 13416 |
+
"learning_rate": 9.967148352147476e-06,
|
| 13417 |
+
"loss": 10.3252,
|
| 13418 |
+
"step": 18950
|
| 13419 |
+
},
|
| 13420 |
+
{
|
| 13421 |
+
"epoch": 0.8414465691414116,
|
| 13422 |
+
"grad_norm": 69.07564544677734,
|
| 13423 |
+
"learning_rate": 9.967131016185549e-06,
|
| 13424 |
+
"loss": 10.785,
|
| 13425 |
+
"step": 18960
|
| 13426 |
+
},
|
| 13427 |
+
{
|
| 13428 |
+
"epoch": 0.8418903700745031,
|
| 13429 |
+
"grad_norm": 67.07147216796875,
|
| 13430 |
+
"learning_rate": 9.96711368022362e-06,
|
| 13431 |
+
"loss": 10.6486,
|
| 13432 |
+
"step": 18970
|
| 13433 |
+
},
|
| 13434 |
+
{
|
| 13435 |
+
"epoch": 0.8423341710075946,
|
| 13436 |
+
"grad_norm": 68.17425537109375,
|
| 13437 |
+
"learning_rate": 9.967096344261693e-06,
|
| 13438 |
+
"loss": 10.2245,
|
| 13439 |
+
"step": 18980
|
| 13440 |
+
},
|
| 13441 |
+
{
|
| 13442 |
+
"epoch": 0.842777971940686,
|
| 13443 |
+
"grad_norm": 62.00086975097656,
|
| 13444 |
+
"learning_rate": 9.967079008299767e-06,
|
| 13445 |
+
"loss": 10.4259,
|
| 13446 |
+
"step": 18990
|
| 13447 |
+
},
|
| 13448 |
+
{
|
| 13449 |
+
"epoch": 0.8432217728737774,
|
| 13450 |
+
"grad_norm": 68.44880676269531,
|
| 13451 |
+
"learning_rate": 9.96706167233784e-06,
|
| 13452 |
+
"loss": 10.5632,
|
| 13453 |
+
"step": 19000
|
| 13454 |
+
},
|
| 13455 |
+
{
|
| 13456 |
+
"epoch": 0.8432217728737774,
|
| 13457 |
+
"eval_loss": 0.3246602714061737,
|
| 13458 |
+
"eval_runtime": 673.5905,
|
| 13459 |
+
"eval_samples_per_second": 1802.863,
|
| 13460 |
+
"eval_steps_per_second": 56.34,
|
| 13461 |
+
"step": 19000
|
| 13462 |
+
},
|
| 13463 |
+
{
|
| 13464 |
+
"epoch": 0.8436655738068689,
|
| 13465 |
+
"grad_norm": 67.45813751220703,
|
| 13466 |
+
"learning_rate": 9.967044336375911e-06,
|
| 13467 |
+
"loss": 10.3405,
|
| 13468 |
+
"step": 19010
|
| 13469 |
+
},
|
| 13470 |
+
{
|
| 13471 |
+
"epoch": 0.8441093747399604,
|
| 13472 |
+
"grad_norm": 71.77626037597656,
|
| 13473 |
+
"learning_rate": 9.967027000413984e-06,
|
| 13474 |
+
"loss": 10.9004,
|
| 13475 |
+
"step": 19020
|
| 13476 |
+
},
|
| 13477 |
+
{
|
| 13478 |
+
"epoch": 0.8445531756730519,
|
| 13479 |
+
"grad_norm": 63.87392044067383,
|
| 13480 |
+
"learning_rate": 9.967009664452057e-06,
|
| 13481 |
+
"loss": 10.5393,
|
| 13482 |
+
"step": 19030
|
| 13483 |
+
},
|
| 13484 |
+
{
|
| 13485 |
+
"epoch": 0.8449969766061434,
|
| 13486 |
+
"grad_norm": 62.10248947143555,
|
| 13487 |
+
"learning_rate": 9.966992328490129e-06,
|
| 13488 |
+
"loss": 10.1483,
|
| 13489 |
+
"step": 19040
|
| 13490 |
+
},
|
| 13491 |
+
{
|
| 13492 |
+
"epoch": 0.8454407775392347,
|
| 13493 |
+
"grad_norm": 58.07029342651367,
|
| 13494 |
+
"learning_rate": 9.966974992528202e-06,
|
| 13495 |
+
"loss": 10.4056,
|
| 13496 |
+
"step": 19050
|
| 13497 |
+
},
|
| 13498 |
+
{
|
| 13499 |
+
"epoch": 0.8458845784723262,
|
| 13500 |
+
"grad_norm": 69.88272094726562,
|
| 13501 |
+
"learning_rate": 9.966957656566275e-06,
|
| 13502 |
+
"loss": 10.3336,
|
| 13503 |
+
"step": 19060
|
| 13504 |
+
},
|
| 13505 |
+
{
|
| 13506 |
+
"epoch": 0.8463283794054177,
|
| 13507 |
+
"grad_norm": 57.19210433959961,
|
| 13508 |
+
"learning_rate": 9.966940320604346e-06,
|
| 13509 |
+
"loss": 10.5325,
|
| 13510 |
+
"step": 19070
|
| 13511 |
+
},
|
| 13512 |
+
{
|
| 13513 |
+
"epoch": 0.8467721803385092,
|
| 13514 |
+
"grad_norm": 68.29473876953125,
|
| 13515 |
+
"learning_rate": 9.96692298464242e-06,
|
| 13516 |
+
"loss": 10.5171,
|
| 13517 |
+
"step": 19080
|
| 13518 |
+
},
|
| 13519 |
+
{
|
| 13520 |
+
"epoch": 0.8472159812716006,
|
| 13521 |
+
"grad_norm": 61.379425048828125,
|
| 13522 |
+
"learning_rate": 9.966905648680492e-06,
|
| 13523 |
+
"loss": 9.9401,
|
| 13524 |
+
"step": 19090
|
| 13525 |
+
},
|
| 13526 |
+
{
|
| 13527 |
+
"epoch": 0.8476597822046921,
|
| 13528 |
+
"grad_norm": 68.13114929199219,
|
| 13529 |
+
"learning_rate": 9.966888312718564e-06,
|
| 13530 |
+
"loss": 10.4374,
|
| 13531 |
+
"step": 19100
|
| 13532 |
+
},
|
| 13533 |
+
{
|
| 13534 |
+
"epoch": 0.8481035831377836,
|
| 13535 |
+
"grad_norm": 64.97882843017578,
|
| 13536 |
+
"learning_rate": 9.966870976756637e-06,
|
| 13537 |
+
"loss": 10.4241,
|
| 13538 |
+
"step": 19110
|
| 13539 |
+
},
|
| 13540 |
+
{
|
| 13541 |
+
"epoch": 0.848547384070875,
|
| 13542 |
+
"grad_norm": 66.36862182617188,
|
| 13543 |
+
"learning_rate": 9.96685364079471e-06,
|
| 13544 |
+
"loss": 10.3443,
|
| 13545 |
+
"step": 19120
|
| 13546 |
+
},
|
| 13547 |
+
{
|
| 13548 |
+
"epoch": 0.8489911850039664,
|
| 13549 |
+
"grad_norm": 68.75626373291016,
|
| 13550 |
+
"learning_rate": 9.966836304832783e-06,
|
| 13551 |
+
"loss": 10.2445,
|
| 13552 |
+
"step": 19130
|
| 13553 |
+
},
|
| 13554 |
+
{
|
| 13555 |
+
"epoch": 0.8494349859370579,
|
| 13556 |
+
"grad_norm": 69.54931640625,
|
| 13557 |
+
"learning_rate": 9.966818968870854e-06,
|
| 13558 |
+
"loss": 10.1782,
|
| 13559 |
+
"step": 19140
|
| 13560 |
+
},
|
| 13561 |
+
{
|
| 13562 |
+
"epoch": 0.8498787868701494,
|
| 13563 |
+
"grad_norm": 62.799842834472656,
|
| 13564 |
+
"learning_rate": 9.966801632908927e-06,
|
| 13565 |
+
"loss": 9.9597,
|
| 13566 |
+
"step": 19150
|
| 13567 |
+
},
|
| 13568 |
+
{
|
| 13569 |
+
"epoch": 0.8503225878032409,
|
| 13570 |
+
"grad_norm": 72.54212951660156,
|
| 13571 |
+
"learning_rate": 9.966784296947e-06,
|
| 13572 |
+
"loss": 10.6076,
|
| 13573 |
+
"step": 19160
|
| 13574 |
+
},
|
| 13575 |
+
{
|
| 13576 |
+
"epoch": 0.8507663887363324,
|
| 13577 |
+
"grad_norm": 66.57682037353516,
|
| 13578 |
+
"learning_rate": 9.966766960985072e-06,
|
| 13579 |
+
"loss": 10.3762,
|
| 13580 |
+
"step": 19170
|
| 13581 |
+
},
|
| 13582 |
+
{
|
| 13583 |
+
"epoch": 0.8512101896694237,
|
| 13584 |
+
"grad_norm": 59.173683166503906,
|
| 13585 |
+
"learning_rate": 9.966749625023145e-06,
|
| 13586 |
+
"loss": 10.1135,
|
| 13587 |
+
"step": 19180
|
| 13588 |
+
},
|
| 13589 |
+
{
|
| 13590 |
+
"epoch": 0.8516539906025152,
|
| 13591 |
+
"grad_norm": 74.29920959472656,
|
| 13592 |
+
"learning_rate": 9.966732289061218e-06,
|
| 13593 |
+
"loss": 10.5823,
|
| 13594 |
+
"step": 19190
|
| 13595 |
+
},
|
| 13596 |
+
{
|
| 13597 |
+
"epoch": 0.8520977915356067,
|
| 13598 |
+
"grad_norm": 65.05313873291016,
|
| 13599 |
+
"learning_rate": 9.96671495309929e-06,
|
| 13600 |
+
"loss": 10.5004,
|
| 13601 |
+
"step": 19200
|
| 13602 |
+
},
|
| 13603 |
+
{
|
| 13604 |
+
"epoch": 0.8525415924686982,
|
| 13605 |
+
"grad_norm": 69.46266174316406,
|
| 13606 |
+
"learning_rate": 9.966697617137362e-06,
|
| 13607 |
+
"loss": 10.02,
|
| 13608 |
+
"step": 19210
|
| 13609 |
+
},
|
| 13610 |
+
{
|
| 13611 |
+
"epoch": 0.8529853934017896,
|
| 13612 |
+
"grad_norm": 64.3421859741211,
|
| 13613 |
+
"learning_rate": 9.966680281175436e-06,
|
| 13614 |
+
"loss": 10.7279,
|
| 13615 |
+
"step": 19220
|
| 13616 |
+
},
|
| 13617 |
+
{
|
| 13618 |
+
"epoch": 0.8534291943348811,
|
| 13619 |
+
"grad_norm": 69.0867919921875,
|
| 13620 |
+
"learning_rate": 9.966662945213507e-06,
|
| 13621 |
+
"loss": 10.4257,
|
| 13622 |
+
"step": 19230
|
| 13623 |
+
},
|
| 13624 |
+
{
|
| 13625 |
+
"epoch": 0.8538729952679726,
|
| 13626 |
+
"grad_norm": 70.24497985839844,
|
| 13627 |
+
"learning_rate": 9.96664560925158e-06,
|
| 13628 |
+
"loss": 10.169,
|
| 13629 |
+
"step": 19240
|
| 13630 |
+
},
|
| 13631 |
+
{
|
| 13632 |
+
"epoch": 0.854316796201064,
|
| 13633 |
+
"grad_norm": 67.85358428955078,
|
| 13634 |
+
"learning_rate": 9.966628273289653e-06,
|
| 13635 |
+
"loss": 10.3167,
|
| 13636 |
+
"step": 19250
|
| 13637 |
+
},
|
| 13638 |
+
{
|
| 13639 |
+
"epoch": 0.8547605971341554,
|
| 13640 |
+
"grad_norm": 70.9292221069336,
|
| 13641 |
+
"learning_rate": 9.966610937327724e-06,
|
| 13642 |
+
"loss": 11.0968,
|
| 13643 |
+
"step": 19260
|
| 13644 |
+
},
|
| 13645 |
+
{
|
| 13646 |
+
"epoch": 0.8552043980672469,
|
| 13647 |
+
"grad_norm": 71.09864044189453,
|
| 13648 |
+
"learning_rate": 9.966593601365798e-06,
|
| 13649 |
+
"loss": 10.5742,
|
| 13650 |
+
"step": 19270
|
| 13651 |
+
},
|
| 13652 |
+
{
|
| 13653 |
+
"epoch": 0.8556481990003384,
|
| 13654 |
+
"grad_norm": 69.86164093017578,
|
| 13655 |
+
"learning_rate": 9.96657626540387e-06,
|
| 13656 |
+
"loss": 10.5123,
|
| 13657 |
+
"step": 19280
|
| 13658 |
+
},
|
| 13659 |
+
{
|
| 13660 |
+
"epoch": 0.8560919999334299,
|
| 13661 |
+
"grad_norm": 56.01103210449219,
|
| 13662 |
+
"learning_rate": 9.966558929441942e-06,
|
| 13663 |
+
"loss": 10.0181,
|
| 13664 |
+
"step": 19290
|
| 13665 |
+
},
|
| 13666 |
+
{
|
| 13667 |
+
"epoch": 0.8565358008665214,
|
| 13668 |
+
"grad_norm": 70.41612243652344,
|
| 13669 |
+
"learning_rate": 9.966541593480015e-06,
|
| 13670 |
+
"loss": 10.4778,
|
| 13671 |
+
"step": 19300
|
| 13672 |
+
},
|
| 13673 |
+
{
|
| 13674 |
+
"epoch": 0.8569796017996127,
|
| 13675 |
+
"grad_norm": 66.11145782470703,
|
| 13676 |
+
"learning_rate": 9.966524257518088e-06,
|
| 13677 |
+
"loss": 10.7167,
|
| 13678 |
+
"step": 19310
|
| 13679 |
+
},
|
| 13680 |
+
{
|
| 13681 |
+
"epoch": 0.8574234027327042,
|
| 13682 |
+
"grad_norm": 72.80441284179688,
|
| 13683 |
+
"learning_rate": 9.966506921556161e-06,
|
| 13684 |
+
"loss": 10.3861,
|
| 13685 |
+
"step": 19320
|
| 13686 |
+
},
|
| 13687 |
+
{
|
| 13688 |
+
"epoch": 0.8578672036657957,
|
| 13689 |
+
"grad_norm": 62.77549362182617,
|
| 13690 |
+
"learning_rate": 9.966489585594233e-06,
|
| 13691 |
+
"loss": 10.2368,
|
| 13692 |
+
"step": 19330
|
| 13693 |
+
},
|
| 13694 |
+
{
|
| 13695 |
+
"epoch": 0.8583110045988872,
|
| 13696 |
+
"grad_norm": 68.18376922607422,
|
| 13697 |
+
"learning_rate": 9.966472249632306e-06,
|
| 13698 |
+
"loss": 10.3437,
|
| 13699 |
+
"step": 19340
|
| 13700 |
+
},
|
| 13701 |
+
{
|
| 13702 |
+
"epoch": 0.8587548055319786,
|
| 13703 |
+
"grad_norm": 66.24810028076172,
|
| 13704 |
+
"learning_rate": 9.966454913670379e-06,
|
| 13705 |
+
"loss": 10.0901,
|
| 13706 |
+
"step": 19350
|
| 13707 |
+
},
|
| 13708 |
+
{
|
| 13709 |
+
"epoch": 0.8591986064650701,
|
| 13710 |
+
"grad_norm": 68.41353607177734,
|
| 13711 |
+
"learning_rate": 9.96643757770845e-06,
|
| 13712 |
+
"loss": 10.6208,
|
| 13713 |
+
"step": 19360
|
| 13714 |
+
},
|
| 13715 |
+
{
|
| 13716 |
+
"epoch": 0.8596424073981616,
|
| 13717 |
+
"grad_norm": 61.160438537597656,
|
| 13718 |
+
"learning_rate": 9.966420241746523e-06,
|
| 13719 |
+
"loss": 10.2203,
|
| 13720 |
+
"step": 19370
|
| 13721 |
+
},
|
| 13722 |
+
{
|
| 13723 |
+
"epoch": 0.860086208331253,
|
| 13724 |
+
"grad_norm": 66.53337097167969,
|
| 13725 |
+
"learning_rate": 9.966402905784596e-06,
|
| 13726 |
+
"loss": 10.4119,
|
| 13727 |
+
"step": 19380
|
| 13728 |
+
},
|
| 13729 |
+
{
|
| 13730 |
+
"epoch": 0.8605300092643445,
|
| 13731 |
+
"grad_norm": 74.49799346923828,
|
| 13732 |
+
"learning_rate": 9.966385569822668e-06,
|
| 13733 |
+
"loss": 10.1698,
|
| 13734 |
+
"step": 19390
|
| 13735 |
+
},
|
| 13736 |
+
{
|
| 13737 |
+
"epoch": 0.8609738101974359,
|
| 13738 |
+
"grad_norm": 76.49808502197266,
|
| 13739 |
+
"learning_rate": 9.96636823386074e-06,
|
| 13740 |
+
"loss": 10.356,
|
| 13741 |
+
"step": 19400
|
| 13742 |
+
},
|
| 13743 |
+
{
|
| 13744 |
+
"epoch": 0.8614176111305274,
|
| 13745 |
+
"grad_norm": 72.61251068115234,
|
| 13746 |
+
"learning_rate": 9.966350897898814e-06,
|
| 13747 |
+
"loss": 10.0123,
|
| 13748 |
+
"step": 19410
|
| 13749 |
+
},
|
| 13750 |
+
{
|
| 13751 |
+
"epoch": 0.8618614120636189,
|
| 13752 |
+
"grad_norm": 69.5442123413086,
|
| 13753 |
+
"learning_rate": 9.966333561936885e-06,
|
| 13754 |
+
"loss": 10.7127,
|
| 13755 |
+
"step": 19420
|
| 13756 |
+
},
|
| 13757 |
+
{
|
| 13758 |
+
"epoch": 0.8623052129967104,
|
| 13759 |
+
"grad_norm": 75.41436767578125,
|
| 13760 |
+
"learning_rate": 9.966316225974958e-06,
|
| 13761 |
+
"loss": 10.2879,
|
| 13762 |
+
"step": 19430
|
| 13763 |
+
},
|
| 13764 |
+
{
|
| 13765 |
+
"epoch": 0.8627490139298017,
|
| 13766 |
+
"grad_norm": 62.93849563598633,
|
| 13767 |
+
"learning_rate": 9.966298890013031e-06,
|
| 13768 |
+
"loss": 10.1251,
|
| 13769 |
+
"step": 19440
|
| 13770 |
+
},
|
| 13771 |
+
{
|
| 13772 |
+
"epoch": 0.8631928148628932,
|
| 13773 |
+
"grad_norm": 61.55092239379883,
|
| 13774 |
+
"learning_rate": 9.966281554051103e-06,
|
| 13775 |
+
"loss": 10.4005,
|
| 13776 |
+
"step": 19450
|
| 13777 |
+
},
|
| 13778 |
+
{
|
| 13779 |
+
"epoch": 0.8636366157959847,
|
| 13780 |
+
"grad_norm": 64.35807037353516,
|
| 13781 |
+
"learning_rate": 9.966264218089176e-06,
|
| 13782 |
+
"loss": 10.3085,
|
| 13783 |
+
"step": 19460
|
| 13784 |
+
},
|
| 13785 |
+
{
|
| 13786 |
+
"epoch": 0.8640804167290762,
|
| 13787 |
+
"grad_norm": 63.883033752441406,
|
| 13788 |
+
"learning_rate": 9.966246882127249e-06,
|
| 13789 |
+
"loss": 10.7861,
|
| 13790 |
+
"step": 19470
|
| 13791 |
+
},
|
| 13792 |
+
{
|
| 13793 |
+
"epoch": 0.8645242176621676,
|
| 13794 |
+
"grad_norm": 62.51860809326172,
|
| 13795 |
+
"learning_rate": 9.96622954616532e-06,
|
| 13796 |
+
"loss": 10.5587,
|
| 13797 |
+
"step": 19480
|
| 13798 |
+
},
|
| 13799 |
+
{
|
| 13800 |
+
"epoch": 0.8649680185952591,
|
| 13801 |
+
"grad_norm": 67.6877212524414,
|
| 13802 |
+
"learning_rate": 9.966212210203393e-06,
|
| 13803 |
+
"loss": 10.5259,
|
| 13804 |
+
"step": 19490
|
| 13805 |
+
},
|
| 13806 |
+
{
|
| 13807 |
+
"epoch": 0.8654118195283506,
|
| 13808 |
+
"grad_norm": 55.97256851196289,
|
| 13809 |
+
"learning_rate": 9.966194874241466e-06,
|
| 13810 |
+
"loss": 10.0621,
|
| 13811 |
+
"step": 19500
|
| 13812 |
+
},
|
| 13813 |
+
{
|
| 13814 |
+
"epoch": 0.865855620461442,
|
| 13815 |
+
"grad_norm": 66.48442077636719,
|
| 13816 |
+
"learning_rate": 9.966177538279538e-06,
|
| 13817 |
+
"loss": 10.1117,
|
| 13818 |
+
"step": 19510
|
| 13819 |
+
},
|
| 13820 |
+
{
|
| 13821 |
+
"epoch": 0.8662994213945335,
|
| 13822 |
+
"grad_norm": 71.5040512084961,
|
| 13823 |
+
"learning_rate": 9.966160202317611e-06,
|
| 13824 |
+
"loss": 10.0883,
|
| 13825 |
+
"step": 19520
|
| 13826 |
+
},
|
| 13827 |
+
{
|
| 13828 |
+
"epoch": 0.8667432223276249,
|
| 13829 |
+
"grad_norm": 62.448360443115234,
|
| 13830 |
+
"learning_rate": 9.966142866355684e-06,
|
| 13831 |
+
"loss": 9.8626,
|
| 13832 |
+
"step": 19530
|
| 13833 |
+
},
|
| 13834 |
+
{
|
| 13835 |
+
"epoch": 0.8671870232607164,
|
| 13836 |
+
"grad_norm": 70.04524230957031,
|
| 13837 |
+
"learning_rate": 9.966125530393757e-06,
|
| 13838 |
+
"loss": 10.3496,
|
| 13839 |
+
"step": 19540
|
| 13840 |
+
},
|
| 13841 |
+
{
|
| 13842 |
+
"epoch": 0.8676308241938079,
|
| 13843 |
+
"grad_norm": 68.1249008178711,
|
| 13844 |
+
"learning_rate": 9.966108194431828e-06,
|
| 13845 |
+
"loss": 10.2206,
|
| 13846 |
+
"step": 19550
|
| 13847 |
+
},
|
| 13848 |
+
{
|
| 13849 |
+
"epoch": 0.8680746251268994,
|
| 13850 |
+
"grad_norm": 60.323795318603516,
|
| 13851 |
+
"learning_rate": 9.966090858469902e-06,
|
| 13852 |
+
"loss": 10.545,
|
| 13853 |
+
"step": 19560
|
| 13854 |
+
},
|
| 13855 |
+
{
|
| 13856 |
+
"epoch": 0.8685184260599907,
|
| 13857 |
+
"grad_norm": 62.9224853515625,
|
| 13858 |
+
"learning_rate": 9.966073522507975e-06,
|
| 13859 |
+
"loss": 10.4674,
|
| 13860 |
+
"step": 19570
|
| 13861 |
+
},
|
| 13862 |
+
{
|
| 13863 |
+
"epoch": 0.8689622269930822,
|
| 13864 |
+
"grad_norm": 60.80291748046875,
|
| 13865 |
+
"learning_rate": 9.966056186546046e-06,
|
| 13866 |
+
"loss": 10.1987,
|
| 13867 |
+
"step": 19580
|
| 13868 |
+
},
|
| 13869 |
+
{
|
| 13870 |
+
"epoch": 0.8694060279261737,
|
| 13871 |
+
"grad_norm": 53.80615234375,
|
| 13872 |
+
"learning_rate": 9.966038850584119e-06,
|
| 13873 |
+
"loss": 10.4711,
|
| 13874 |
+
"step": 19590
|
| 13875 |
+
},
|
| 13876 |
+
{
|
| 13877 |
+
"epoch": 0.8698498288592652,
|
| 13878 |
+
"grad_norm": 58.71421813964844,
|
| 13879 |
+
"learning_rate": 9.966021514622192e-06,
|
| 13880 |
+
"loss": 10.2597,
|
| 13881 |
+
"step": 19600
|
| 13882 |
+
},
|
| 13883 |
+
{
|
| 13884 |
+
"epoch": 0.8702936297923567,
|
| 13885 |
+
"grad_norm": 60.54587936401367,
|
| 13886 |
+
"learning_rate": 9.966004178660264e-06,
|
| 13887 |
+
"loss": 10.3637,
|
| 13888 |
+
"step": 19610
|
| 13889 |
+
},
|
| 13890 |
+
{
|
| 13891 |
+
"epoch": 0.8707374307254481,
|
| 13892 |
+
"grad_norm": 58.478153228759766,
|
| 13893 |
+
"learning_rate": 9.965986842698337e-06,
|
| 13894 |
+
"loss": 10.1861,
|
| 13895 |
+
"step": 19620
|
| 13896 |
+
},
|
| 13897 |
+
{
|
| 13898 |
+
"epoch": 0.8711812316585396,
|
| 13899 |
+
"grad_norm": 70.70100402832031,
|
| 13900 |
+
"learning_rate": 9.96596950673641e-06,
|
| 13901 |
+
"loss": 9.8945,
|
| 13902 |
+
"step": 19630
|
| 13903 |
+
},
|
| 13904 |
+
{
|
| 13905 |
+
"epoch": 0.871625032591631,
|
| 13906 |
+
"grad_norm": 64.967041015625,
|
| 13907 |
+
"learning_rate": 9.965952170774481e-06,
|
| 13908 |
+
"loss": 10.0128,
|
| 13909 |
+
"step": 19640
|
| 13910 |
+
},
|
| 13911 |
+
{
|
| 13912 |
+
"epoch": 0.8720688335247225,
|
| 13913 |
+
"grad_norm": 67.52765655517578,
|
| 13914 |
+
"learning_rate": 9.965934834812554e-06,
|
| 13915 |
+
"loss": 10.5425,
|
| 13916 |
+
"step": 19650
|
| 13917 |
+
},
|
| 13918 |
+
{
|
| 13919 |
+
"epoch": 0.8725126344578139,
|
| 13920 |
+
"grad_norm": 63.967247009277344,
|
| 13921 |
+
"learning_rate": 9.965917498850627e-06,
|
| 13922 |
+
"loss": 10.6763,
|
| 13923 |
+
"step": 19660
|
| 13924 |
+
},
|
| 13925 |
+
{
|
| 13926 |
+
"epoch": 0.8729564353909054,
|
| 13927 |
+
"grad_norm": 71.41963958740234,
|
| 13928 |
+
"learning_rate": 9.965900162888699e-06,
|
| 13929 |
+
"loss": 10.6471,
|
| 13930 |
+
"step": 19670
|
| 13931 |
+
},
|
| 13932 |
+
{
|
| 13933 |
+
"epoch": 0.8734002363239969,
|
| 13934 |
+
"grad_norm": 60.292701721191406,
|
| 13935 |
+
"learning_rate": 9.965882826926772e-06,
|
| 13936 |
+
"loss": 10.1802,
|
| 13937 |
+
"step": 19680
|
| 13938 |
+
},
|
| 13939 |
+
{
|
| 13940 |
+
"epoch": 0.8738440372570884,
|
| 13941 |
+
"grad_norm": 63.403560638427734,
|
| 13942 |
+
"learning_rate": 9.965865490964845e-06,
|
| 13943 |
+
"loss": 10.2882,
|
| 13944 |
+
"step": 19690
|
| 13945 |
+
},
|
| 13946 |
+
{
|
| 13947 |
+
"epoch": 0.8742878381901797,
|
| 13948 |
+
"grad_norm": 65.6253662109375,
|
| 13949 |
+
"learning_rate": 9.965848155002916e-06,
|
| 13950 |
+
"loss": 10.2649,
|
| 13951 |
+
"step": 19700
|
| 13952 |
+
},
|
| 13953 |
+
{
|
| 13954 |
+
"epoch": 0.8747316391232712,
|
| 13955 |
+
"grad_norm": 61.846683502197266,
|
| 13956 |
+
"learning_rate": 9.96583081904099e-06,
|
| 13957 |
+
"loss": 10.2564,
|
| 13958 |
+
"step": 19710
|
| 13959 |
+
},
|
| 13960 |
+
{
|
| 13961 |
+
"epoch": 0.8751754400563627,
|
| 13962 |
+
"grad_norm": 65.23727416992188,
|
| 13963 |
+
"learning_rate": 9.965813483079062e-06,
|
| 13964 |
+
"loss": 10.5773,
|
| 13965 |
+
"step": 19720
|
| 13966 |
+
},
|
| 13967 |
+
{
|
| 13968 |
+
"epoch": 0.8756192409894542,
|
| 13969 |
+
"grad_norm": 57.89336395263672,
|
| 13970 |
+
"learning_rate": 9.965796147117134e-06,
|
| 13971 |
+
"loss": 10.3047,
|
| 13972 |
+
"step": 19730
|
| 13973 |
+
},
|
| 13974 |
+
{
|
| 13975 |
+
"epoch": 0.8760630419225457,
|
| 13976 |
+
"grad_norm": 63.75661849975586,
|
| 13977 |
+
"learning_rate": 9.965778811155207e-06,
|
| 13978 |
+
"loss": 9.6075,
|
| 13979 |
+
"step": 19740
|
| 13980 |
+
},
|
| 13981 |
+
{
|
| 13982 |
+
"epoch": 0.8765068428556371,
|
| 13983 |
+
"grad_norm": 52.67669677734375,
|
| 13984 |
+
"learning_rate": 9.96576147519328e-06,
|
| 13985 |
+
"loss": 10.1751,
|
| 13986 |
+
"step": 19750
|
| 13987 |
+
},
|
| 13988 |
+
{
|
| 13989 |
+
"epoch": 0.8769506437887286,
|
| 13990 |
+
"grad_norm": 62.90458297729492,
|
| 13991 |
+
"learning_rate": 9.965744139231353e-06,
|
| 13992 |
+
"loss": 9.9069,
|
| 13993 |
+
"step": 19760
|
| 13994 |
+
},
|
| 13995 |
+
{
|
| 13996 |
+
"epoch": 0.87739444472182,
|
| 13997 |
+
"grad_norm": 75.43021392822266,
|
| 13998 |
+
"learning_rate": 9.965726803269424e-06,
|
| 13999 |
+
"loss": 10.7198,
|
| 14000 |
+
"step": 19770
|
| 14001 |
+
},
|
| 14002 |
+
{
|
| 14003 |
+
"epoch": 0.8778382456549115,
|
| 14004 |
+
"grad_norm": 65.16674041748047,
|
| 14005 |
+
"learning_rate": 9.965709467307497e-06,
|
| 14006 |
+
"loss": 9.8234,
|
| 14007 |
+
"step": 19780
|
| 14008 |
+
},
|
| 14009 |
+
{
|
| 14010 |
+
"epoch": 0.8782820465880029,
|
| 14011 |
+
"grad_norm": 55.970890045166016,
|
| 14012 |
+
"learning_rate": 9.96569213134557e-06,
|
| 14013 |
+
"loss": 10.2538,
|
| 14014 |
+
"step": 19790
|
| 14015 |
+
},
|
| 14016 |
+
{
|
| 14017 |
+
"epoch": 0.8787258475210944,
|
| 14018 |
+
"grad_norm": 63.947113037109375,
|
| 14019 |
+
"learning_rate": 9.965674795383642e-06,
|
| 14020 |
+
"loss": 10.4209,
|
| 14021 |
+
"step": 19800
|
| 14022 |
+
},
|
| 14023 |
+
{
|
| 14024 |
+
"epoch": 0.8791696484541859,
|
| 14025 |
+
"grad_norm": 65.56307983398438,
|
| 14026 |
+
"learning_rate": 9.965657459421715e-06,
|
| 14027 |
+
"loss": 9.7725,
|
| 14028 |
+
"step": 19810
|
| 14029 |
+
},
|
| 14030 |
+
{
|
| 14031 |
+
"epoch": 0.8796134493872774,
|
| 14032 |
+
"grad_norm": 66.70881652832031,
|
| 14033 |
+
"learning_rate": 9.965640123459788e-06,
|
| 14034 |
+
"loss": 10.7015,
|
| 14035 |
+
"step": 19820
|
| 14036 |
+
},
|
| 14037 |
+
{
|
| 14038 |
+
"epoch": 0.8800572503203687,
|
| 14039 |
+
"grad_norm": 65.10243225097656,
|
| 14040 |
+
"learning_rate": 9.96562278749786e-06,
|
| 14041 |
+
"loss": 10.1794,
|
| 14042 |
+
"step": 19830
|
| 14043 |
+
},
|
| 14044 |
+
{
|
| 14045 |
+
"epoch": 0.8805010512534602,
|
| 14046 |
+
"grad_norm": 58.50627517700195,
|
| 14047 |
+
"learning_rate": 9.965605451535932e-06,
|
| 14048 |
+
"loss": 10.758,
|
| 14049 |
+
"step": 19840
|
| 14050 |
+
},
|
| 14051 |
+
{
|
| 14052 |
+
"epoch": 0.8809448521865517,
|
| 14053 |
+
"grad_norm": 66.39806365966797,
|
| 14054 |
+
"learning_rate": 9.965588115574006e-06,
|
| 14055 |
+
"loss": 9.9667,
|
| 14056 |
+
"step": 19850
|
| 14057 |
+
},
|
| 14058 |
+
{
|
| 14059 |
+
"epoch": 0.8813886531196432,
|
| 14060 |
+
"grad_norm": 78.95050048828125,
|
| 14061 |
+
"learning_rate": 9.965570779612077e-06,
|
| 14062 |
+
"loss": 10.5293,
|
| 14063 |
+
"step": 19860
|
| 14064 |
+
},
|
| 14065 |
+
{
|
| 14066 |
+
"epoch": 0.8818324540527347,
|
| 14067 |
+
"grad_norm": 63.072444915771484,
|
| 14068 |
+
"learning_rate": 9.96555344365015e-06,
|
| 14069 |
+
"loss": 10.1306,
|
| 14070 |
+
"step": 19870
|
| 14071 |
+
},
|
| 14072 |
+
{
|
| 14073 |
+
"epoch": 0.8822762549858261,
|
| 14074 |
+
"grad_norm": 61.44473648071289,
|
| 14075 |
+
"learning_rate": 9.965536107688223e-06,
|
| 14076 |
+
"loss": 10.3155,
|
| 14077 |
+
"step": 19880
|
| 14078 |
+
},
|
| 14079 |
+
{
|
| 14080 |
+
"epoch": 0.8827200559189176,
|
| 14081 |
+
"grad_norm": 70.35446166992188,
|
| 14082 |
+
"learning_rate": 9.965518771726294e-06,
|
| 14083 |
+
"loss": 10.2451,
|
| 14084 |
+
"step": 19890
|
| 14085 |
+
},
|
| 14086 |
+
{
|
| 14087 |
+
"epoch": 0.883163856852009,
|
| 14088 |
+
"grad_norm": 58.564395904541016,
|
| 14089 |
+
"learning_rate": 9.965501435764368e-06,
|
| 14090 |
+
"loss": 10.1813,
|
| 14091 |
+
"step": 19900
|
| 14092 |
+
},
|
| 14093 |
+
{
|
| 14094 |
+
"epoch": 0.8836076577851005,
|
| 14095 |
+
"grad_norm": 64.06719970703125,
|
| 14096 |
+
"learning_rate": 9.96548409980244e-06,
|
| 14097 |
+
"loss": 10.2219,
|
| 14098 |
+
"step": 19910
|
| 14099 |
+
},
|
| 14100 |
+
{
|
| 14101 |
+
"epoch": 0.8840514587181919,
|
| 14102 |
+
"grad_norm": 57.828590393066406,
|
| 14103 |
+
"learning_rate": 9.965466763840512e-06,
|
| 14104 |
+
"loss": 10.4087,
|
| 14105 |
+
"step": 19920
|
| 14106 |
+
},
|
| 14107 |
+
{
|
| 14108 |
+
"epoch": 0.8844952596512834,
|
| 14109 |
+
"grad_norm": 61.435123443603516,
|
| 14110 |
+
"learning_rate": 9.965449427878585e-06,
|
| 14111 |
+
"loss": 10.3527,
|
| 14112 |
+
"step": 19930
|
| 14113 |
+
},
|
| 14114 |
+
{
|
| 14115 |
+
"epoch": 0.8849390605843749,
|
| 14116 |
+
"grad_norm": 61.76189041137695,
|
| 14117 |
+
"learning_rate": 9.965432091916658e-06,
|
| 14118 |
+
"loss": 10.3732,
|
| 14119 |
+
"step": 19940
|
| 14120 |
+
},
|
| 14121 |
+
{
|
| 14122 |
+
"epoch": 0.8853828615174664,
|
| 14123 |
+
"grad_norm": 62.846946716308594,
|
| 14124 |
+
"learning_rate": 9.96541475595473e-06,
|
| 14125 |
+
"loss": 10.2244,
|
| 14126 |
+
"step": 19950
|
| 14127 |
+
},
|
| 14128 |
+
{
|
| 14129 |
+
"epoch": 0.8858266624505579,
|
| 14130 |
+
"grad_norm": 63.24193572998047,
|
| 14131 |
+
"learning_rate": 9.965397419992803e-06,
|
| 14132 |
+
"loss": 10.1221,
|
| 14133 |
+
"step": 19960
|
| 14134 |
+
},
|
| 14135 |
+
{
|
| 14136 |
+
"epoch": 0.8862704633836492,
|
| 14137 |
+
"grad_norm": 56.531044006347656,
|
| 14138 |
+
"learning_rate": 9.965380084030876e-06,
|
| 14139 |
+
"loss": 9.9695,
|
| 14140 |
+
"step": 19970
|
| 14141 |
+
},
|
| 14142 |
+
{
|
| 14143 |
+
"epoch": 0.8867142643167407,
|
| 14144 |
+
"grad_norm": 59.855491638183594,
|
| 14145 |
+
"learning_rate": 9.965362748068949e-06,
|
| 14146 |
+
"loss": 10.4246,
|
| 14147 |
+
"step": 19980
|
| 14148 |
+
},
|
| 14149 |
+
{
|
| 14150 |
+
"epoch": 0.8871580652498322,
|
| 14151 |
+
"grad_norm": 56.98590850830078,
|
| 14152 |
+
"learning_rate": 9.96534541210702e-06,
|
| 14153 |
+
"loss": 10.5376,
|
| 14154 |
+
"step": 19990
|
| 14155 |
+
},
|
| 14156 |
+
{
|
| 14157 |
+
"epoch": 0.8876018661829237,
|
| 14158 |
+
"grad_norm": 64.37902069091797,
|
| 14159 |
+
"learning_rate": 9.965328076145093e-06,
|
| 14160 |
+
"loss": 9.9193,
|
| 14161 |
+
"step": 20000
|
| 14162 |
+
},
|
| 14163 |
+
{
|
| 14164 |
+
"epoch": 0.8876018661829237,
|
| 14165 |
+
"eval_loss": 0.32305407524108887,
|
| 14166 |
+
"eval_runtime": 673.2893,
|
| 14167 |
+
"eval_samples_per_second": 1803.669,
|
| 14168 |
+
"eval_steps_per_second": 56.365,
|
| 14169 |
+
"step": 20000
|
| 14170 |
}
|
| 14171 |
],
|
| 14172 |
"logging_steps": 10,
|
|
| 14186 |
"attributes": {}
|
| 14187 |
}
|
| 14188 |
},
|
| 14189 |
+
"total_flos": 6.979446410051584e+18,
|
| 14190 |
"train_batch_size": 4,
|
| 14191 |
"trial_name": null,
|
| 14192 |
"trial_params": null
|
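
For reference, the `log_history` entries appended in this commit can be inspected directly once the checkpoint is pulled. Below is a minimal sketch, not part of the checkpoint itself: it assumes the file sits at `last-checkpoint/trainer_state.json` as in this commit, and uses only the key names visible in the diff above (`loss`, `learning_rate`, `grad_norm`, `step`, `eval_loss`).

# Sketch: summarize the training log recorded in this checkpoint's
# trainer_state.json. The path and key names follow the diff above;
# adjust the path if the checkpoint is downloaded elsewhere.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training-step records carry "loss"; evaluation records carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

last = train_logs[-1]
print(f"step {last['step']}: loss={last['loss']}, "
      f"lr={last['learning_rate']}, grad_norm={last['grad_norm']}")
if eval_logs:
    print(f"eval at step {eval_logs[-1]['step']}: "
          f"eval_loss={eval_logs[-1]['eval_loss']}")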