Training in progress, epoch 6, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 21250800
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9df12a71dee1d6307d39aced09341fdc1861237c2e6377330cdf6b231d0f559
|
| 3 |
size 21250800
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 42543819
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:437ca32a43a75984733a76f54546f4e72538e3e5821ff47338eba93179ce541b
|
| 3 |
size 42543819
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a622d96de8a33c8162add692b156314cdc05ab7511d182b9a6d4ec42a4f47a4a
|
| 3 |
size 14645
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0b4392bbcc48b6381192fc61b1ecb513614e4c3c30856ff2953054daa95f753
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94f176bdf2e480bce573061a3d0a5ee54d020abf0bb56e945a357b1d7078d776
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": 7071,
|
| 3 |
"best_metric": 0.027155037969350815,
|
| 4 |
"best_model_checkpoint": "/tmp/tmpul_18pvy/adapter/checkpoint-7071",
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -8294,6 +8294,1666 @@
|
|
| 8294 |
"eval_samples_per_second": 143.708,
|
| 8295 |
"eval_steps_per_second": 17.972,
|
| 8296 |
"step": 11785
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8297 |
}
|
| 8298 |
],
|
| 8299 |
"logging_steps": 10,
|
|
@@ -8308,7 +9968,7 @@
|
|
| 8308 |
"early_stopping_threshold": 0.0
|
| 8309 |
},
|
| 8310 |
"attributes": {
|
| 8311 |
-
"early_stopping_patience_counter":
|
| 8312 |
}
|
| 8313 |
},
|
| 8314 |
"TrainerControl": {
|
|
@@ -8317,12 +9977,12 @@
|
|
| 8317 |
"should_evaluate": false,
|
| 8318 |
"should_log": false,
|
| 8319 |
"should_save": true,
|
| 8320 |
-
"should_training_stop":
|
| 8321 |
},
|
| 8322 |
"attributes": {}
|
| 8323 |
}
|
| 8324 |
},
|
| 8325 |
-
"total_flos":
|
| 8326 |
"train_batch_size": 8,
|
| 8327 |
"trial_name": null,
|
| 8328 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": 7071,
|
| 3 |
"best_metric": 0.027155037969350815,
|
| 4 |
"best_model_checkpoint": "/tmp/tmpul_18pvy/adapter/checkpoint-7071",
|
| 5 |
+
"epoch": 6.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 14142,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 8294 |
"eval_samples_per_second": 143.708,
|
| 8295 |
"eval_steps_per_second": 17.972,
|
| 8296 |
"step": 11785
|
| 8297 |
+
},
|
| 8298 |
+
{
|
| 8299 |
+
"epoch": 5.002121340687315,
|
| 8300 |
+
"grad_norm": 20.730009078979492,
|
| 8301 |
+
"learning_rate": 2.773237997957099e-06,
|
| 8302 |
+
"loss": 0.0465,
|
| 8303 |
+
"step": 11790
|
| 8304 |
+
},
|
| 8305 |
+
{
|
| 8306 |
+
"epoch": 5.006364022061943,
|
| 8307 |
+
"grad_norm": 0.003634377848356962,
|
| 8308 |
+
"learning_rate": 2.7614520311149524e-06,
|
| 8309 |
+
"loss": 0.0001,
|
| 8310 |
+
"step": 11800
|
| 8311 |
+
},
|
| 8312 |
+
{
|
| 8313 |
+
"epoch": 5.010606703436572,
|
| 8314 |
+
"grad_norm": 0.00011093047942267731,
|
| 8315 |
+
"learning_rate": 2.749666064272806e-06,
|
| 8316 |
+
"loss": 0.0001,
|
| 8317 |
+
"step": 11810
|
| 8318 |
+
},
|
| 8319 |
+
{
|
| 8320 |
+
"epoch": 5.014849384811201,
|
| 8321 |
+
"grad_norm": 0.0002678089076653123,
|
| 8322 |
+
"learning_rate": 2.7378800974306593e-06,
|
| 8323 |
+
"loss": 0.0001,
|
| 8324 |
+
"step": 11820
|
| 8325 |
+
},
|
| 8326 |
+
{
|
| 8327 |
+
"epoch": 5.01909206618583,
|
| 8328 |
+
"grad_norm": 0.03435564413666725,
|
| 8329 |
+
"learning_rate": 2.7260941305885127e-06,
|
| 8330 |
+
"loss": 0.0002,
|
| 8331 |
+
"step": 11830
|
| 8332 |
+
},
|
| 8333 |
+
{
|
| 8334 |
+
"epoch": 5.023334747560458,
|
| 8335 |
+
"grad_norm": 0.0004733138484880328,
|
| 8336 |
+
"learning_rate": 2.714308163746366e-06,
|
| 8337 |
+
"loss": 0.0,
|
| 8338 |
+
"step": 11840
|
| 8339 |
+
},
|
| 8340 |
+
{
|
| 8341 |
+
"epoch": 5.027577428935087,
|
| 8342 |
+
"grad_norm": 0.028178097680211067,
|
| 8343 |
+
"learning_rate": 2.7025221969042195e-06,
|
| 8344 |
+
"loss": 0.0001,
|
| 8345 |
+
"step": 11850
|
| 8346 |
+
},
|
| 8347 |
+
{
|
| 8348 |
+
"epoch": 5.031820110309716,
|
| 8349 |
+
"grad_norm": 0.0004893006989732385,
|
| 8350 |
+
"learning_rate": 2.690736230062073e-06,
|
| 8351 |
+
"loss": 0.0003,
|
| 8352 |
+
"step": 11860
|
| 8353 |
+
},
|
| 8354 |
+
{
|
| 8355 |
+
"epoch": 5.036062791684344,
|
| 8356 |
+
"grad_norm": 0.0013139487709850073,
|
| 8357 |
+
"learning_rate": 2.678950263219926e-06,
|
| 8358 |
+
"loss": 0.0028,
|
| 8359 |
+
"step": 11870
|
| 8360 |
+
},
|
| 8361 |
+
{
|
| 8362 |
+
"epoch": 5.040305473058973,
|
| 8363 |
+
"grad_norm": 0.0004709514032583684,
|
| 8364 |
+
"learning_rate": 2.6671642963777794e-06,
|
| 8365 |
+
"loss": 0.0008,
|
| 8366 |
+
"step": 11880
|
| 8367 |
+
},
|
| 8368 |
+
{
|
| 8369 |
+
"epoch": 5.044548154433602,
|
| 8370 |
+
"grad_norm": 7.39249589969404e-05,
|
| 8371 |
+
"learning_rate": 2.655378329535633e-06,
|
| 8372 |
+
"loss": 0.0426,
|
| 8373 |
+
"step": 11890
|
| 8374 |
+
},
|
| 8375 |
+
{
|
| 8376 |
+
"epoch": 5.048790835808231,
|
| 8377 |
+
"grad_norm": 0.003630582708865404,
|
| 8378 |
+
"learning_rate": 2.6435923626934862e-06,
|
| 8379 |
+
"loss": 0.0,
|
| 8380 |
+
"step": 11900
|
| 8381 |
+
},
|
| 8382 |
+
{
|
| 8383 |
+
"epoch": 5.053033517182859,
|
| 8384 |
+
"grad_norm": 0.0021470971405506134,
|
| 8385 |
+
"learning_rate": 2.6318063958513396e-06,
|
| 8386 |
+
"loss": 0.0,
|
| 8387 |
+
"step": 11910
|
| 8388 |
+
},
|
| 8389 |
+
{
|
| 8390 |
+
"epoch": 5.0572761985574886,
|
| 8391 |
+
"grad_norm": 4.246901512145996,
|
| 8392 |
+
"learning_rate": 2.620020429009193e-06,
|
| 8393 |
+
"loss": 0.061,
|
| 8394 |
+
"step": 11920
|
| 8395 |
+
},
|
| 8396 |
+
{
|
| 8397 |
+
"epoch": 5.061518879932117,
|
| 8398 |
+
"grad_norm": 0.0005770602147094905,
|
| 8399 |
+
"learning_rate": 2.6082344621670465e-06,
|
| 8400 |
+
"loss": 0.0,
|
| 8401 |
+
"step": 11930
|
| 8402 |
+
},
|
| 8403 |
+
{
|
| 8404 |
+
"epoch": 5.065761561306746,
|
| 8405 |
+
"grad_norm": 0.050185322761535645,
|
| 8406 |
+
"learning_rate": 2.5964484953249e-06,
|
| 8407 |
+
"loss": 0.013,
|
| 8408 |
+
"step": 11940
|
| 8409 |
+
},
|
| 8410 |
+
{
|
| 8411 |
+
"epoch": 5.0700042426813745,
|
| 8412 |
+
"grad_norm": 0.00020702298206742853,
|
| 8413 |
+
"learning_rate": 2.5846625284827533e-06,
|
| 8414 |
+
"loss": 0.0012,
|
| 8415 |
+
"step": 11950
|
| 8416 |
+
},
|
| 8417 |
+
{
|
| 8418 |
+
"epoch": 5.074246924056004,
|
| 8419 |
+
"grad_norm": 0.009266343899071217,
|
| 8420 |
+
"learning_rate": 2.5728765616406067e-06,
|
| 8421 |
+
"loss": 0.0004,
|
| 8422 |
+
"step": 11960
|
| 8423 |
+
},
|
| 8424 |
+
{
|
| 8425 |
+
"epoch": 5.078489605430632,
|
| 8426 |
+
"grad_norm": 0.007173527963459492,
|
| 8427 |
+
"learning_rate": 2.56109059479846e-06,
|
| 8428 |
+
"loss": 0.1258,
|
| 8429 |
+
"step": 11970
|
| 8430 |
+
},
|
| 8431 |
+
{
|
| 8432 |
+
"epoch": 5.082732286805261,
|
| 8433 |
+
"grad_norm": 1.656999945640564,
|
| 8434 |
+
"learning_rate": 2.5493046279563136e-06,
|
| 8435 |
+
"loss": 0.0013,
|
| 8436 |
+
"step": 11980
|
| 8437 |
+
},
|
| 8438 |
+
{
|
| 8439 |
+
"epoch": 5.08697496817989,
|
| 8440 |
+
"grad_norm": 9.17657816899009e-05,
|
| 8441 |
+
"learning_rate": 2.537518661114167e-06,
|
| 8442 |
+
"loss": 0.0002,
|
| 8443 |
+
"step": 11990
|
| 8444 |
+
},
|
| 8445 |
+
{
|
| 8446 |
+
"epoch": 5.091217649554518,
|
| 8447 |
+
"grad_norm": 0.00023294801940210164,
|
| 8448 |
+
"learning_rate": 2.52573269427202e-06,
|
| 8449 |
+
"loss": 0.0003,
|
| 8450 |
+
"step": 12000
|
| 8451 |
+
},
|
| 8452 |
+
{
|
| 8453 |
+
"epoch": 5.095460330929147,
|
| 8454 |
+
"grad_norm": 0.2610091269016266,
|
| 8455 |
+
"learning_rate": 2.5139467274298734e-06,
|
| 8456 |
+
"loss": 0.1479,
|
| 8457 |
+
"step": 12010
|
| 8458 |
+
},
|
| 8459 |
+
{
|
| 8460 |
+
"epoch": 5.099703012303776,
|
| 8461 |
+
"grad_norm": 0.0002062877028947696,
|
| 8462 |
+
"learning_rate": 2.502160760587727e-06,
|
| 8463 |
+
"loss": 0.0051,
|
| 8464 |
+
"step": 12020
|
| 8465 |
+
},
|
| 8466 |
+
{
|
| 8467 |
+
"epoch": 5.103945693678405,
|
| 8468 |
+
"grad_norm": 0.003876827424392104,
|
| 8469 |
+
"learning_rate": 2.4903747937455803e-06,
|
| 8470 |
+
"loss": 0.0,
|
| 8471 |
+
"step": 12030
|
| 8472 |
+
},
|
| 8473 |
+
{
|
| 8474 |
+
"epoch": 5.108188375053033,
|
| 8475 |
+
"grad_norm": 0.00021948596986476332,
|
| 8476 |
+
"learning_rate": 2.4785888269034337e-06,
|
| 8477 |
+
"loss": 0.0011,
|
| 8478 |
+
"step": 12040
|
| 8479 |
+
},
|
| 8480 |
+
{
|
| 8481 |
+
"epoch": 5.1124310564276625,
|
| 8482 |
+
"grad_norm": 0.002087715547531843,
|
| 8483 |
+
"learning_rate": 2.466802860061287e-06,
|
| 8484 |
+
"loss": 0.0001,
|
| 8485 |
+
"step": 12050
|
| 8486 |
+
},
|
| 8487 |
+
{
|
| 8488 |
+
"epoch": 5.116673737802291,
|
| 8489 |
+
"grad_norm": 0.0010263972217217088,
|
| 8490 |
+
"learning_rate": 2.4550168932191405e-06,
|
| 8491 |
+
"loss": 0.0617,
|
| 8492 |
+
"step": 12060
|
| 8493 |
+
},
|
| 8494 |
+
{
|
| 8495 |
+
"epoch": 5.12091641917692,
|
| 8496 |
+
"grad_norm": 0.002517751418054104,
|
| 8497 |
+
"learning_rate": 2.443230926376994e-06,
|
| 8498 |
+
"loss": 0.0,
|
| 8499 |
+
"step": 12070
|
| 8500 |
+
},
|
| 8501 |
+
{
|
| 8502 |
+
"epoch": 5.125159100551548,
|
| 8503 |
+
"grad_norm": 0.0004665980814024806,
|
| 8504 |
+
"learning_rate": 2.4314449595348474e-06,
|
| 8505 |
+
"loss": 0.0005,
|
| 8506 |
+
"step": 12080
|
| 8507 |
+
},
|
| 8508 |
+
{
|
| 8509 |
+
"epoch": 5.129401781926178,
|
| 8510 |
+
"grad_norm": 0.005221213214099407,
|
| 8511 |
+
"learning_rate": 2.419658992692701e-06,
|
| 8512 |
+
"loss": 0.0104,
|
| 8513 |
+
"step": 12090
|
| 8514 |
+
},
|
| 8515 |
+
{
|
| 8516 |
+
"epoch": 5.133644463300806,
|
| 8517 |
+
"grad_norm": 0.00026204856112599373,
|
| 8518 |
+
"learning_rate": 2.407873025850554e-06,
|
| 8519 |
+
"loss": 0.0002,
|
| 8520 |
+
"step": 12100
|
| 8521 |
+
},
|
| 8522 |
+
{
|
| 8523 |
+
"epoch": 5.137887144675435,
|
| 8524 |
+
"grad_norm": 0.05771171301603317,
|
| 8525 |
+
"learning_rate": 2.3960870590084076e-06,
|
| 8526 |
+
"loss": 0.0001,
|
| 8527 |
+
"step": 12110
|
| 8528 |
+
},
|
| 8529 |
+
{
|
| 8530 |
+
"epoch": 5.142129826050064,
|
| 8531 |
+
"grad_norm": 0.0005188264767639339,
|
| 8532 |
+
"learning_rate": 2.384301092166261e-06,
|
| 8533 |
+
"loss": 0.0007,
|
| 8534 |
+
"step": 12120
|
| 8535 |
+
},
|
| 8536 |
+
{
|
| 8537 |
+
"epoch": 5.146372507424692,
|
| 8538 |
+
"grad_norm": 0.018017444759607315,
|
| 8539 |
+
"learning_rate": 2.372515125324114e-06,
|
| 8540 |
+
"loss": 0.0,
|
| 8541 |
+
"step": 12130
|
| 8542 |
+
},
|
| 8543 |
+
{
|
| 8544 |
+
"epoch": 5.150615188799321,
|
| 8545 |
+
"grad_norm": 7.108715057373047,
|
| 8546 |
+
"learning_rate": 2.3607291584819675e-06,
|
| 8547 |
+
"loss": 0.0751,
|
| 8548 |
+
"step": 12140
|
| 8549 |
+
},
|
| 8550 |
+
{
|
| 8551 |
+
"epoch": 5.15485787017395,
|
| 8552 |
+
"grad_norm": 0.0003858147538267076,
|
| 8553 |
+
"learning_rate": 2.348943191639821e-06,
|
| 8554 |
+
"loss": 0.0002,
|
| 8555 |
+
"step": 12150
|
| 8556 |
+
},
|
| 8557 |
+
{
|
| 8558 |
+
"epoch": 5.159100551548579,
|
| 8559 |
+
"grad_norm": 7.319705036934465e-05,
|
| 8560 |
+
"learning_rate": 2.3371572247976743e-06,
|
| 8561 |
+
"loss": 0.0004,
|
| 8562 |
+
"step": 12160
|
| 8563 |
+
},
|
| 8564 |
+
{
|
| 8565 |
+
"epoch": 5.163343232923207,
|
| 8566 |
+
"grad_norm": 0.014056987129151821,
|
| 8567 |
+
"learning_rate": 2.3253712579555277e-06,
|
| 8568 |
+
"loss": 0.0143,
|
| 8569 |
+
"step": 12170
|
| 8570 |
+
},
|
| 8571 |
+
{
|
| 8572 |
+
"epoch": 5.167585914297836,
|
| 8573 |
+
"grad_norm": 0.00044811333646066487,
|
| 8574 |
+
"learning_rate": 2.313585291113381e-06,
|
| 8575 |
+
"loss": 0.0001,
|
| 8576 |
+
"step": 12180
|
| 8577 |
+
},
|
| 8578 |
+
{
|
| 8579 |
+
"epoch": 5.171828595672465,
|
| 8580 |
+
"grad_norm": 8.471779437968507e-05,
|
| 8581 |
+
"learning_rate": 2.3017993242712346e-06,
|
| 8582 |
+
"loss": 0.0189,
|
| 8583 |
+
"step": 12190
|
| 8584 |
+
},
|
| 8585 |
+
{
|
| 8586 |
+
"epoch": 5.176071277047094,
|
| 8587 |
+
"grad_norm": 0.015318558551371098,
|
| 8588 |
+
"learning_rate": 2.290013357429088e-06,
|
| 8589 |
+
"loss": 0.0001,
|
| 8590 |
+
"step": 12200
|
| 8591 |
+
},
|
| 8592 |
+
{
|
| 8593 |
+
"epoch": 5.180313958421722,
|
| 8594 |
+
"grad_norm": 0.02276996523141861,
|
| 8595 |
+
"learning_rate": 2.2782273905869414e-06,
|
| 8596 |
+
"loss": 0.0,
|
| 8597 |
+
"step": 12210
|
| 8598 |
+
},
|
| 8599 |
+
{
|
| 8600 |
+
"epoch": 5.184556639796352,
|
| 8601 |
+
"grad_norm": 0.03285042941570282,
|
| 8602 |
+
"learning_rate": 2.266441423744795e-06,
|
| 8603 |
+
"loss": 0.0002,
|
| 8604 |
+
"step": 12220
|
| 8605 |
+
},
|
| 8606 |
+
{
|
| 8607 |
+
"epoch": 5.18879932117098,
|
| 8608 |
+
"grad_norm": 0.0029086440335959196,
|
| 8609 |
+
"learning_rate": 2.2546554569026483e-06,
|
| 8610 |
+
"loss": 0.0005,
|
| 8611 |
+
"step": 12230
|
| 8612 |
+
},
|
| 8613 |
+
{
|
| 8614 |
+
"epoch": 5.193042002545609,
|
| 8615 |
+
"grad_norm": 0.016023816540837288,
|
| 8616 |
+
"learning_rate": 2.2428694900605017e-06,
|
| 8617 |
+
"loss": 0.0002,
|
| 8618 |
+
"step": 12240
|
| 8619 |
+
},
|
| 8620 |
+
{
|
| 8621 |
+
"epoch": 5.197284683920238,
|
| 8622 |
+
"grad_norm": 0.00030561210587620735,
|
| 8623 |
+
"learning_rate": 2.231083523218355e-06,
|
| 8624 |
+
"loss": 0.0893,
|
| 8625 |
+
"step": 12250
|
| 8626 |
+
},
|
| 8627 |
+
{
|
| 8628 |
+
"epoch": 5.201527365294867,
|
| 8629 |
+
"grad_norm": 0.001297492883168161,
|
| 8630 |
+
"learning_rate": 2.219297556376208e-06,
|
| 8631 |
+
"loss": 0.0399,
|
| 8632 |
+
"step": 12260
|
| 8633 |
+
},
|
| 8634 |
+
{
|
| 8635 |
+
"epoch": 5.205770046669495,
|
| 8636 |
+
"grad_norm": 0.00034048655652441084,
|
| 8637 |
+
"learning_rate": 2.2075115895340615e-06,
|
| 8638 |
+
"loss": 0.0186,
|
| 8639 |
+
"step": 12270
|
| 8640 |
+
},
|
| 8641 |
+
{
|
| 8642 |
+
"epoch": 5.2100127280441235,
|
| 8643 |
+
"grad_norm": 0.013047544285655022,
|
| 8644 |
+
"learning_rate": 2.195725622691915e-06,
|
| 8645 |
+
"loss": 0.109,
|
| 8646 |
+
"step": 12280
|
| 8647 |
+
},
|
| 8648 |
+
{
|
| 8649 |
+
"epoch": 5.214255409418753,
|
| 8650 |
+
"grad_norm": 0.0001713270030450076,
|
| 8651 |
+
"learning_rate": 2.1839396558497684e-06,
|
| 8652 |
+
"loss": 0.0669,
|
| 8653 |
+
"step": 12290
|
| 8654 |
+
},
|
| 8655 |
+
{
|
| 8656 |
+
"epoch": 5.218498090793381,
|
| 8657 |
+
"grad_norm": 0.001454151701182127,
|
| 8658 |
+
"learning_rate": 2.1721536890076218e-06,
|
| 8659 |
+
"loss": 0.0029,
|
| 8660 |
+
"step": 12300
|
| 8661 |
+
},
|
| 8662 |
+
{
|
| 8663 |
+
"epoch": 5.22274077216801,
|
| 8664 |
+
"grad_norm": 0.02842268906533718,
|
| 8665 |
+
"learning_rate": 2.160367722165475e-06,
|
| 8666 |
+
"loss": 0.0367,
|
| 8667 |
+
"step": 12310
|
| 8668 |
+
},
|
| 8669 |
+
{
|
| 8670 |
+
"epoch": 5.226983453542639,
|
| 8671 |
+
"grad_norm": 0.00016341819718945771,
|
| 8672 |
+
"learning_rate": 2.148581755323328e-06,
|
| 8673 |
+
"loss": 0.0392,
|
| 8674 |
+
"step": 12320
|
| 8675 |
+
},
|
| 8676 |
+
{
|
| 8677 |
+
"epoch": 5.231226134917268,
|
| 8678 |
+
"grad_norm": 0.00012527649232652038,
|
| 8679 |
+
"learning_rate": 2.1367957884811816e-06,
|
| 8680 |
+
"loss": 0.0002,
|
| 8681 |
+
"step": 12330
|
| 8682 |
+
},
|
| 8683 |
+
{
|
| 8684 |
+
"epoch": 5.235468816291896,
|
| 8685 |
+
"grad_norm": 0.0004473147273529321,
|
| 8686 |
+
"learning_rate": 2.125009821639035e-06,
|
| 8687 |
+
"loss": 0.0597,
|
| 8688 |
+
"step": 12340
|
| 8689 |
+
},
|
| 8690 |
+
{
|
| 8691 |
+
"epoch": 5.2397114976665256,
|
| 8692 |
+
"grad_norm": 0.008033150807023048,
|
| 8693 |
+
"learning_rate": 2.1132238547968885e-06,
|
| 8694 |
+
"loss": 0.0001,
|
| 8695 |
+
"step": 12350
|
| 8696 |
+
},
|
| 8697 |
+
{
|
| 8698 |
+
"epoch": 5.243954179041154,
|
| 8699 |
+
"grad_norm": 0.006322773173451424,
|
| 8700 |
+
"learning_rate": 2.101437887954742e-06,
|
| 8701 |
+
"loss": 0.0004,
|
| 8702 |
+
"step": 12360
|
| 8703 |
+
},
|
| 8704 |
+
{
|
| 8705 |
+
"epoch": 5.248196860415783,
|
| 8706 |
+
"grad_norm": 0.00017982965800911188,
|
| 8707 |
+
"learning_rate": 2.0896519211125953e-06,
|
| 8708 |
+
"loss": 0.0017,
|
| 8709 |
+
"step": 12370
|
| 8710 |
+
},
|
| 8711 |
+
{
|
| 8712 |
+
"epoch": 5.2524395417904115,
|
| 8713 |
+
"grad_norm": 0.001539862249046564,
|
| 8714 |
+
"learning_rate": 2.0778659542704487e-06,
|
| 8715 |
+
"loss": 0.0374,
|
| 8716 |
+
"step": 12380
|
| 8717 |
+
},
|
| 8718 |
+
{
|
| 8719 |
+
"epoch": 5.25668222316504,
|
| 8720 |
+
"grad_norm": 0.0007864583749324083,
|
| 8721 |
+
"learning_rate": 2.0660799874283017e-06,
|
| 8722 |
+
"loss": 0.0469,
|
| 8723 |
+
"step": 12390
|
| 8724 |
+
},
|
| 8725 |
+
{
|
| 8726 |
+
"epoch": 5.260924904539669,
|
| 8727 |
+
"grad_norm": 0.00022766577603761107,
|
| 8728 |
+
"learning_rate": 2.054294020586155e-06,
|
| 8729 |
+
"loss": 0.0003,
|
| 8730 |
+
"step": 12400
|
| 8731 |
+
},
|
| 8732 |
+
{
|
| 8733 |
+
"epoch": 5.2651675859142975,
|
| 8734 |
+
"grad_norm": 8.477435039822012e-05,
|
| 8735 |
+
"learning_rate": 2.0425080537440086e-06,
|
| 8736 |
+
"loss": 0.0003,
|
| 8737 |
+
"step": 12410
|
| 8738 |
+
},
|
| 8739 |
+
{
|
| 8740 |
+
"epoch": 5.269410267288927,
|
| 8741 |
+
"grad_norm": 0.0012190081179141998,
|
| 8742 |
+
"learning_rate": 2.030722086901862e-06,
|
| 8743 |
+
"loss": 0.0,
|
| 8744 |
+
"step": 12420
|
| 8745 |
+
},
|
| 8746 |
+
{
|
| 8747 |
+
"epoch": 5.273652948663555,
|
| 8748 |
+
"grad_norm": 0.00011515396909089759,
|
| 8749 |
+
"learning_rate": 2.0189361200597154e-06,
|
| 8750 |
+
"loss": 0.0,
|
| 8751 |
+
"step": 12430
|
| 8752 |
+
},
|
| 8753 |
+
{
|
| 8754 |
+
"epoch": 5.277895630038184,
|
| 8755 |
+
"grad_norm": 0.0021234049927443266,
|
| 8756 |
+
"learning_rate": 2.007150153217569e-06,
|
| 8757 |
+
"loss": 0.0001,
|
| 8758 |
+
"step": 12440
|
| 8759 |
+
},
|
| 8760 |
+
{
|
| 8761 |
+
"epoch": 5.282138311412813,
|
| 8762 |
+
"grad_norm": 0.0010740981670096517,
|
| 8763 |
+
"learning_rate": 1.9953641863754222e-06,
|
| 8764 |
+
"loss": 0.1258,
|
| 8765 |
+
"step": 12450
|
| 8766 |
+
},
|
| 8767 |
+
{
|
| 8768 |
+
"epoch": 5.286380992787442,
|
| 8769 |
+
"grad_norm": 0.8639234900474548,
|
| 8770 |
+
"learning_rate": 1.9835782195332757e-06,
|
| 8771 |
+
"loss": 0.001,
|
| 8772 |
+
"step": 12460
|
| 8773 |
+
},
|
| 8774 |
+
{
|
| 8775 |
+
"epoch": 5.29062367416207,
|
| 8776 |
+
"grad_norm": 0.020350001752376556,
|
| 8777 |
+
"learning_rate": 1.971792252691129e-06,
|
| 8778 |
+
"loss": 0.0002,
|
| 8779 |
+
"step": 12470
|
| 8780 |
+
},
|
| 8781 |
+
{
|
| 8782 |
+
"epoch": 5.2948663555366995,
|
| 8783 |
+
"grad_norm": 0.0007364298216998577,
|
| 8784 |
+
"learning_rate": 1.9600062858489825e-06,
|
| 8785 |
+
"loss": 0.0001,
|
| 8786 |
+
"step": 12480
|
| 8787 |
+
},
|
| 8788 |
+
{
|
| 8789 |
+
"epoch": 5.299109036911328,
|
| 8790 |
+
"grad_norm": 9.249017715454102,
|
| 8791 |
+
"learning_rate": 1.948220319006836e-06,
|
| 8792 |
+
"loss": 0.0691,
|
| 8793 |
+
"step": 12490
|
| 8794 |
+
},
|
| 8795 |
+
{
|
| 8796 |
+
"epoch": 5.303351718285957,
|
| 8797 |
+
"grad_norm": 0.003393571125343442,
|
| 8798 |
+
"learning_rate": 1.9364343521646894e-06,
|
| 8799 |
+
"loss": 0.1147,
|
| 8800 |
+
"step": 12500
|
| 8801 |
+
},
|
| 8802 |
+
{
|
| 8803 |
+
"epoch": 5.307594399660585,
|
| 8804 |
+
"grad_norm": 0.00031853283871896565,
|
| 8805 |
+
"learning_rate": 1.9246483853225428e-06,
|
| 8806 |
+
"loss": 0.0001,
|
| 8807 |
+
"step": 12510
|
| 8808 |
+
},
|
| 8809 |
+
{
|
| 8810 |
+
"epoch": 5.311837081035215,
|
| 8811 |
+
"grad_norm": 0.8153578042984009,
|
| 8812 |
+
"learning_rate": 1.9128624184803958e-06,
|
| 8813 |
+
"loss": 0.0019,
|
| 8814 |
+
"step": 12520
|
| 8815 |
+
},
|
| 8816 |
+
{
|
| 8817 |
+
"epoch": 5.316079762409843,
|
| 8818 |
+
"grad_norm": 0.0002199799637310207,
|
| 8819 |
+
"learning_rate": 1.9010764516382494e-06,
|
| 8820 |
+
"loss": 0.0,
|
| 8821 |
+
"step": 12530
|
| 8822 |
+
},
|
| 8823 |
+
{
|
| 8824 |
+
"epoch": 5.320322443784471,
|
| 8825 |
+
"grad_norm": 0.0024004667066037655,
|
| 8826 |
+
"learning_rate": 1.8892904847961028e-06,
|
| 8827 |
+
"loss": 0.0,
|
| 8828 |
+
"step": 12540
|
| 8829 |
+
},
|
| 8830 |
+
{
|
| 8831 |
+
"epoch": 5.324565125159101,
|
| 8832 |
+
"grad_norm": 0.05654584988951683,
|
| 8833 |
+
"learning_rate": 1.8775045179539562e-06,
|
| 8834 |
+
"loss": 0.0005,
|
| 8835 |
+
"step": 12550
|
| 8836 |
+
},
|
| 8837 |
+
{
|
| 8838 |
+
"epoch": 5.328807806533729,
|
| 8839 |
+
"grad_norm": 0.025069650262594223,
|
| 8840 |
+
"learning_rate": 1.8657185511118095e-06,
|
| 8841 |
+
"loss": 0.0018,
|
| 8842 |
+
"step": 12560
|
| 8843 |
+
},
|
| 8844 |
+
{
|
| 8845 |
+
"epoch": 5.333050487908358,
|
| 8846 |
+
"grad_norm": 0.05367057025432587,
|
| 8847 |
+
"learning_rate": 1.8539325842696629e-06,
|
| 8848 |
+
"loss": 0.0001,
|
| 8849 |
+
"step": 12570
|
| 8850 |
+
},
|
| 8851 |
+
{
|
| 8852 |
+
"epoch": 5.337293169282987,
|
| 8853 |
+
"grad_norm": 0.004782603122293949,
|
| 8854 |
+
"learning_rate": 1.8421466174275163e-06,
|
| 8855 |
+
"loss": 0.0014,
|
| 8856 |
+
"step": 12580
|
| 8857 |
+
},
|
| 8858 |
+
{
|
| 8859 |
+
"epoch": 5.341535850657616,
|
| 8860 |
+
"grad_norm": 0.0012631929712370038,
|
| 8861 |
+
"learning_rate": 1.8303606505853697e-06,
|
| 8862 |
+
"loss": 0.0012,
|
| 8863 |
+
"step": 12590
|
| 8864 |
+
},
|
| 8865 |
+
{
|
| 8866 |
+
"epoch": 5.345778532032244,
|
| 8867 |
+
"grad_norm": 0.0004978696233592927,
|
| 8868 |
+
"learning_rate": 1.8185746837432231e-06,
|
| 8869 |
+
"loss": 0.0509,
|
| 8870 |
+
"step": 12600
|
| 8871 |
+
},
|
| 8872 |
+
{
|
| 8873 |
+
"epoch": 5.350021213406873,
|
| 8874 |
+
"grad_norm": 0.04446430504322052,
|
| 8875 |
+
"learning_rate": 1.8067887169010766e-06,
|
| 8876 |
+
"loss": 0.0031,
|
| 8877 |
+
"step": 12610
|
| 8878 |
+
},
|
| 8879 |
+
{
|
| 8880 |
+
"epoch": 5.354263894781502,
|
| 8881 |
+
"grad_norm": 0.23009301722049713,
|
| 8882 |
+
"learning_rate": 1.7950027500589298e-06,
|
| 8883 |
+
"loss": 0.0004,
|
| 8884 |
+
"step": 12620
|
| 8885 |
+
},
|
| 8886 |
+
{
|
| 8887 |
+
"epoch": 5.358506576156131,
|
| 8888 |
+
"grad_norm": 0.0007940650684759021,
|
| 8889 |
+
"learning_rate": 1.7832167832167832e-06,
|
| 8890 |
+
"loss": 0.0002,
|
| 8891 |
+
"step": 12630
|
| 8892 |
+
},
|
| 8893 |
+
{
|
| 8894 |
+
"epoch": 5.362749257530759,
|
| 8895 |
+
"grad_norm": 0.0003066326316911727,
|
| 8896 |
+
"learning_rate": 1.7714308163746366e-06,
|
| 8897 |
+
"loss": 0.0002,
|
| 8898 |
+
"step": 12640
|
| 8899 |
+
},
|
| 8900 |
+
{
|
| 8901 |
+
"epoch": 5.366991938905389,
|
| 8902 |
+
"grad_norm": 0.0017696653958410025,
|
| 8903 |
+
"learning_rate": 1.75964484953249e-06,
|
| 8904 |
+
"loss": 0.0,
|
| 8905 |
+
"step": 12650
|
| 8906 |
+
},
|
| 8907 |
+
{
|
| 8908 |
+
"epoch": 5.371234620280017,
|
| 8909 |
+
"grad_norm": 0.00011425031698308885,
|
| 8910 |
+
"learning_rate": 1.7478588826903434e-06,
|
| 8911 |
+
"loss": 0.0002,
|
| 8912 |
+
"step": 12660
|
| 8913 |
+
},
|
| 8914 |
+
{
|
| 8915 |
+
"epoch": 5.375477301654645,
|
| 8916 |
+
"grad_norm": 7.186361312866211,
|
| 8917 |
+
"learning_rate": 1.7360729158481969e-06,
|
| 8918 |
+
"loss": 0.0809,
|
| 8919 |
+
"step": 12670
|
| 8920 |
+
},
|
| 8921 |
+
{
|
| 8922 |
+
"epoch": 5.379719983029275,
|
| 8923 |
+
"grad_norm": 0.06610995531082153,
|
| 8924 |
+
"learning_rate": 1.7242869490060503e-06,
|
| 8925 |
+
"loss": 0.0193,
|
| 8926 |
+
"step": 12680
|
| 8927 |
+
},
|
| 8928 |
+
{
|
| 8929 |
+
"epoch": 5.383962664403903,
|
| 8930 |
+
"grad_norm": 0.0001348053920082748,
|
| 8931 |
+
"learning_rate": 1.7125009821639035e-06,
|
| 8932 |
+
"loss": 0.0,
|
| 8933 |
+
"step": 12690
|
| 8934 |
+
},
|
| 8935 |
+
{
|
| 8936 |
+
"epoch": 5.388205345778532,
|
| 8937 |
+
"grad_norm": 0.0052658324129879475,
|
| 8938 |
+
"learning_rate": 1.700715015321757e-06,
|
| 8939 |
+
"loss": 0.0002,
|
| 8940 |
+
"step": 12700
|
| 8941 |
+
},
|
| 8942 |
+
{
|
| 8943 |
+
"epoch": 5.3924480271531605,
|
| 8944 |
+
"grad_norm": 0.00025654330966062844,
|
| 8945 |
+
"learning_rate": 1.6889290484796103e-06,
|
| 8946 |
+
"loss": 0.0719,
|
| 8947 |
+
"step": 12710
|
| 8948 |
+
},
|
| 8949 |
+
{
|
| 8950 |
+
"epoch": 5.39669070852779,
|
| 8951 |
+
"grad_norm": 0.00042288817348890007,
|
| 8952 |
+
"learning_rate": 1.6771430816374638e-06,
|
| 8953 |
+
"loss": 0.0003,
|
| 8954 |
+
"step": 12720
|
| 8955 |
+
},
|
| 8956 |
+
{
|
| 8957 |
+
"epoch": 5.400933389902418,
|
| 8958 |
+
"grad_norm": 33.99470138549805,
|
| 8959 |
+
"learning_rate": 1.6653571147953172e-06,
|
| 8960 |
+
"loss": 0.0141,
|
| 8961 |
+
"step": 12730
|
| 8962 |
+
},
|
| 8963 |
+
{
|
| 8964 |
+
"epoch": 5.405176071277047,
|
| 8965 |
+
"grad_norm": 9.667040285421535e-05,
|
| 8966 |
+
"learning_rate": 1.6535711479531706e-06,
|
| 8967 |
+
"loss": 0.0,
|
| 8968 |
+
"step": 12740
|
| 8969 |
+
},
|
| 8970 |
+
{
|
| 8971 |
+
"epoch": 5.409418752651676,
|
| 8972 |
+
"grad_norm": 0.00012805570440832525,
|
| 8973 |
+
"learning_rate": 1.6417851811110238e-06,
|
| 8974 |
+
"loss": 0.0246,
|
| 8975 |
+
"step": 12750
|
| 8976 |
+
},
|
| 8977 |
+
{
|
| 8978 |
+
"epoch": 5.413661434026305,
|
| 8979 |
+
"grad_norm": 0.000495830608997494,
|
| 8980 |
+
"learning_rate": 1.6299992142688772e-06,
|
| 8981 |
+
"loss": 0.0663,
|
| 8982 |
+
"step": 12760
|
| 8983 |
+
},
|
| 8984 |
+
{
|
| 8985 |
+
"epoch": 5.417904115400933,
|
| 8986 |
+
"grad_norm": 0.0206298790872097,
|
| 8987 |
+
"learning_rate": 1.6182132474267307e-06,
|
| 8988 |
+
"loss": 0.0001,
|
| 8989 |
+
"step": 12770
|
| 8990 |
+
},
|
| 8991 |
+
{
|
| 8992 |
+
"epoch": 5.4221467967755625,
|
| 8993 |
+
"grad_norm": 0.0004504290991462767,
|
| 8994 |
+
"learning_rate": 1.606427280584584e-06,
|
| 8995 |
+
"loss": 0.0001,
|
| 8996 |
+
"step": 12780
|
| 8997 |
+
},
|
| 8998 |
+
{
|
| 8999 |
+
"epoch": 5.426389478150191,
|
| 9000 |
+
"grad_norm": 0.0003228692221455276,
|
| 9001 |
+
"learning_rate": 1.5946413137424375e-06,
|
| 9002 |
+
"loss": 0.004,
|
| 9003 |
+
"step": 12790
|
| 9004 |
+
},
|
| 9005 |
+
{
|
| 9006 |
+
"epoch": 5.430632159524819,
|
| 9007 |
+
"grad_norm": 0.007890078239142895,
|
| 9008 |
+
"learning_rate": 1.582855346900291e-06,
|
| 9009 |
+
"loss": 0.0008,
|
| 9010 |
+
"step": 12800
|
| 9011 |
+
},
|
| 9012 |
+
{
|
| 9013 |
+
"epoch": 5.4348748408994485,
|
| 9014 |
+
"grad_norm": 27.009489059448242,
|
| 9015 |
+
"learning_rate": 1.5710693800581443e-06,
|
| 9016 |
+
"loss": 0.1073,
|
| 9017 |
+
"step": 12810
|
| 9018 |
+
},
|
| 9019 |
+
{
|
| 9020 |
+
"epoch": 5.439117522274077,
|
| 9021 |
+
"grad_norm": 0.8009844422340393,
|
| 9022 |
+
"learning_rate": 1.5592834132159975e-06,
|
| 9023 |
+
"loss": 0.0031,
|
| 9024 |
+
"step": 12820
|
| 9025 |
+
},
|
| 9026 |
+
{
|
| 9027 |
+
"epoch": 5.443360203648706,
|
| 9028 |
+
"grad_norm": 0.00046252511674538255,
|
| 9029 |
+
"learning_rate": 1.5474974463738508e-06,
|
| 9030 |
+
"loss": 0.0,
|
| 9031 |
+
"step": 12830
|
| 9032 |
+
},
|
| 9033 |
+
{
|
| 9034 |
+
"epoch": 5.4476028850233345,
|
| 9035 |
+
"grad_norm": 0.036401599645614624,
|
| 9036 |
+
"learning_rate": 1.5357114795317042e-06,
|
| 9037 |
+
"loss": 0.0008,
|
| 9038 |
+
"step": 12840
|
| 9039 |
+
},
|
| 9040 |
+
{
|
| 9041 |
+
"epoch": 5.451845566397964,
|
| 9042 |
+
"grad_norm": 0.006671661976724863,
|
| 9043 |
+
"learning_rate": 1.5239255126895576e-06,
|
| 9044 |
+
"loss": 0.0002,
|
| 9045 |
+
"step": 12850
|
| 9046 |
+
},
|
| 9047 |
+
{
|
| 9048 |
+
"epoch": 5.456088247772592,
|
| 9049 |
+
"grad_norm": 0.011679074726998806,
|
| 9050 |
+
"learning_rate": 1.512139545847411e-06,
|
| 9051 |
+
"loss": 0.0001,
|
| 9052 |
+
"step": 12860
|
| 9053 |
+
},
|
| 9054 |
+
{
|
| 9055 |
+
"epoch": 5.460330929147221,
|
| 9056 |
+
"grad_norm": 0.017675839364528656,
|
| 9057 |
+
"learning_rate": 1.5003535790052644e-06,
|
| 9058 |
+
"loss": 0.0001,
|
| 9059 |
+
"step": 12870
|
| 9060 |
+
},
|
| 9061 |
+
{
|
| 9062 |
+
"epoch": 5.46457361052185,
|
| 9063 |
+
"grad_norm": 0.00043906900100409985,
|
| 9064 |
+
"learning_rate": 1.4885676121631176e-06,
|
| 9065 |
+
"loss": 0.0171,
|
| 9066 |
+
"step": 12880
|
| 9067 |
+
},
|
| 9068 |
+
{
|
| 9069 |
+
"epoch": 5.468816291896479,
|
| 9070 |
+
"grad_norm": 0.026738321408629417,
|
| 9071 |
+
"learning_rate": 1.476781645320971e-06,
|
| 9072 |
+
"loss": 0.0044,
|
| 9073 |
+
"step": 12890
|
| 9074 |
+
},
|
| 9075 |
+
{
|
| 9076 |
+
"epoch": 5.473058973271107,
|
| 9077 |
+
"grad_norm": 0.00010588684381218627,
|
| 9078 |
+
"learning_rate": 1.4649956784788245e-06,
|
| 9079 |
+
"loss": 0.0,
|
| 9080 |
+
"step": 12900
|
| 9081 |
+
},
|
| 9082 |
+
{
|
| 9083 |
+
"epoch": 5.4773016546457365,
|
| 9084 |
+
"grad_norm": 0.000995173933915794,
|
| 9085 |
+
"learning_rate": 1.453209711636678e-06,
|
| 9086 |
+
"loss": 0.0891,
|
| 9087 |
+
"step": 12910
|
| 9088 |
+
},
|
| 9089 |
+
{
|
| 9090 |
+
"epoch": 5.481544336020365,
|
| 9091 |
+
"grad_norm": 0.006141920108348131,
|
| 9092 |
+
"learning_rate": 1.4414237447945313e-06,
|
| 9093 |
+
"loss": 0.0,
|
| 9094 |
+
"step": 12920
|
| 9095 |
+
},
|
| 9096 |
+
{
|
| 9097 |
+
"epoch": 5.485787017394994,
|
| 9098 |
+
"grad_norm": 0.00036171916872262955,
|
| 9099 |
+
"learning_rate": 1.4296377779523848e-06,
|
| 9100 |
+
"loss": 0.001,
|
| 9101 |
+
"step": 12930
|
| 9102 |
+
},
|
| 9103 |
+
{
|
| 9104 |
+
"epoch": 5.490029698769622,
|
| 9105 |
+
"grad_norm": 0.07056690007448196,
|
| 9106 |
+
"learning_rate": 1.4178518111102382e-06,
|
| 9107 |
+
"loss": 0.0019,
|
| 9108 |
+
"step": 12940
|
| 9109 |
+
},
|
| 9110 |
+
{
|
| 9111 |
+
"epoch": 5.494272380144251,
|
| 9112 |
+
"grad_norm": 0.00011696573346853256,
|
| 9113 |
+
"learning_rate": 1.4060658442680914e-06,
|
| 9114 |
+
"loss": 0.0002,
|
| 9115 |
+
"step": 12950
|
| 9116 |
+
},
|
| 9117 |
+
{
|
| 9118 |
+
"epoch": 5.49851506151888,
|
| 9119 |
+
"grad_norm": 0.0001800445024855435,
|
| 9120 |
+
"learning_rate": 1.3942798774259448e-06,
|
| 9121 |
+
"loss": 0.0,
|
| 9122 |
+
"step": 12960
|
| 9123 |
+
},
|
| 9124 |
+
{
|
| 9125 |
+
"epoch": 5.502757742893508,
|
| 9126 |
+
"grad_norm": 0.00014839708455838263,
|
| 9127 |
+
"learning_rate": 1.3824939105837982e-06,
|
| 9128 |
+
"loss": 0.0009,
|
| 9129 |
+
"step": 12970
|
| 9130 |
+
},
|
| 9131 |
+
{
|
| 9132 |
+
"epoch": 5.507000424268138,
|
| 9133 |
+
"grad_norm": 21.6761531829834,
|
| 9134 |
+
"learning_rate": 1.3707079437416516e-06,
|
| 9135 |
+
"loss": 0.0393,
|
| 9136 |
+
"step": 12980
|
| 9137 |
+
},
|
| 9138 |
+
{
|
| 9139 |
+
"epoch": 5.511243105642766,
|
| 9140 |
+
"grad_norm": 0.0011039957171306014,
|
| 9141 |
+
"learning_rate": 1.358921976899505e-06,
|
| 9142 |
+
"loss": 0.001,
|
| 9143 |
+
"step": 12990
|
| 9144 |
+
},
|
| 9145 |
+
{
|
| 9146 |
+
"epoch": 5.515485787017395,
|
| 9147 |
+
"grad_norm": 0.00048109618364833295,
|
| 9148 |
+
"learning_rate": 1.3471360100573585e-06,
|
| 9149 |
+
"loss": 0.0076,
|
| 9150 |
+
"step": 13000
|
| 9151 |
+
},
|
| 9152 |
+
{
|
| 9153 |
+
"epoch": 5.519728468392024,
|
| 9154 |
+
"grad_norm": 0.00025573698803782463,
|
| 9155 |
+
"learning_rate": 1.3353500432152117e-06,
|
| 9156 |
+
"loss": 0.0271,
|
| 9157 |
+
"step": 13010
|
| 9158 |
+
},
|
| 9159 |
+
{
|
| 9160 |
+
"epoch": 5.523971149766653,
|
| 9161 |
+
"grad_norm": 0.00011245541827520356,
|
| 9162 |
+
"learning_rate": 1.3235640763730651e-06,
|
| 9163 |
+
"loss": 0.0011,
|
| 9164 |
+
"step": 13020
|
| 9165 |
+
},
|
| 9166 |
+
{
|
| 9167 |
+
"epoch": 5.528213831141281,
|
| 9168 |
+
"grad_norm": 0.003252882743254304,
|
| 9169 |
+
"learning_rate": 1.3117781095309185e-06,
|
| 9170 |
+
"loss": 0.0,
|
| 9171 |
+
"step": 13030
|
| 9172 |
+
},
|
| 9173 |
+
{
|
| 9174 |
+
"epoch": 5.53245651251591,
|
| 9175 |
+
"grad_norm": 0.00017362892685923725,
|
| 9176 |
+
"learning_rate": 1.299992142688772e-06,
|
| 9177 |
+
"loss": 0.0044,
|
| 9178 |
+
"step": 13040
|
| 9179 |
+
},
|
| 9180 |
+
{
|
| 9181 |
+
"epoch": 5.536699193890539,
|
| 9182 |
+
"grad_norm": 0.0024702006485313177,
|
| 9183 |
+
"learning_rate": 1.2882061758466254e-06,
|
| 9184 |
+
"loss": 0.0013,
|
| 9185 |
+
"step": 13050
|
| 9186 |
+
},
|
| 9187 |
+
{
|
| 9188 |
+
"epoch": 5.540941875265167,
|
| 9189 |
+
"grad_norm": 0.0007814627606421709,
|
| 9190 |
+
"learning_rate": 1.2764202090044788e-06,
|
| 9191 |
+
"loss": 0.0523,
|
| 9192 |
+
"step": 13060
|
| 9193 |
+
},
|
| 9194 |
+
{
|
| 9195 |
+
"epoch": 5.545184556639796,
|
| 9196 |
+
"grad_norm": 0.0003535297291819006,
|
| 9197 |
+
"learning_rate": 1.2646342421623322e-06,
|
| 9198 |
+
"loss": 0.0365,
|
| 9199 |
+
"step": 13070
|
| 9200 |
+
},
|
| 9201 |
+
{
|
| 9202 |
+
"epoch": 5.549427238014426,
|
| 9203 |
+
"grad_norm": 0.12373920530080795,
|
| 9204 |
+
"learning_rate": 1.2528482753201854e-06,
|
| 9205 |
+
"loss": 0.0004,
|
| 9206 |
+
"step": 13080
|
| 9207 |
+
},
|
| 9208 |
+
{
|
| 9209 |
+
"epoch": 5.553669919389054,
|
| 9210 |
+
"grad_norm": 0.0014350833371281624,
|
| 9211 |
+
"learning_rate": 1.2410623084780389e-06,
|
| 9212 |
+
"loss": 0.0011,
|
| 9213 |
+
"step": 13090
|
| 9214 |
+
},
|
| 9215 |
+
{
|
| 9216 |
+
"epoch": 5.557912600763682,
|
| 9217 |
+
"grad_norm": 0.00033664770307950675,
|
| 9218 |
+
"learning_rate": 1.2292763416358923e-06,
|
| 9219 |
+
"loss": 0.0008,
|
| 9220 |
+
"step": 13100
|
| 9221 |
+
},
|
| 9222 |
+
{
|
| 9223 |
+
"epoch": 5.562155282138312,
|
| 9224 |
+
"grad_norm": 0.0005554277449846268,
|
| 9225 |
+
"learning_rate": 1.2174903747937457e-06,
|
| 9226 |
+
"loss": 0.0,
|
| 9227 |
+
"step": 13110
|
| 9228 |
+
},
|
| 9229 |
+
{
|
| 9230 |
+
"epoch": 5.56639796351294,
|
| 9231 |
+
"grad_norm": 0.0002817917848005891,
|
| 9232 |
+
"learning_rate": 1.2057044079515991e-06,
|
| 9233 |
+
"loss": 0.0001,
|
| 9234 |
+
"step": 13120
|
| 9235 |
+
},
|
| 9236 |
+
{
|
| 9237 |
+
"epoch": 5.570640644887569,
|
| 9238 |
+
"grad_norm": 0.03901715949177742,
|
| 9239 |
+
"learning_rate": 1.1939184411094525e-06,
|
| 9240 |
+
"loss": 0.0001,
|
| 9241 |
+
"step": 13130
|
| 9242 |
+
},
|
| 9243 |
+
{
|
| 9244 |
+
"epoch": 5.5748833262621975,
|
| 9245 |
+
"grad_norm": 0.3504720628261566,
|
| 9246 |
+
"learning_rate": 1.1821324742673057e-06,
|
| 9247 |
+
"loss": 0.0768,
|
| 9248 |
+
"step": 13140
|
| 9249 |
+
},
|
| 9250 |
+
{
|
| 9251 |
+
"epoch": 5.579126007636827,
|
| 9252 |
+
"grad_norm": 0.030617402866482735,
|
| 9253 |
+
"learning_rate": 1.1703465074251592e-06,
|
| 9254 |
+
"loss": 0.0376,
|
| 9255 |
+
"step": 13150
|
| 9256 |
+
},
|
| 9257 |
+
{
|
| 9258 |
+
"epoch": 5.583368689011455,
|
| 9259 |
+
"grad_norm": 0.00011435017950134352,
|
| 9260 |
+
"learning_rate": 1.1585605405830126e-06,
|
| 9261 |
+
"loss": 0.0017,
|
| 9262 |
+
"step": 13160
|
| 9263 |
+
},
|
| 9264 |
+
{
|
| 9265 |
+
"epoch": 5.587611370386084,
|
| 9266 |
+
"grad_norm": 0.04216380789875984,
|
| 9267 |
+
"learning_rate": 1.1467745737408658e-06,
|
| 9268 |
+
"loss": 0.0002,
|
| 9269 |
+
"step": 13170
|
| 9270 |
+
},
|
| 9271 |
+
{
|
| 9272 |
+
"epoch": 5.591854051760713,
|
| 9273 |
+
"grad_norm": 0.0005237733130343258,
|
| 9274 |
+
"learning_rate": 1.1349886068987192e-06,
|
| 9275 |
+
"loss": 0.0,
|
| 9276 |
+
"step": 13180
|
| 9277 |
+
},
|
| 9278 |
+
{
|
| 9279 |
+
"epoch": 5.596096733135342,
|
| 9280 |
+
"grad_norm": 15.306489944458008,
|
| 9281 |
+
"learning_rate": 1.1232026400565726e-06,
|
| 9282 |
+
"loss": 0.0833,
|
| 9283 |
+
"step": 13190
|
| 9284 |
+
},
|
| 9285 |
+
{
|
| 9286 |
+
"epoch": 5.60033941450997,
|
| 9287 |
+
"grad_norm": 0.03341296687722206,
|
| 9288 |
+
"learning_rate": 1.111416673214426e-06,
|
| 9289 |
+
"loss": 0.0002,
|
| 9290 |
+
"step": 13200
|
| 9291 |
+
},
|
| 9292 |
+
{
|
| 9293 |
+
"epoch": 5.604582095884599,
|
| 9294 |
+
"grad_norm": 0.006726527586579323,
|
| 9295 |
+
"learning_rate": 1.0996307063722793e-06,
|
| 9296 |
+
"loss": 0.0,
|
| 9297 |
+
"step": 13210
|
| 9298 |
+
},
|
| 9299 |
+
{
|
| 9300 |
+
"epoch": 5.608824777259228,
|
| 9301 |
+
"grad_norm": 0.003565547289326787,
|
| 9302 |
+
"learning_rate": 1.0878447395301327e-06,
|
| 9303 |
+
"loss": 0.0,
|
| 9304 |
+
"step": 13220
|
| 9305 |
+
},
|
| 9306 |
+
{
|
| 9307 |
+
"epoch": 5.613067458633856,
|
| 9308 |
+
"grad_norm": 0.007736480329185724,
|
| 9309 |
+
"learning_rate": 1.0760587726879861e-06,
|
| 9310 |
+
"loss": 0.006,
|
| 9311 |
+
"step": 13230
|
| 9312 |
+
},
|
| 9313 |
+
{
|
| 9314 |
+
"epoch": 5.6173101400084855,
|
| 9315 |
+
"grad_norm": 0.009087463840842247,
|
| 9316 |
+
"learning_rate": 1.0642728058458395e-06,
|
| 9317 |
+
"loss": 0.0474,
|
| 9318 |
+
"step": 13240
|
| 9319 |
+
},
|
| 9320 |
+
{
|
| 9321 |
+
"epoch": 5.621552821383114,
|
| 9322 |
+
"grad_norm": 0.0016269253101199865,
|
| 9323 |
+
"learning_rate": 1.052486839003693e-06,
|
| 9324 |
+
"loss": 0.0142,
|
| 9325 |
+
"step": 13250
|
| 9326 |
+
},
|
| 9327 |
+
{
|
| 9328 |
+
"epoch": 5.625795502757743,
|
| 9329 |
+
"grad_norm": 1.2690098285675049,
|
| 9330 |
+
"learning_rate": 1.0407008721615464e-06,
|
| 9331 |
+
"loss": 0.0011,
|
| 9332 |
+
"step": 13260
|
| 9333 |
+
},
|
| 9334 |
+
{
|
| 9335 |
+
"epoch": 5.6300381841323714,
|
| 9336 |
+
"grad_norm": 0.006056117359548807,
|
| 9337 |
+
"learning_rate": 1.0289149053193998e-06,
|
| 9338 |
+
"loss": 0.0003,
|
| 9339 |
+
"step": 13270
|
| 9340 |
+
},
|
| 9341 |
+
{
|
| 9342 |
+
"epoch": 5.634280865507001,
|
| 9343 |
+
"grad_norm": 0.0192121509462595,
|
| 9344 |
+
"learning_rate": 1.017128938477253e-06,
|
| 9345 |
+
"loss": 0.0428,
|
| 9346 |
+
"step": 13280
|
| 9347 |
+
},
|
| 9348 |
+
{
|
| 9349 |
+
"epoch": 5.638523546881629,
|
| 9350 |
+
"grad_norm": 0.0001612118649063632,
|
| 9351 |
+
"learning_rate": 1.0053429716351064e-06,
|
| 9352 |
+
"loss": 0.0001,
|
| 9353 |
+
"step": 13290
|
| 9354 |
+
},
|
| 9355 |
+
{
|
| 9356 |
+
"epoch": 5.642766228256258,
|
| 9357 |
+
"grad_norm": 8.37560510262847e-05,
|
| 9358 |
+
"learning_rate": 9.935570047929598e-07,
|
| 9359 |
+
"loss": 0.0,
|
| 9360 |
+
"step": 13300
|
| 9361 |
+
},
|
| 9362 |
+
{
|
| 9363 |
+
"epoch": 5.647008909630887,
|
| 9364 |
+
"grad_norm": 0.0011795988539233804,
|
| 9365 |
+
"learning_rate": 9.817710379508133e-07,
|
| 9366 |
+
"loss": 0.002,
|
| 9367 |
+
"step": 13310
|
| 9368 |
+
},
|
| 9369 |
+
{
|
| 9370 |
+
"epoch": 5.651251591005515,
|
| 9371 |
+
"grad_norm": 0.035643886774778366,
|
| 9372 |
+
"learning_rate": 9.699850711086667e-07,
|
| 9373 |
+
"loss": 0.0006,
|
| 9374 |
+
"step": 13320
|
| 9375 |
+
},
|
| 9376 |
+
{
|
| 9377 |
+
"epoch": 5.655494272380144,
|
| 9378 |
+
"grad_norm": 0.001820291276089847,
|
| 9379 |
+
"learning_rate": 9.5819910426652e-07,
|
| 9380 |
+
"loss": 0.0001,
|
| 9381 |
+
"step": 13330
|
| 9382 |
+
},
|
| 9383 |
+
{
|
| 9384 |
+
"epoch": 5.6597369537547735,
|
| 9385 |
+
"grad_norm": 0.07911129295825958,
|
| 9386 |
+
"learning_rate": 9.464131374243734e-07,
|
| 9387 |
+
"loss": 0.0001,
|
| 9388 |
+
"step": 13340
|
| 9389 |
+
},
|
| 9390 |
+
{
|
| 9391 |
+
"epoch": 5.663979635129402,
|
| 9392 |
+
"grad_norm": 0.0026224947068840265,
|
| 9393 |
+
"learning_rate": 9.346271705822268e-07,
|
| 9394 |
+
"loss": 0.0001,
|
| 9395 |
+
"step": 13350
|
| 9396 |
+
},
|
| 9397 |
+
{
|
| 9398 |
+
"epoch": 5.66822231650403,
|
| 9399 |
+
"grad_norm": 0.0009332606568932533,
|
| 9400 |
+
"learning_rate": 9.228412037400802e-07,
|
| 9401 |
+
"loss": 0.0022,
|
| 9402 |
+
"step": 13360
|
| 9403 |
+
},
|
| 9404 |
+
{
|
| 9405 |
+
"epoch": 5.672464997878659,
|
| 9406 |
+
"grad_norm": 0.000820977904368192,
|
| 9407 |
+
"learning_rate": 9.110552368979336e-07,
|
| 9408 |
+
"loss": 0.0,
|
| 9409 |
+
"step": 13370
|
| 9410 |
+
},
|
| 9411 |
+
{
|
| 9412 |
+
"epoch": 5.676707679253288,
|
| 9413 |
+
"grad_norm": 0.28827133774757385,
|
| 9414 |
+
"learning_rate": 8.99269270055787e-07,
|
| 9415 |
+
"loss": 0.003,
|
| 9416 |
+
"step": 13380
|
| 9417 |
+
},
|
| 9418 |
+
{
|
| 9419 |
+
"epoch": 5.680950360627917,
|
| 9420 |
+
"grad_norm": 0.0009737765649333596,
|
| 9421 |
+
"learning_rate": 8.874833032136403e-07,
|
| 9422 |
+
"loss": 0.0579,
|
| 9423 |
+
"step": 13390
|
| 9424 |
+
},
|
| 9425 |
+
{
|
| 9426 |
+
"epoch": 5.685193042002545,
|
| 9427 |
+
"grad_norm": 0.011965712532401085,
|
| 9428 |
+
"learning_rate": 8.756973363714937e-07,
|
| 9429 |
+
"loss": 0.0001,
|
| 9430 |
+
"step": 13400
|
| 9431 |
+
},
|
| 9432 |
+
{
|
| 9433 |
+
"epoch": 5.689435723377175,
|
| 9434 |
+
"grad_norm": 0.00034104377846233547,
|
| 9435 |
+
"learning_rate": 8.639113695293472e-07,
|
| 9436 |
+
"loss": 0.0213,
|
| 9437 |
+
"step": 13410
|
| 9438 |
+
},
|
| 9439 |
+
{
|
| 9440 |
+
"epoch": 5.693678404751803,
|
| 9441 |
+
"grad_norm": 0.00022738671395927668,
|
| 9442 |
+
"learning_rate": 8.521254026872004e-07,
|
| 9443 |
+
"loss": 0.0,
|
| 9444 |
+
"step": 13420
|
| 9445 |
+
},
|
| 9446 |
+
{
|
| 9447 |
+
"epoch": 5.697921086126432,
|
| 9448 |
+
"grad_norm": 0.029800143092870712,
|
| 9449 |
+
"learning_rate": 8.403394358450538e-07,
|
| 9450 |
+
"loss": 0.0001,
|
| 9451 |
+
"step": 13430
|
| 9452 |
+
},
|
| 9453 |
+
{
|
| 9454 |
+
"epoch": 5.702163767501061,
|
| 9455 |
+
"grad_norm": 0.0018794882344081998,
|
| 9456 |
+
"learning_rate": 8.285534690029072e-07,
|
| 9457 |
+
"loss": 0.0,
|
| 9458 |
+
"step": 13440
|
| 9459 |
+
},
|
| 9460 |
+
{
|
| 9461 |
+
"epoch": 5.70640644887569,
|
| 9462 |
+
"grad_norm": 0.006787729915231466,
|
| 9463 |
+
"learning_rate": 8.167675021607606e-07,
|
| 9464 |
+
"loss": 0.0028,
|
| 9465 |
+
"step": 13450
|
| 9466 |
+
},
|
| 9467 |
+
{
|
| 9468 |
+
"epoch": 5.710649130250318,
|
| 9469 |
+
"grad_norm": 0.0012673878809437156,
|
| 9470 |
+
"learning_rate": 8.049815353186139e-07,
|
| 9471 |
+
"loss": 0.0001,
|
| 9472 |
+
"step": 13460
|
| 9473 |
+
},
|
| 9474 |
+
{
|
| 9475 |
+
"epoch": 5.7148918116249465,
|
| 9476 |
+
"grad_norm": 0.0004057384212501347,
|
| 9477 |
+
"learning_rate": 7.931955684764674e-07,
|
| 9478 |
+
"loss": 0.0045,
|
| 9479 |
+
"step": 13470
|
| 9480 |
+
},
|
| 9481 |
+
{
|
| 9482 |
+
"epoch": 5.719134492999576,
|
| 9483 |
+
"grad_norm": 0.03451569378376007,
|
| 9484 |
+
"learning_rate": 7.814096016343208e-07,
|
| 9485 |
+
"loss": 0.0001,
|
| 9486 |
+
"step": 13480
|
| 9487 |
+
},
|
| 9488 |
+
{
|
| 9489 |
+
"epoch": 5.723377174374204,
|
| 9490 |
+
"grad_norm": 0.00978925358504057,
|
| 9491 |
+
"learning_rate": 7.696236347921741e-07,
|
| 9492 |
+
"loss": 0.0006,
|
| 9493 |
+
"step": 13490
|
| 9494 |
+
},
|
| 9495 |
+
{
|
| 9496 |
+
"epoch": 5.727619855748833,
|
| 9497 |
+
"grad_norm": 0.0015652361325919628,
|
| 9498 |
+
"learning_rate": 7.578376679500275e-07,
|
| 9499 |
+
"loss": 0.0001,
|
| 9500 |
+
"step": 13500
|
| 9501 |
+
},
|
| 9502 |
+
{
|
| 9503 |
+
"epoch": 5.731862537123462,
|
| 9504 |
+
"grad_norm": 0.00017095584189519286,
|
| 9505 |
+
"learning_rate": 7.460517011078809e-07,
|
| 9506 |
+
"loss": 0.0001,
|
| 9507 |
+
"step": 13510
|
| 9508 |
+
},
|
| 9509 |
+
{
|
| 9510 |
+
"epoch": 5.736105218498091,
|
| 9511 |
+
"grad_norm": 0.0002125598693965003,
|
| 9512 |
+
"learning_rate": 7.342657342657343e-07,
|
| 9513 |
+
"loss": 0.0332,
|
| 9514 |
+
"step": 13520
|
| 9515 |
+
},
|
| 9516 |
+
{
|
| 9517 |
+
"epoch": 5.740347899872719,
|
| 9518 |
+
"grad_norm": 0.0015327761648222804,
|
| 9519 |
+
"learning_rate": 7.224797674235877e-07,
|
| 9520 |
+
"loss": 0.0002,
|
| 9521 |
+
"step": 13530
|
| 9522 |
+
},
|
| 9523 |
+
{
|
| 9524 |
+
"epoch": 5.744590581247349,
|
| 9525 |
+
"grad_norm": 0.0002329029666725546,
|
| 9526 |
+
"learning_rate": 7.106938005814411e-07,
|
| 9527 |
+
"loss": 0.0004,
|
| 9528 |
+
"step": 13540
|
| 9529 |
+
},
|
| 9530 |
+
{
|
| 9531 |
+
"epoch": 5.748833262621977,
|
| 9532 |
+
"grad_norm": 0.004570275545120239,
|
| 9533 |
+
"learning_rate": 6.989078337392944e-07,
|
| 9534 |
+
"loss": 0.0001,
|
| 9535 |
+
"step": 13550
|
| 9536 |
+
},
|
| 9537 |
+
{
|
| 9538 |
+
"epoch": 5.753075943996606,
|
| 9539 |
+
"grad_norm": 0.0016535528702661395,
|
| 9540 |
+
"learning_rate": 6.871218668971478e-07,
|
| 9541 |
+
"loss": 0.0,
|
| 9542 |
+
"step": 13560
|
| 9543 |
+
},
|
| 9544 |
+
{
|
| 9545 |
+
"epoch": 5.7573186253712345,
|
| 9546 |
+
"grad_norm": 0.002983595710247755,
|
| 9547 |
+
"learning_rate": 6.753359000550013e-07,
|
| 9548 |
+
"loss": 0.0039,
|
| 9549 |
+
"step": 13570
|
| 9550 |
+
},
|
| 9551 |
+
{
|
| 9552 |
+
"epoch": 5.761561306745864,
|
| 9553 |
+
"grad_norm": 0.2122713178396225,
|
| 9554 |
+
"learning_rate": 6.635499332128547e-07,
|
| 9555 |
+
"loss": 0.0002,
|
| 9556 |
+
"step": 13580
|
| 9557 |
+
},
|
| 9558 |
+
{
|
| 9559 |
+
"epoch": 5.765803988120492,
|
| 9560 |
+
"grad_norm": 0.02119540236890316,
|
| 9561 |
+
"learning_rate": 6.517639663707079e-07,
|
| 9562 |
+
"loss": 0.093,
|
| 9563 |
+
"step": 13590
|
| 9564 |
+
},
|
| 9565 |
+
{
|
| 9566 |
+
"epoch": 5.770046669495121,
|
| 9567 |
+
"grad_norm": 0.00022401312889996916,
|
| 9568 |
+
"learning_rate": 6.399779995285613e-07,
|
| 9569 |
+
"loss": 0.0057,
|
| 9570 |
+
"step": 13600
|
| 9571 |
+
},
|
| 9572 |
+
{
|
| 9573 |
+
"epoch": 5.77428935086975,
|
| 9574 |
+
"grad_norm": 0.02134762331843376,
|
| 9575 |
+
"learning_rate": 6.281920326864147e-07,
|
| 9576 |
+
"loss": 0.0001,
|
| 9577 |
+
"step": 13610
|
| 9578 |
+
},
|
| 9579 |
+
{
|
| 9580 |
+
"epoch": 5.778532032244378,
|
| 9581 |
+
"grad_norm": 0.2460596263408661,
|
| 9582 |
+
"learning_rate": 6.16406065844268e-07,
|
| 9583 |
+
"loss": 0.0058,
|
| 9584 |
+
"step": 13620
|
| 9585 |
+
},
|
| 9586 |
+
{
|
| 9587 |
+
"epoch": 5.782774713619007,
|
| 9588 |
+
"grad_norm": 0.00259913457557559,
|
| 9589 |
+
"learning_rate": 6.046200990021215e-07,
|
| 9590 |
+
"loss": 0.0002,
|
| 9591 |
+
"step": 13630
|
| 9592 |
+
},
|
| 9593 |
+
{
|
| 9594 |
+
"epoch": 5.787017394993636,
|
| 9595 |
+
"grad_norm": 5.992967271595262e-05,
|
| 9596 |
+
"learning_rate": 5.928341321599749e-07,
|
| 9597 |
+
"loss": 0.0029,
|
| 9598 |
+
"step": 13640
|
| 9599 |
+
},
|
| 9600 |
+
{
|
| 9601 |
+
"epoch": 5.791260076368265,
|
| 9602 |
+
"grad_norm": 0.00010443546489113942,
|
| 9603 |
+
"learning_rate": 5.810481653178282e-07,
|
| 9604 |
+
"loss": 0.1305,
|
| 9605 |
+
"step": 13650
|
| 9606 |
+
},
|
| 9607 |
+
{
|
| 9608 |
+
"epoch": 5.795502757742893,
|
| 9609 |
+
"grad_norm": 0.0005638987058773637,
|
| 9610 |
+
"learning_rate": 5.692621984756816e-07,
|
| 9611 |
+
"loss": 0.0301,
|
| 9612 |
+
"step": 13660
|
| 9613 |
+
},
|
| 9614 |
+
{
|
| 9615 |
+
"epoch": 5.7997454391175225,
|
| 9616 |
+
"grad_norm": 0.00023100031830836087,
|
| 9617 |
+
"learning_rate": 5.57476231633535e-07,
|
| 9618 |
+
"loss": 0.1182,
|
| 9619 |
+
"step": 13670
|
| 9620 |
+
},
|
| 9621 |
+
{
|
| 9622 |
+
"epoch": 5.803988120492151,
|
| 9623 |
+
"grad_norm": 7.444872608175501e-05,
|
| 9624 |
+
"learning_rate": 5.456902647913885e-07,
|
| 9625 |
+
"loss": 0.0004,
|
| 9626 |
+
"step": 13680
|
| 9627 |
+
},
|
| 9628 |
+
{
|
| 9629 |
+
"epoch": 5.80823080186678,
|
| 9630 |
+
"grad_norm": 0.00608649430796504,
|
| 9631 |
+
"learning_rate": 5.339042979492418e-07,
|
| 9632 |
+
"loss": 0.0001,
|
| 9633 |
+
"step": 13690
|
| 9634 |
+
},
|
| 9635 |
+
{
|
| 9636 |
+
"epoch": 5.8124734832414084,
|
| 9637 |
+
"grad_norm": 0.0011898651719093323,
|
| 9638 |
+
"learning_rate": 5.221183311070952e-07,
|
| 9639 |
+
"loss": 0.0418,
|
| 9640 |
+
"step": 13700
|
| 9641 |
+
},
|
| 9642 |
+
{
|
| 9643 |
+
"epoch": 5.816716164616038,
|
| 9644 |
+
"grad_norm": 0.002850400051102042,
|
| 9645 |
+
"learning_rate": 5.103323642649486e-07,
|
| 9646 |
+
"loss": 0.0002,
|
| 9647 |
+
"step": 13710
|
| 9648 |
+
},
|
| 9649 |
+
{
|
| 9650 |
+
"epoch": 5.820958845990666,
|
| 9651 |
+
"grad_norm": 0.061382923275232315,
|
| 9652 |
+
"learning_rate": 4.985463974228019e-07,
|
| 9653 |
+
"loss": 0.0005,
|
| 9654 |
+
"step": 13720
|
| 9655 |
+
},
|
| 9656 |
+
{
|
| 9657 |
+
"epoch": 5.825201527365294,
|
| 9658 |
+
"grad_norm": 0.0006279785884544253,
|
| 9659 |
+
"learning_rate": 4.867604305806553e-07,
|
| 9660 |
+
"loss": 0.0256,
|
| 9661 |
+
"step": 13730
|
| 9662 |
+
},
|
| 9663 |
+
{
|
| 9664 |
+
"epoch": 5.829444208739924,
|
| 9665 |
+
"grad_norm": 0.006014589220285416,
|
| 9666 |
+
"learning_rate": 4.749744637385087e-07,
|
| 9667 |
+
"loss": 0.0002,
|
| 9668 |
+
"step": 13740
|
| 9669 |
+
},
|
| 9670 |
+
{
|
| 9671 |
+
"epoch": 5.833686890114553,
|
| 9672 |
+
"grad_norm": 0.0007121540838852525,
|
| 9673 |
+
"learning_rate": 4.631884968963621e-07,
|
| 9674 |
+
"loss": 0.0001,
|
| 9675 |
+
"step": 13750
|
| 9676 |
+
},
|
| 9677 |
+
{
|
| 9678 |
+
"epoch": 5.837929571489181,
|
| 9679 |
+
"grad_norm": 0.004926899913698435,
|
| 9680 |
+
"learning_rate": 4.5140253005421545e-07,
|
| 9681 |
+
"loss": 0.0009,
|
| 9682 |
+
"step": 13760
|
| 9683 |
+
},
|
| 9684 |
+
{
|
| 9685 |
+
"epoch": 5.84217225286381,
|
| 9686 |
+
"grad_norm": 0.0010848167585209012,
|
| 9687 |
+
"learning_rate": 4.396165632120689e-07,
|
| 9688 |
+
"loss": 0.0001,
|
| 9689 |
+
"step": 13770
|
| 9690 |
+
},
|
| 9691 |
+
{
|
| 9692 |
+
"epoch": 5.846414934238439,
|
| 9693 |
+
"grad_norm": 0.00028469949029386044,
|
| 9694 |
+
"learning_rate": 4.2783059636992224e-07,
|
| 9695 |
+
"loss": 0.0015,
|
| 9696 |
+
"step": 13780
|
| 9697 |
+
},
|
| 9698 |
+
{
|
| 9699 |
+
"epoch": 5.850657615613067,
|
| 9700 |
+
"grad_norm": 0.00019816796702798456,
|
| 9701 |
+
"learning_rate": 4.160446295277756e-07,
|
| 9702 |
+
"loss": 0.0,
|
| 9703 |
+
"step": 13790
|
| 9704 |
+
},
|
| 9705 |
+
{
|
| 9706 |
+
"epoch": 5.854900296987696,
|
| 9707 |
+
"grad_norm": 0.000983258942142129,
|
| 9708 |
+
"learning_rate": 4.04258662685629e-07,
|
| 9709 |
+
"loss": 0.0,
|
| 9710 |
+
"step": 13800
|
| 9711 |
+
},
|
| 9712 |
+
{
|
| 9713 |
+
"epoch": 5.859142978362325,
|
| 9714 |
+
"grad_norm": 0.0004910666611976922,
|
| 9715 |
+
"learning_rate": 3.9247269584348235e-07,
|
| 9716 |
+
"loss": 0.0,
|
| 9717 |
+
"step": 13810
|
| 9718 |
+
},
|
| 9719 |
+
{
|
| 9720 |
+
"epoch": 5.863385659736954,
|
| 9721 |
+
"grad_norm": 0.0002668113447725773,
|
| 9722 |
+
"learning_rate": 3.8068672900133577e-07,
|
| 9723 |
+
"loss": 0.0049,
|
| 9724 |
+
"step": 13820
|
| 9725 |
+
},
|
| 9726 |
+
{
|
| 9727 |
+
"epoch": 5.867628341111582,
|
| 9728 |
+
"grad_norm": 0.008571380749344826,
|
| 9729 |
+
"learning_rate": 3.6890076215918913e-07,
|
| 9730 |
+
"loss": 0.0416,
|
| 9731 |
+
"step": 13830
|
| 9732 |
+
},
|
| 9733 |
+
{
|
| 9734 |
+
"epoch": 5.871871022486212,
|
| 9735 |
+
"grad_norm": 0.1917121559381485,
|
| 9736 |
+
"learning_rate": 3.571147953170425e-07,
|
| 9737 |
+
"loss": 0.0007,
|
| 9738 |
+
"step": 13840
|
| 9739 |
+
},
|
| 9740 |
+
{
|
| 9741 |
+
"epoch": 5.87611370386084,
|
| 9742 |
+
"grad_norm": 0.002981944242492318,
|
| 9743 |
+
"learning_rate": 3.453288284748959e-07,
|
| 9744 |
+
"loss": 0.0,
|
| 9745 |
+
"step": 13850
|
| 9746 |
+
},
|
| 9747 |
+
{
|
| 9748 |
+
"epoch": 5.880356385235469,
|
| 9749 |
+
"grad_norm": 0.002561131026595831,
|
| 9750 |
+
"learning_rate": 3.335428616327493e-07,
|
| 9751 |
+
"loss": 0.0421,
|
| 9752 |
+
"step": 13860
|
| 9753 |
+
},
|
| 9754 |
+
{
|
| 9755 |
+
"epoch": 5.884599066610098,
|
| 9756 |
+
"grad_norm": 0.006334809586405754,
|
| 9757 |
+
"learning_rate": 3.2175689479060266e-07,
|
| 9758 |
+
"loss": 0.0001,
|
| 9759 |
+
"step": 13870
|
| 9760 |
+
},
|
| 9761 |
+
{
|
| 9762 |
+
"epoch": 5.888841747984726,
|
| 9763 |
+
"grad_norm": 0.0003479032020550221,
|
| 9764 |
+
"learning_rate": 3.099709279484561e-07,
|
| 9765 |
+
"loss": 0.0006,
|
| 9766 |
+
"step": 13880
|
| 9767 |
+
},
|
| 9768 |
+
{
|
| 9769 |
+
"epoch": 5.893084429359355,
|
| 9770 |
+
"grad_norm": 0.00016518193297088146,
|
| 9771 |
+
"learning_rate": 2.981849611063094e-07,
|
| 9772 |
+
"loss": 0.0009,
|
| 9773 |
+
"step": 13890
|
| 9774 |
+
},
|
| 9775 |
+
{
|
| 9776 |
+
"epoch": 5.8973271107339835,
|
| 9777 |
+
"grad_norm": 0.00029828742844983935,
|
| 9778 |
+
"learning_rate": 2.863989942641628e-07,
|
| 9779 |
+
"loss": 0.067,
|
| 9780 |
+
"step": 13900
|
| 9781 |
+
},
|
| 9782 |
+
{
|
| 9783 |
+
"epoch": 5.901569792108613,
|
| 9784 |
+
"grad_norm": 0.016401350498199463,
|
| 9785 |
+
"learning_rate": 2.746130274220162e-07,
|
| 9786 |
+
"loss": 0.0004,
|
| 9787 |
+
"step": 13910
|
| 9788 |
+
},
|
| 9789 |
+
{
|
| 9790 |
+
"epoch": 5.905812473483241,
|
| 9791 |
+
"grad_norm": 0.0003000469005201012,
|
| 9792 |
+
"learning_rate": 2.6282706057986955e-07,
|
| 9793 |
+
"loss": 0.0003,
|
| 9794 |
+
"step": 13920
|
| 9795 |
+
},
|
| 9796 |
+
{
|
| 9797 |
+
"epoch": 5.91005515485787,
|
| 9798 |
+
"grad_norm": 0.004757650662213564,
|
| 9799 |
+
"learning_rate": 2.5104109373772297e-07,
|
| 9800 |
+
"loss": 0.0002,
|
| 9801 |
+
"step": 13930
|
| 9802 |
+
},
|
| 9803 |
+
{
|
| 9804 |
+
"epoch": 5.914297836232499,
|
| 9805 |
+
"grad_norm": 0.0030631220433861017,
|
| 9806 |
+
"learning_rate": 2.3925512689557634e-07,
|
| 9807 |
+
"loss": 0.0001,
|
| 9808 |
+
"step": 13940
|
| 9809 |
+
},
|
| 9810 |
+
{
|
| 9811 |
+
"epoch": 5.918540517607128,
|
| 9812 |
+
"grad_norm": 0.12058387696743011,
|
| 9813 |
+
"learning_rate": 2.274691600534297e-07,
|
| 9814 |
+
"loss": 0.0492,
|
| 9815 |
+
"step": 13950
|
| 9816 |
+
},
|
| 9817 |
+
{
|
| 9818 |
+
"epoch": 5.922783198981756,
|
| 9819 |
+
"grad_norm": 0.00011355668539181352,
|
| 9820 |
+
"learning_rate": 2.156831932112831e-07,
|
| 9821 |
+
"loss": 0.0002,
|
| 9822 |
+
"step": 13960
|
| 9823 |
+
},
|
| 9824 |
+
{
|
| 9825 |
+
"epoch": 5.927025880356386,
|
| 9826 |
+
"grad_norm": 0.0008063128334470093,
|
| 9827 |
+
"learning_rate": 2.038972263691365e-07,
|
| 9828 |
+
"loss": 0.0575,
|
| 9829 |
+
"step": 13970
|
| 9830 |
+
},
|
| 9831 |
+
{
|
| 9832 |
+
"epoch": 5.931268561731014,
|
| 9833 |
+
"grad_norm": 0.000989718595519662,
|
| 9834 |
+
"learning_rate": 1.9211125952698986e-07,
|
| 9835 |
+
"loss": 0.0002,
|
| 9836 |
+
"step": 13980
|
| 9837 |
+
},
|
| 9838 |
+
{
|
| 9839 |
+
"epoch": 5.935511243105643,
|
| 9840 |
+
"grad_norm": 0.0002659430028870702,
|
| 9841 |
+
"learning_rate": 1.8032529268484323e-07,
|
| 9842 |
+
"loss": 0.0,
|
| 9843 |
+
"step": 13990
|
| 9844 |
+
},
|
| 9845 |
+
{
|
| 9846 |
+
"epoch": 5.9397539244802715,
|
| 9847 |
+
"grad_norm": 11.47286605834961,
|
| 9848 |
+
"learning_rate": 1.6853932584269663e-07,
|
| 9849 |
+
"loss": 0.1079,
|
| 9850 |
+
"step": 14000
|
| 9851 |
+
},
|
| 9852 |
+
{
|
| 9853 |
+
"epoch": 5.943996605854901,
|
| 9854 |
+
"grad_norm": 0.0655582994222641,
|
| 9855 |
+
"learning_rate": 1.5675335900055002e-07,
|
| 9856 |
+
"loss": 0.1143,
|
| 9857 |
+
"step": 14010
|
| 9858 |
+
},
|
| 9859 |
+
{
|
| 9860 |
+
"epoch": 5.948239287229529,
|
| 9861 |
+
"grad_norm": 0.00017576891696080565,
|
| 9862 |
+
"learning_rate": 1.4496739215840342e-07,
|
| 9863 |
+
"loss": 0.0182,
|
| 9864 |
+
"step": 14020
|
| 9865 |
+
},
|
| 9866 |
+
{
|
| 9867 |
+
"epoch": 5.9524819686041575,
|
| 9868 |
+
"grad_norm": 0.012820318341255188,
|
| 9869 |
+
"learning_rate": 1.3318142531625676e-07,
|
| 9870 |
+
"loss": 0.0,
|
| 9871 |
+
"step": 14030
|
| 9872 |
+
},
|
| 9873 |
+
{
|
| 9874 |
+
"epoch": 5.956724649978787,
|
| 9875 |
+
"grad_norm": 0.0032070872839540243,
|
| 9876 |
+
"learning_rate": 1.2139545847411015e-07,
|
| 9877 |
+
"loss": 0.0002,
|
| 9878 |
+
"step": 14040
|
| 9879 |
+
},
|
| 9880 |
+
{
|
| 9881 |
+
"epoch": 5.960967331353415,
|
| 9882 |
+
"grad_norm": 0.005197999067604542,
|
| 9883 |
+
"learning_rate": 1.0960949163196355e-07,
|
| 9884 |
+
"loss": 0.0004,
|
| 9885 |
+
"step": 14050
|
| 9886 |
+
},
|
| 9887 |
+
{
|
| 9888 |
+
"epoch": 5.965210012728044,
|
| 9889 |
+
"grad_norm": 0.0012786289444193244,
|
| 9890 |
+
"learning_rate": 9.782352478981693e-08,
|
| 9891 |
+
"loss": 0.0,
|
| 9892 |
+
"step": 14060
|
| 9893 |
+
},
|
| 9894 |
+
{
|
| 9895 |
+
"epoch": 5.969452694102673,
|
| 9896 |
+
"grad_norm": 0.00026581546990200877,
|
| 9897 |
+
"learning_rate": 8.603755794767031e-08,
|
| 9898 |
+
"loss": 0.0001,
|
| 9899 |
+
"step": 14070
|
| 9900 |
+
},
|
| 9901 |
+
{
|
| 9902 |
+
"epoch": 5.973695375477302,
|
| 9903 |
+
"grad_norm": 6.453340756706893e-05,
|
| 9904 |
+
"learning_rate": 7.42515911055237e-08,
|
| 9905 |
+
"loss": 0.0001,
|
| 9906 |
+
"step": 14080
|
| 9907 |
+
},
|
| 9908 |
+
{
|
| 9909 |
+
"epoch": 5.97793805685193,
|
| 9910 |
+
"grad_norm": 0.00021375238429754972,
|
| 9911 |
+
"learning_rate": 6.246562426337707e-08,
|
| 9912 |
+
"loss": 0.0001,
|
| 9913 |
+
"step": 14090
|
| 9914 |
+
},
|
| 9915 |
+
{
|
| 9916 |
+
"epoch": 5.9821807382265595,
|
| 9917 |
+
"grad_norm": 21.972055435180664,
|
| 9918 |
+
"learning_rate": 5.067965742123045e-08,
|
| 9919 |
+
"loss": 0.0041,
|
| 9920 |
+
"step": 14100
|
| 9921 |
+
},
|
| 9922 |
+
{
|
| 9923 |
+
"epoch": 5.986423419601188,
|
| 9924 |
+
"grad_norm": 0.00017499670502729714,
|
| 9925 |
+
"learning_rate": 3.889369057908384e-08,
|
| 9926 |
+
"loss": 0.0001,
|
| 9927 |
+
"step": 14110
|
| 9928 |
+
},
|
| 9929 |
+
{
|
| 9930 |
+
"epoch": 5.990666100975817,
|
| 9931 |
+
"grad_norm": 0.00012034083920298144,
|
| 9932 |
+
"learning_rate": 2.710772373693722e-08,
|
| 9933 |
+
"loss": 0.0,
|
| 9934 |
+
"step": 14120
|
| 9935 |
+
},
|
| 9936 |
+
{
|
| 9937 |
+
"epoch": 5.994908782350445,
|
| 9938 |
+
"grad_norm": 9.79662363533862e-05,
|
| 9939 |
+
"learning_rate": 1.5321756894790602e-08,
|
| 9940 |
+
"loss": 0.0001,
|
| 9941 |
+
"step": 14130
|
| 9942 |
+
},
|
| 9943 |
+
{
|
| 9944 |
+
"epoch": 5.999151463725074,
|
| 9945 |
+
"grad_norm": 0.00059797108406201,
|
| 9946 |
+
"learning_rate": 3.5357900526439855e-09,
|
| 9947 |
+
"loss": 0.0,
|
| 9948 |
+
"step": 14140
|
| 9949 |
+
},
|
| 9950 |
+
{
|
| 9951 |
+
"epoch": 6.0,
|
| 9952 |
+
"eval_loss": 0.0452701561152935,
|
| 9953 |
+
"eval_runtime": 14.591,
|
| 9954 |
+
"eval_samples_per_second": 143.582,
|
| 9955 |
+
"eval_steps_per_second": 17.956,
|
| 9956 |
+
"step": 14142
|
| 9957 |
}
|
| 9958 |
],
|
| 9959 |
"logging_steps": 10,
|
|
|
|
| 9968 |
"early_stopping_threshold": 0.0
|
| 9969 |
},
|
| 9970 |
"attributes": {
|
| 9971 |
+
"early_stopping_patience_counter": 3
|
| 9972 |
}
|
| 9973 |
},
|
| 9974 |
"TrainerControl": {
|
|
|
|
| 9977 |
"should_evaluate": false,
|
| 9978 |
"should_log": false,
|
| 9979 |
"should_save": true,
|
| 9980 |
+
"should_training_stop": true
|
| 9981 |
},
|
| 9982 |
"attributes": {}
|
| 9983 |
}
|
| 9984 |
},
|
| 9985 |
+
"total_flos": 2.3702476082688e+16,
|
| 9986 |
"train_batch_size": 8,
|
| 9987 |
"trial_name": null,
|
| 9988 |
"trial_params": null
|