Training in progress, epoch 7, checkpoint
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8d5c97055fdc8d547f157f787f2613b82c24d7d6df8d7c1e269f9bfcc02a0dd8
 size 737737736
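Each of these checkpoint files is stored through Git LFS, so the diff only touches the three-line pointer (version, oid, size); the weights themselves live in the LFS blob that the oid names. Below is a minimal sketch of verifying a downloaded blob against its pointer; the function names and the local blob path are illustrative, not part of this repository:

```python
import hashlib

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Parse a Git LFS pointer ('key value' per line) into a dict."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_text: str, blob_path: str) -> bool:
    """Check a downloaded blob against the oid/size recorded in its pointer."""
    fields = parse_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].removeprefix("sha256:")  # oid is 'sha256:<hex>'
    expected_size = int(fields["size"])
    h = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size
```

For the model.safetensors pointer above, a matching blob must hash to 8d5c9705…a0dd8 and be exactly 737737736 bytes.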
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ef2eb013750544812e1591c65714bd5dde59416d86e56d0404672c1904d67700
 size 1475598539
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b40bf27b8da3a33cf6690642642b22d2db2d3cb6012eb5c8ba18857468051fe5
 size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:47156a30838add9c6d1e5c3a85522a37146fec16b6e399a085c18ad4de256b38
 size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d8c71c3f5412e8488c941f04b67cc301f496e0293c9b96325052c2b46f8bb449
 size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
 "best_global_step": 10188,
 "best_metric": 0.03723596781492233,
 "best_model_checkpoint": "/tmp/tmpmisnhqpx/deberta-multilabel/checkpoint-10188",
-"epoch":
+"epoch": 7.0,
 "eval_steps": 500,
-"global_step":
+"global_step": 17829,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
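The new header values are internally consistent: global_step 17829 at epoch 7.0 gives 17829 / 7 = 2547 optimizer steps per epoch, and best_global_step 10188 = 4 × 2547, so the best checkpoint (eval metric 0.03723596781492233) was saved at the end of epoch 4. A minimal sketch that recomputes this from the file, assuming the checkpoint directory has been downloaded locally under the path shown:

```python
import json

# Assumed local copy of the file changed in this commit.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

steps_per_epoch = state["global_step"] / state["epoch"]   # 17829 / 7.0 = 2547.0
best_epoch = state["best_global_step"] / steps_per_epoch  # 10188 / 2547 = 4.0
print(f"{steps_per_epoch=:.0f} {best_epoch=:.0f} best_metric={state['best_metric']}")
```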
@@ -10752,6 +10752,1792 @@
 "eval_samples_per_second": 108.845,
 "eval_steps_per_second": 13.606,
 "step": 15282
+},
+{
+"epoch": 6.003140950137417,
+"grad_norm": 0.10729097574949265,
+"learning_rate": 1.332945556072592e-05,
+"loss": 0.0043,
+"step": 15290
+},
+{
+"epoch": 6.007067137809187,
+"grad_norm": 0.005536023993045092,
+"learning_rate": 1.33236389018148e-05,
+"loss": 0.009,
+"step": 15300
+},
+{
+"epoch": 6.010993325480958,
+"grad_norm": 0.08851289004087448,
+"learning_rate": 1.3317822242903677e-05,
+"loss": 0.0024,
+"step": 15310
+},
+{
+"epoch": 6.014919513152729,
+"grad_norm": 0.14325062930583954,
+"learning_rate": 1.3312005583992555e-05,
+"loss": 0.0048,
+"step": 15320
+},
+{
+"epoch": 6.0188457008244995,
+"grad_norm": 0.1224195808172226,
+"learning_rate": 1.3306188925081433e-05,
+"loss": 0.01,
+"step": 15330
+},
+{
+"epoch": 6.02277188849627,
+"grad_norm": 0.02600484900176525,
+"learning_rate": 1.3300372266170314e-05,
+"loss": 0.0041,
+"step": 15340
+},
+{
+"epoch": 6.02669807616804,
+"grad_norm": 0.07855655997991562,
+"learning_rate": 1.3294555607259192e-05,
+"loss": 0.0027,
+"step": 15350
+},
+{
+"epoch": 6.030624263839812,
+"grad_norm": 0.02549581602215767,
+"learning_rate": 1.328873894834807e-05,
+"loss": 0.0376,
+"step": 15360
+},
+{
+"epoch": 6.034550451511582,
+"grad_norm": 0.10870740562677383,
+"learning_rate": 1.3282922289436948e-05,
+"loss": 0.0115,
+"step": 15370
+},
+{
+"epoch": 6.038476639183353,
+"grad_norm": 0.11019928008317947,
+"learning_rate": 1.3277105630525826e-05,
+"loss": 0.0071,
+"step": 15380
+},
+{
+"epoch": 6.042402826855124,
+"grad_norm": 0.22508302330970764,
+"learning_rate": 1.3271288971614706e-05,
+"loss": 0.0053,
+"step": 15390
+},
+{
+"epoch": 6.046329014526894,
+"grad_norm": 0.15338468551635742,
+"learning_rate": 1.3265472312703585e-05,
+"loss": 0.0076,
+"step": 15400
+},
+{
+"epoch": 6.050255202198665,
+"grad_norm": 0.005434167105704546,
+"learning_rate": 1.3259655653792463e-05,
+"loss": 0.0168,
+"step": 15410
+},
+{
+"epoch": 6.054181389870436,
+"grad_norm": 0.878824770450592,
+"learning_rate": 1.3253838994881341e-05,
+"loss": 0.0073,
+"step": 15420
+},
+{
+"epoch": 6.058107577542207,
+"grad_norm": 0.18308301270008087,
+"learning_rate": 1.3248022335970219e-05,
+"loss": 0.0017,
+"step": 15430
+},
+{
+"epoch": 6.062033765213977,
+"grad_norm": 0.02841450646519661,
+"learning_rate": 1.3242205677059099e-05,
+"loss": 0.0068,
+"step": 15440
+},
+{
+"epoch": 6.0659599528857475,
+"grad_norm": 0.2938190698623657,
+"learning_rate": 1.3236389018147976e-05,
+"loss": 0.0202,
+"step": 15450
+},
+{
+"epoch": 6.069886140557519,
+"grad_norm": 0.22715412080287933,
+"learning_rate": 1.3230572359236854e-05,
+"loss": 0.0068,
+"step": 15460
+},
+{
+"epoch": 6.073812328229289,
+"grad_norm": 0.35270991921424866,
+"learning_rate": 1.3224755700325734e-05,
+"loss": 0.0049,
+"step": 15470
+},
+{
+"epoch": 6.07773851590106,
+"grad_norm": 0.12743733823299408,
+"learning_rate": 1.3218939041414614e-05,
+"loss": 0.0035,
+"step": 15480
+},
+{
+"epoch": 6.081664703572831,
+"grad_norm": 0.19374226033687592,
+"learning_rate": 1.3213122382503491e-05,
+"loss": 0.0044,
+"step": 15490
+},
+{
+"epoch": 6.0855908912446015,
+"grad_norm": 0.20690010488033295,
+"learning_rate": 1.320730572359237e-05,
+"loss": 0.006,
+"step": 15500
+},
+{
+"epoch": 6.089517078916372,
+"grad_norm": 57.17576599121094,
+"learning_rate": 1.3201489064681247e-05,
+"loss": 0.0407,
+"step": 15510
+},
+{
+"epoch": 6.093443266588143,
+"grad_norm": 0.033401213586330414,
+"learning_rate": 1.3195672405770125e-05,
+"loss": 0.004,
+"step": 15520
+},
+{
+"epoch": 6.097369454259914,
+"grad_norm": 0.24066749215126038,
+"learning_rate": 1.3189855746859007e-05,
+"loss": 0.0259,
+"step": 15530
+},
+{
+"epoch": 6.101295641931684,
+"grad_norm": 0.0016689227195456624,
+"learning_rate": 1.3184039087947884e-05,
+"loss": 0.0018,
+"step": 15540
+},
+{
+"epoch": 6.105221829603455,
+"grad_norm": 0.20571045577526093,
+"learning_rate": 1.3178222429036762e-05,
+"loss": 0.0442,
+"step": 15550
+},
+{
+"epoch": 6.109148017275226,
+"grad_norm": 0.46590718626976013,
+"learning_rate": 1.317240577012564e-05,
+"loss": 0.0027,
+"step": 15560
+},
+{
+"epoch": 6.113074204946996,
+"grad_norm": 0.00208448083139956,
+"learning_rate": 1.316658911121452e-05,
+"loss": 0.0104,
+"step": 15570
+},
+{
+"epoch": 6.117000392618767,
+"grad_norm": 0.02239024080336094,
+"learning_rate": 1.31607724523034e-05,
+"loss": 0.0064,
+"step": 15580
+},
+{
+"epoch": 6.120926580290538,
+"grad_norm": 0.06192897632718086,
+"learning_rate": 1.3154955793392277e-05,
+"loss": 0.0041,
+"step": 15590
+},
+{
+"epoch": 6.124852767962309,
+"grad_norm": 0.05527729541063309,
+"learning_rate": 1.3149139134481155e-05,
+"loss": 0.0396,
+"step": 15600
+},
+{
+"epoch": 6.128778955634079,
+"grad_norm": 0.07194492965936661,
+"learning_rate": 1.3143322475570033e-05,
+"loss": 0.0059,
+"step": 15610
+},
+{
+"epoch": 6.13270514330585,
+"grad_norm": 0.034291282296180725,
+"learning_rate": 1.3137505816658913e-05,
+"loss": 0.0038,
+"step": 15620
+},
+{
+"epoch": 6.136631330977621,
+"grad_norm": 0.016068680211901665,
+"learning_rate": 1.313168915774779e-05,
+"loss": 0.0098,
+"step": 15630
+},
+{
+"epoch": 6.140557518649391,
+"grad_norm": 0.0008041482069529593,
+"learning_rate": 1.3125872498836669e-05,
+"loss": 0.0055,
+"step": 15640
+},
+{
+"epoch": 6.144483706321163,
+"grad_norm": 0.016596445813775063,
+"learning_rate": 1.3120055839925547e-05,
+"loss": 0.0041,
+"step": 15650
+},
+{
+"epoch": 6.148409893992933,
+"grad_norm": 0.5889832973480225,
+"learning_rate": 1.3114239181014426e-05,
+"loss": 0.0066,
+"step": 15660
+},
+{
+"epoch": 6.1523360816647035,
+"grad_norm": 0.008105458691716194,
+"learning_rate": 1.3108422522103306e-05,
+"loss": 0.0067,
+"step": 15670
+},
+{
+"epoch": 6.156262269336474,
+"grad_norm": 0.28331077098846436,
+"learning_rate": 1.3102605863192184e-05,
+"loss": 0.0149,
+"step": 15680
+},
+{
+"epoch": 6.160188457008245,
+"grad_norm": 0.37553882598876953,
+"learning_rate": 1.3096789204281062e-05,
+"loss": 0.0057,
+"step": 15690
+},
+{
+"epoch": 6.164114644680016,
+"grad_norm": 0.04441031068563461,
+"learning_rate": 1.309097254536994e-05,
+"loss": 0.0032,
+"step": 15700
+},
+{
+"epoch": 6.168040832351786,
+"grad_norm": 0.10374340415000916,
+"learning_rate": 1.308515588645882e-05,
+"loss": 0.0041,
+"step": 15710
+},
+{
+"epoch": 6.1719670200235575,
+"grad_norm": 0.059388287365436554,
+"learning_rate": 1.3079339227547699e-05,
+"loss": 0.0049,
+"step": 15720
+},
+{
+"epoch": 6.175893207695328,
+"grad_norm": 0.23134520649909973,
+"learning_rate": 1.3073522568636577e-05,
+"loss": 0.0446,
+"step": 15730
+},
+{
+"epoch": 6.179819395367098,
+"grad_norm": 0.021650293841958046,
+"learning_rate": 1.3067705909725454e-05,
+"loss": 0.0053,
+"step": 15740
+},
+{
+"epoch": 6.18374558303887,
+"grad_norm": 0.3005703091621399,
+"learning_rate": 1.3061889250814332e-05,
+"loss": 0.0026,
+"step": 15750
+},
+{
+"epoch": 6.18767177071064,
+"grad_norm": 0.035445231944322586,
+"learning_rate": 1.3056072591903212e-05,
+"loss": 0.0197,
+"step": 15760
+},
+{
+"epoch": 6.191597958382411,
+"grad_norm": 37.90801239013672,
+"learning_rate": 1.3050255932992092e-05,
+"loss": 0.0508,
+"step": 15770
+},
+{
+"epoch": 6.195524146054181,
+"grad_norm": 0.009957941249012947,
+"learning_rate": 1.304443927408097e-05,
+"loss": 0.0174,
+"step": 15780
+},
+{
+"epoch": 6.199450333725952,
+"grad_norm": 0.003192415228113532,
+"learning_rate": 1.3038622615169847e-05,
+"loss": 0.0014,
+"step": 15790
+},
+{
+"epoch": 6.203376521397723,
+"grad_norm": 0.12274528294801712,
+"learning_rate": 1.3032805956258725e-05,
+"loss": 0.2067,
+"step": 15800
+},
+{
+"epoch": 6.207302709069493,
+"grad_norm": 2.8174281120300293,
+"learning_rate": 1.3026989297347605e-05,
+"loss": 0.0194,
+"step": 15810
+},
+{
+"epoch": 6.2112288967412645,
+"grad_norm": 0.10060155391693115,
+"learning_rate": 1.3021172638436483e-05,
+"loss": 0.0082,
+"step": 15820
+},
+{
+"epoch": 6.215155084413035,
+"grad_norm": 0.11491195112466812,
+"learning_rate": 1.301535597952536e-05,
+"loss": 0.0145,
+"step": 15830
+},
+{
+"epoch": 6.219081272084805,
+"grad_norm": 0.6463930010795593,
+"learning_rate": 1.3009539320614239e-05,
+"loss": 0.0082,
+"step": 15840
+},
+{
+"epoch": 6.223007459756577,
+"grad_norm": 0.049693990498781204,
+"learning_rate": 1.300372266170312e-05,
+"loss": 0.0723,
+"step": 15850
+},
+{
+"epoch": 6.226933647428347,
+"grad_norm": 0.3396795988082886,
+"learning_rate": 1.2997906002791998e-05,
+"loss": 0.0043,
+"step": 15860
+},
+{
+"epoch": 6.230859835100118,
+"grad_norm": 0.14611338078975677,
+"learning_rate": 1.2992089343880876e-05,
+"loss": 0.0076,
+"step": 15870
+},
+{
+"epoch": 6.234786022771888,
+"grad_norm": 6.290849685668945,
+"learning_rate": 1.2986272684969754e-05,
+"loss": 0.0075,
+"step": 15880
+},
+{
+"epoch": 6.238712210443659,
+"grad_norm": 1.1685798168182373,
+"learning_rate": 1.2980456026058632e-05,
+"loss": 0.0152,
+"step": 15890
+},
+{
+"epoch": 6.24263839811543,
+"grad_norm": 0.04885543882846832,
+"learning_rate": 1.2974639367147513e-05,
+"loss": 0.0551,
+"step": 15900
+},
+{
+"epoch": 6.2465645857872,
+"grad_norm": 0.00462720962241292,
+"learning_rate": 1.296882270823639e-05,
+"loss": 0.004,
+"step": 15910
+},
+{
+"epoch": 6.250490773458972,
+"grad_norm": 0.002610673662275076,
+"learning_rate": 1.2963006049325269e-05,
+"loss": 0.0032,
+"step": 15920
+},
+{
+"epoch": 6.254416961130742,
+"grad_norm": 0.052097711712121964,
+"learning_rate": 1.2957189390414147e-05,
+"loss": 0.0027,
+"step": 15930
+},
+{
+"epoch": 6.2583431488025125,
+"grad_norm": 0.02658090554177761,
+"learning_rate": 1.2951372731503025e-05,
+"loss": 0.0069,
+"step": 15940
+},
+{
+"epoch": 6.262269336474284,
+"grad_norm": 0.09183839708566666,
+"learning_rate": 1.2945556072591904e-05,
+"loss": 0.0047,
+"step": 15950
+},
+{
+"epoch": 6.266195524146054,
+"grad_norm": 0.019141655415296555,
+"learning_rate": 1.2939739413680784e-05,
+"loss": 0.0028,
+"step": 15960
+},
+{
+"epoch": 6.270121711817825,
+"grad_norm": 0.015964055433869362,
+"learning_rate": 1.2933922754769662e-05,
+"loss": 0.0022,
+"step": 15970
+},
+{
+"epoch": 6.274047899489595,
+"grad_norm": 0.2533932626247406,
+"learning_rate": 1.292810609585854e-05,
+"loss": 0.0016,
+"step": 15980
+},
+{
+"epoch": 6.2779740871613665,
+"grad_norm": 0.004543932154774666,
+"learning_rate": 1.292228943694742e-05,
+"loss": 0.0022,
+"step": 15990
+},
+{
+"epoch": 6.281900274833137,
+"grad_norm": 0.020446695387363434,
+"learning_rate": 1.2916472778036297e-05,
+"loss": 0.0007,
+"step": 16000
+},
+{
+"epoch": 6.285826462504907,
+"grad_norm": 0.146882101893425,
+"learning_rate": 1.2910656119125175e-05,
+"loss": 0.0297,
+"step": 16010
+},
+{
+"epoch": 6.289752650176679,
+"grad_norm": 0.1484435796737671,
+"learning_rate": 1.2904839460214053e-05,
+"loss": 0.0108,
+"step": 16020
+},
+{
+"epoch": 6.293678837848449,
+"grad_norm": 0.05455717071890831,
+"learning_rate": 1.2899022801302933e-05,
+"loss": 0.0061,
+"step": 16030
+},
+{
+"epoch": 6.29760502552022,
+"grad_norm": 8.229965209960938,
+"learning_rate": 1.2893206142391812e-05,
+"loss": 0.015,
+"step": 16040
+},
+{
+"epoch": 6.301531213191991,
+"grad_norm": 0.09126053750514984,
+"learning_rate": 1.288738948348069e-05,
+"loss": 0.0014,
+"step": 16050
+},
+{
+"epoch": 6.305457400863761,
+"grad_norm": 0.04557826742529869,
+"learning_rate": 1.2881572824569568e-05,
+"loss": 0.0047,
+"step": 16060
+},
+{
+"epoch": 6.309383588535532,
+"grad_norm": 0.2023244947195053,
+"learning_rate": 1.2875756165658446e-05,
+"loss": 0.0025,
+"step": 16070
+},
+{
+"epoch": 6.313309776207303,
+"grad_norm": 0.00869916845113039,
+"learning_rate": 1.2869939506747324e-05,
+"loss": 0.1648,
+"step": 16080
+},
+{
+"epoch": 6.317235963879074,
+"grad_norm": 0.003229738911613822,
+"learning_rate": 1.2864122847836205e-05,
+"loss": 0.0024,
+"step": 16090
+},
+{
+"epoch": 6.321162151550844,
+"grad_norm": 0.23027494549751282,
+"learning_rate": 1.2858306188925083e-05,
+"loss": 0.0037,
+"step": 16100
+},
+{
+"epoch": 6.3250883392226145,
+"grad_norm": 2.996023178100586,
+"learning_rate": 1.2852489530013961e-05,
+"loss": 0.0127,
+"step": 16110
+},
+{
+"epoch": 6.329014526894386,
+"grad_norm": 0.39892762899398804,
+"learning_rate": 1.2846672871102839e-05,
+"loss": 0.0311,
+"step": 16120
+},
+{
+"epoch": 6.332940714566156,
+"grad_norm": 1.0110081434249878,
+"learning_rate": 1.2840856212191718e-05,
+"loss": 0.0107,
+"step": 16130
+},
+{
+"epoch": 6.336866902237927,
+"grad_norm": 0.015728643164038658,
+"learning_rate": 1.2835039553280596e-05,
+"loss": 0.0092,
+"step": 16140
+},
+{
+"epoch": 6.340793089909698,
+"grad_norm": 0.26917484402656555,
+"learning_rate": 1.2829222894369476e-05,
+"loss": 0.0066,
+"step": 16150
+},
+{
+"epoch": 6.3447192775814685,
+"grad_norm": 0.0725165605545044,
+"learning_rate": 1.2823406235458354e-05,
+"loss": 0.0052,
+"step": 16160
+},
+{
+"epoch": 6.348645465253239,
+"grad_norm": 0.047034312039613724,
+"learning_rate": 1.2817589576547232e-05,
+"loss": 0.0057,
+"step": 16170
+},
+{
+"epoch": 6.352571652925009,
+"grad_norm": 1.0331474542617798,
+"learning_rate": 1.2811772917636111e-05,
+"loss": 0.0044,
+"step": 16180
+},
+{
+"epoch": 6.356497840596781,
+"grad_norm": 0.004600520711392164,
+"learning_rate": 1.280595625872499e-05,
+"loss": 0.1207,
+"step": 16190
+},
+{
+"epoch": 6.360424028268551,
+"grad_norm": 0.002064879285171628,
+"learning_rate": 1.2800139599813867e-05,
+"loss": 0.0166,
+"step": 16200
+},
+{
+"epoch": 6.364350215940322,
+"grad_norm": 0.028028376400470734,
+"learning_rate": 1.2794322940902745e-05,
+"loss": 0.0062,
+"step": 16210
+},
+{
+"epoch": 6.368276403612093,
+"grad_norm": 0.004204788710922003,
+"learning_rate": 1.2788506281991626e-05,
+"loss": 0.0161,
+"step": 16220
+},
+{
+"epoch": 6.372202591283863,
+"grad_norm": 0.0615185871720314,
+"learning_rate": 1.2782689623080504e-05,
+"loss": 0.0098,
+"step": 16230
+},
+{
+"epoch": 6.376128778955634,
+"grad_norm": 0.032976388931274414,
+"learning_rate": 1.2776872964169382e-05,
+"loss": 0.0022,
+"step": 16240
+},
+{
+"epoch": 6.380054966627405,
+"grad_norm": 0.09620284289121628,
+"learning_rate": 1.277105630525826e-05,
+"loss": 0.0339,
+"step": 16250
+},
+{
+"epoch": 6.383981154299176,
+"grad_norm": 0.07562306523323059,
+"learning_rate": 1.2765239646347138e-05,
+"loss": 0.0032,
+"step": 16260
+},
+{
+"epoch": 6.387907341970946,
+"grad_norm": 0.09829970449209213,
+"learning_rate": 1.275942298743602e-05,
+"loss": 0.0024,
+"step": 16270
+},
+{
+"epoch": 6.391833529642717,
+"grad_norm": 0.23492808640003204,
+"learning_rate": 1.2753606328524897e-05,
+"loss": 0.0052,
+"step": 16280
+},
+{
+"epoch": 6.395759717314488,
+"grad_norm": 3.4835753440856934,
+"learning_rate": 1.2747789669613775e-05,
+"loss": 0.0123,
+"step": 16290
+},
+{
+"epoch": 6.399685904986258,
+"grad_norm": 0.46241846680641174,
+"learning_rate": 1.2741973010702653e-05,
+"loss": 0.0103,
+"step": 16300
+},
+{
+"epoch": 6.403612092658029,
+"grad_norm": 0.082466721534729,
+"learning_rate": 1.2736156351791531e-05,
+"loss": 0.0066,
+"step": 16310
+},
+{
+"epoch": 6.4075382803298,
+"grad_norm": 0.003947274759411812,
+"learning_rate": 1.273033969288041e-05,
+"loss": 0.0076,
+"step": 16320
+},
+{
+"epoch": 6.41146446800157,
+"grad_norm": 0.6531131267547607,
+"learning_rate": 1.2724523033969289e-05,
+"loss": 0.1046,
+"step": 16330
+},
+{
+"epoch": 6.415390655673341,
+"grad_norm": 0.25550273060798645,
+"learning_rate": 1.2718706375058168e-05,
+"loss": 0.0178,
+"step": 16340
+},
+{
+"epoch": 6.419316843345112,
+"grad_norm": 0.04862063005566597,
+"learning_rate": 1.2712889716147046e-05,
+"loss": 0.0189,
+"step": 16350
+},
+{
+"epoch": 6.423243031016883,
+"grad_norm": 0.18675532937049866,
+"learning_rate": 1.2707073057235926e-05,
+"loss": 0.0385,
+"step": 16360
+},
+{
+"epoch": 6.427169218688653,
+"grad_norm": 0.09339571744203568,
+"learning_rate": 1.2701256398324804e-05,
+"loss": 0.0082,
+"step": 16370
+},
+{
+"epoch": 6.431095406360424,
+"grad_norm": 0.586990475654602,
+"learning_rate": 1.2695439739413681e-05,
+"loss": 0.0272,
+"step": 16380
+},
+{
+"epoch": 6.435021594032195,
+"grad_norm": 0.3322784900665283,
+"learning_rate": 1.268962308050256e-05,
+"loss": 0.0278,
+"step": 16390
+},
+{
+"epoch": 6.438947781703965,
+"grad_norm": 0.09063828736543655,
+"learning_rate": 1.2683806421591437e-05,
+"loss": 0.0036,
+"step": 16400
+},
+{
+"epoch": 6.442873969375736,
+"grad_norm": 0.14562222361564636,
+"learning_rate": 1.2677989762680319e-05,
+"loss": 0.0072,
+"step": 16410
+},
+{
+"epoch": 6.446800157047507,
+"grad_norm": 0.03718629106879234,
+"learning_rate": 1.2672173103769196e-05,
+"loss": 0.0063,
+"step": 16420
+},
+{
+"epoch": 6.4507263447192775,
+"grad_norm": 0.0008307918324135244,
+"learning_rate": 1.2666356444858074e-05,
+"loss": 0.0047,
+"step": 16430
+},
+{
+"epoch": 6.454652532391048,
+"grad_norm": 0.3880873918533325,
+"learning_rate": 1.2660539785946952e-05,
+"loss": 0.0029,
+"step": 16440
+},
+{
+"epoch": 6.458578720062819,
+"grad_norm": 0.1379193514585495,
+"learning_rate": 1.265472312703583e-05,
+"loss": 0.0213,
+"step": 16450
+},
+{
+"epoch": 6.46250490773459,
+"grad_norm": 23.391353607177734,
+"learning_rate": 1.2648906468124712e-05,
+"loss": 0.0287,
+"step": 16460
+},
+{
+"epoch": 6.46643109540636,
+"grad_norm": 0.9327417016029358,
+"learning_rate": 1.264308980921359e-05,
+"loss": 0.0106,
+"step": 16470
+},
+{
+"epoch": 6.4703572830781315,
+"grad_norm": 0.0707123652100563,
+"learning_rate": 1.2637273150302467e-05,
+"loss": 0.008,
+"step": 16480
+},
+{
+"epoch": 6.474283470749902,
+"grad_norm": 0.031931228935718536,
+"learning_rate": 1.2631456491391345e-05,
+"loss": 0.0044,
+"step": 16490
+},
+{
+"epoch": 6.478209658421672,
+"grad_norm": 0.015672454610466957,
+"learning_rate": 1.2625639832480225e-05,
+"loss": 0.0122,
+"step": 16500
+},
+{
+"epoch": 6.482135846093444,
+"grad_norm": 0.2340773344039917,
+"learning_rate": 1.2619823173569103e-05,
+"loss": 0.2114,
+"step": 16510
+},
+{
+"epoch": 6.486062033765214,
+"grad_norm": 0.39611977338790894,
+"learning_rate": 1.2614006514657982e-05,
+"loss": 0.0093,
+"step": 16520
+},
+{
+"epoch": 6.489988221436985,
+"grad_norm": 0.007380360271781683,
+"learning_rate": 1.260818985574686e-05,
+"loss": 0.0005,
+"step": 16530
+},
+{
+"epoch": 6.493914409108755,
+"grad_norm": 8.786765098571777,
+"learning_rate": 1.2602373196835738e-05,
+"loss": 0.0267,
+"step": 16540
+},
+{
+"epoch": 6.497840596780526,
+"grad_norm": 0.7042697072029114,
+"learning_rate": 1.2596556537924618e-05,
+"loss": 0.0082,
+"step": 16550
+},
+{
+"epoch": 6.501766784452297,
+"grad_norm": 0.5189945697784424,
+"learning_rate": 1.2590739879013496e-05,
+"loss": 0.0061,
+"step": 16560
+},
+{
+"epoch": 6.505692972124067,
+"grad_norm": 1.6222602128982544,
+"learning_rate": 1.2584923220102374e-05,
+"loss": 0.0764,
+"step": 16570
+},
+{
+"epoch": 6.509619159795839,
+"grad_norm": 0.25157955288887024,
+"learning_rate": 1.2579106561191252e-05,
+"loss": 0.041,
+"step": 16580
+},
+{
+"epoch": 6.513545347467609,
+"grad_norm": 0.03619721159338951,
+"learning_rate": 1.257328990228013e-05,
+"loss": 0.0063,
+"step": 16590
+},
+{
+"epoch": 6.5174715351393795,
+"grad_norm": 0.08651311695575714,
+"learning_rate": 1.256747324336901e-05,
+"loss": 0.0585,
+"step": 16600
+},
+{
+"epoch": 6.52139772281115,
+"grad_norm": 0.03698743134737015,
+"learning_rate": 1.2561656584457889e-05,
+"loss": 0.0037,
+"step": 16610
+},
+{
+"epoch": 6.525323910482921,
+"grad_norm": 0.10592307895421982,
+"learning_rate": 1.2555839925546767e-05,
+"loss": 0.1759,
+"step": 16620
+},
+{
+"epoch": 6.529250098154692,
+"grad_norm": 1.0061217546463013,
+"learning_rate": 1.2550023266635644e-05,
+"loss": 0.0055,
+"step": 16630
+},
+{
+"epoch": 6.533176285826462,
+"grad_norm": 0.022983470931649208,
+"learning_rate": 1.2544206607724526e-05,
+"loss": 0.0017,
+"step": 16640
+},
+{
+"epoch": 6.5371024734982335,
+"grad_norm": 0.011514846235513687,
+"learning_rate": 1.2538389948813404e-05,
+"loss": 0.0096,
+"step": 16650
+},
+{
+"epoch": 6.541028661170004,
+"grad_norm": 0.0031375123653560877,
+"learning_rate": 1.2532573289902282e-05,
+"loss": 0.0011,
+"step": 16660
+},
+{
+"epoch": 6.544954848841774,
+"grad_norm": 0.19131770730018616,
+"learning_rate": 1.252675663099116e-05,
+"loss": 0.1628,
+"step": 16670
+},
+{
+"epoch": 6.548881036513546,
+"grad_norm": 0.160979226231575,
+"learning_rate": 1.2520939972080037e-05,
+"loss": 0.0223,
+"step": 16680
+},
+{
+"epoch": 6.552807224185316,
+"grad_norm": 0.08103285729885101,
+"learning_rate": 1.2515123313168917e-05,
+"loss": 0.0131,
+"step": 16690
+},
+{
+"epoch": 6.556733411857087,
+"grad_norm": 0.05095025151968002,
+"learning_rate": 1.2509306654257795e-05,
+"loss": 0.003,
+"step": 16700
+},
+{
+"epoch": 6.560659599528858,
+"grad_norm": 0.020380154252052307,
+"learning_rate": 1.2503489995346675e-05,
+"loss": 0.0048,
+"step": 16710
+},
+{
+"epoch": 6.564585787200628,
+"grad_norm": 1.007746934890747,
+"learning_rate": 1.2497673336435552e-05,
+"loss": 0.0087,
+"step": 16720
+},
+{
+"epoch": 6.568511974872399,
+"grad_norm": 0.06670385599136353,
+"learning_rate": 1.2491856677524432e-05,
+"loss": 0.0534,
+"step": 16730
+},
+{
+"epoch": 6.572438162544169,
+"grad_norm": 2.7181715965270996,
+"learning_rate": 1.248604001861331e-05,
+"loss": 0.0087,
+"step": 16740
+},
+{
+"epoch": 6.576364350215941,
+"grad_norm": 0.3471444547176361,
+"learning_rate": 1.2480223359702188e-05,
+"loss": 0.0126,
+"step": 16750
+},
+{
+"epoch": 6.580290537887711,
+"grad_norm": 1.0008331537246704,
+"learning_rate": 1.2474406700791066e-05,
+"loss": 0.0116,
+"step": 16760
+},
+{
+"epoch": 6.5842167255594815,
+"grad_norm": 0.4389515817165375,
+"learning_rate": 1.2468590041879944e-05,
+"loss": 0.0091,
+"step": 16770
+},
+{
+"epoch": 6.588142913231253,
+"grad_norm": 0.013092178851366043,
+"learning_rate": 1.2462773382968825e-05,
+"loss": 0.0321,
+"step": 16780
+},
+{
+"epoch": 6.592069100903023,
+"grad_norm": 0.03941558673977852,
+"learning_rate": 1.2456956724057703e-05,
+"loss": 0.0017,
+"step": 16790
+},
+{
+"epoch": 6.595995288574794,
+"grad_norm": 0.04022646322846413,
+"learning_rate": 1.245114006514658e-05,
+"loss": 0.0055,
+"step": 16800
+},
+{
+"epoch": 6.599921476246564,
+"grad_norm": 0.8628998398780823,
+"learning_rate": 1.2445323406235459e-05,
+"loss": 0.0226,
+"step": 16810
+},
+{
+"epoch": 6.603847663918335,
+"grad_norm": 0.004127669148147106,
+"learning_rate": 1.2439506747324337e-05,
+"loss": 0.0087,
+"step": 16820
+},
+{
+"epoch": 6.607773851590106,
+"grad_norm": 0.030491773039102554,
+"learning_rate": 1.2433690088413218e-05,
+"loss": 0.0102,
+"step": 16830
+},
+{
+"epoch": 6.611700039261876,
+"grad_norm": 0.10267560929059982,
+"learning_rate": 1.2427873429502096e-05,
+"loss": 0.0025,
+"step": 16840
+},
+{
+"epoch": 6.615626226933648,
+"grad_norm": 0.0036746449768543243,
+"learning_rate": 1.2422056770590974e-05,
+"loss": 0.0148,
+"step": 16850
+},
+{
+"epoch": 6.619552414605418,
+"grad_norm": 0.22567714750766754,
+"learning_rate": 1.2416240111679852e-05,
+"loss": 0.0092,
+"step": 16860
+},
+{
+"epoch": 6.6234786022771885,
+"grad_norm": 0.2515012323856354,
+"learning_rate": 1.2410423452768731e-05,
+"loss": 0.0075,
+"step": 16870
+},
+{
+"epoch": 6.62740478994896,
+"grad_norm": 8.063331604003906,
+"learning_rate": 1.240460679385761e-05,
+"loss": 0.0046,
+"step": 16880
+},
+{
+"epoch": 6.63133097762073,
+"grad_norm": 0.444339394569397,
+"learning_rate": 1.2398790134946487e-05,
+"loss": 0.0067,
+"step": 16890
+},
+{
+"epoch": 6.635257165292501,
+"grad_norm": 0.2454690933227539,
+"learning_rate": 1.2392973476035367e-05,
+"loss": 0.0043,
+"step": 16900
+},
+{
+"epoch": 6.639183352964272,
+"grad_norm": 2.7716898918151855,
+"learning_rate": 1.2387156817124245e-05,
+"loss": 0.0042,
+"step": 16910
+},
+{
+"epoch": 6.6431095406360425,
+"grad_norm": 0.0014400059590116143,
+"learning_rate": 1.2381340158213124e-05,
+"loss": 0.0023,
+"step": 16920
+},
+{
+"epoch": 6.647035728307813,
+"grad_norm": 0.012137122452259064,
+"learning_rate": 1.2375523499302002e-05,
+"loss": 0.0079,
+"step": 16930
+},
+{
+"epoch": 6.650961915979584,
+"grad_norm": 0.014659970998764038,
+"learning_rate": 1.236970684039088e-05,
+"loss": 0.0047,
+"step": 16940
+},
+{
+"epoch": 6.654888103651355,
+"grad_norm": 0.003505716798827052,
+"learning_rate": 1.2363890181479758e-05,
+"loss": 0.0075,
+"step": 16950
+},
+{
+"epoch": 6.658814291323125,
+"grad_norm": 0.0015272964956238866,
+"learning_rate": 1.2358073522568636e-05,
+"loss": 0.0169,
+"step": 16960
+},
+{
+"epoch": 6.662740478994896,
+"grad_norm": 0.008153236471116543,
+"learning_rate": 1.2352256863657517e-05,
+"loss": 0.009,
+"step": 16970
+},
+{
+"epoch": 6.666666666666667,
+"grad_norm": 0.1218530684709549,
+"learning_rate": 1.2346440204746395e-05,
+"loss": 0.0727,
+"step": 16980
+},
+{
+"epoch": 6.670592854338437,
+"grad_norm": 0.04540344327688217,
+"learning_rate": 1.2340623545835273e-05,
+"loss": 0.0078,
+"step": 16990
+},
+{
+"epoch": 6.674519042010208,
+"grad_norm": 0.21214599907398224,
+"learning_rate": 1.2334806886924151e-05,
+"loss": 0.0305,
+"step": 17000
+},
+{
+"epoch": 6.678445229681979,
+"grad_norm": 0.008593380451202393,
+"learning_rate": 1.2328990228013032e-05,
+"loss": 0.0052,
+"step": 17010
+},
+{
+"epoch": 6.68237141735375,
+"grad_norm": 0.04752210155129433,
+"learning_rate": 1.232317356910191e-05,
+"loss": 0.0155,
+"step": 17020
+},
+{
+"epoch": 6.68629760502552,
+"grad_norm": 0.057865675538778305,
+"learning_rate": 1.2317356910190788e-05,
+"loss": 0.0312,
+"step": 17030
+},
+{
+"epoch": 6.6902237926972905,
+"grad_norm": 0.17430682480335236,
+"learning_rate": 1.2311540251279666e-05,
+"loss": 0.0084,
+"step": 17040
+},
+{
+"epoch": 6.694149980369062,
+"grad_norm": 0.437730997800827,
+"learning_rate": 1.2305723592368544e-05,
+"loss": 0.0052,
+"step": 17050
+},
+{
+"epoch": 6.698076168040832,
+"grad_norm": 0.02329200506210327,
+"learning_rate": 1.2299906933457423e-05,
+"loss": 0.0279,
+"step": 17060
+},
+{
+"epoch": 6.702002355712603,
+"grad_norm": 0.028645997866988182,
+"learning_rate": 1.2294090274546301e-05,
+"loss": 0.0068,
+"step": 17070
+},
+{
+"epoch": 6.705928543384374,
+"grad_norm": 0.002165143145248294,
+"learning_rate": 1.228827361563518e-05,
+"loss": 0.0188,
+"step": 17080
+},
+{
+"epoch": 6.7098547310561445,
+"grad_norm": 0.007935491390526295,
+"learning_rate": 1.2282456956724059e-05,
+"loss": 0.0077,
+"step": 17090
+},
+{
+"epoch": 6.713780918727915,
+"grad_norm": 0.00014909982564859092,
+"learning_rate": 1.2276640297812937e-05,
+"loss": 0.0034,
+"step": 17100
+},
+{
+"epoch": 6.717707106399686,
+"grad_norm": 0.33264774084091187,
+"learning_rate": 1.2270823638901816e-05,
+"loss": 0.0619,
+"step": 17110
+},
+{
+"epoch": 6.721633294071457,
+"grad_norm": 0.06475920975208282,
+"learning_rate": 1.2265006979990694e-05,
+"loss": 0.0071,
+"step": 17120
+},
+{
+"epoch": 6.725559481743227,
+"grad_norm": 0.004965378437191248,
+"learning_rate": 1.2259190321079572e-05,
+"loss": 0.0509,
+"step": 17130
+},
+{
+"epoch": 6.7294856694149985,
+"grad_norm": 0.1468079686164856,
+"learning_rate": 1.225337366216845e-05,
+"loss": 0.0305,
+"step": 17140
+},
+{
+"epoch": 6.733411857086769,
+"grad_norm": 0.003811666974797845,
+"learning_rate": 1.2247557003257331e-05,
+"loss": 0.032,
+"step": 17150
+},
+{
+"epoch": 6.737338044758539,
+"grad_norm": 0.06458223611116409,
+"learning_rate": 1.224174034434621e-05,
+"loss": 0.004,
+"step": 17160
+},
+{
+"epoch": 6.74126423243031,
+"grad_norm": 1.0377260446548462,
+"learning_rate": 1.2235923685435087e-05,
+"loss": 0.043,
+"step": 17170
+},
+{
+"epoch": 6.745190420102081,
+"grad_norm": 0.018551893532276154,
+"learning_rate": 1.2230107026523965e-05,
+"loss": 0.0052,
+"step": 17180
+},
+{
+"epoch": 6.749116607773852,
+"grad_norm": 0.026255004107952118,
+"learning_rate": 1.2224290367612843e-05,
+"loss": 0.0054,
+"step": 17190
+},
+{
+"epoch": 6.753042795445622,
+"grad_norm": 0.11142627149820328,
+"learning_rate": 1.2218473708701724e-05,
+"loss": 0.1553,
+"step": 17200
+},
+{
+"epoch": 6.756968983117393,
+"grad_norm": 0.03343178704380989,
+"learning_rate": 1.2212657049790602e-05,
+"loss": 0.0217,
+"step": 17210
+},
+{
+"epoch": 6.760895170789164,
+"grad_norm": 0.002346778055652976,
+"learning_rate": 1.220684039087948e-05,
+"loss": 0.0074,
+"step": 17220
+},
+{
+"epoch": 6.764821358460934,
+"grad_norm": 0.016266973689198494,
+"learning_rate": 1.2201023731968358e-05,
+"loss": 0.0012,
+"step": 17230
+},
+{
+"epoch": 6.768747546132705,
+"grad_norm": 0.028538648039102554,
+"learning_rate": 1.2195207073057236e-05,
+"loss": 0.0065,
+"step": 17240
+},
+{
+"epoch": 6.772673733804476,
+"grad_norm": 0.04032367095351219,
+"learning_rate": 1.2189390414146116e-05,
 }
 ],
 "logging_steps": 10,

@@ -10766,7 +12552,7 @@
 "early_stopping_threshold": 0.0
 },
 "attributes": {
-"early_stopping_patience_counter":
 }
 },
 "TrainerControl": {

@@ -10775,12 +12561,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
-"should_training_stop":
 },
 "attributes": {}
 }
 },
-"total_flos": 3.
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null
|
| 12132 |
+
"loss": 0.0049,
|
| 12133 |
+
"step": 17250
|
| 12134 |
+
},
|
| 12135 |
+
{
|
| 12136 |
+
"epoch": 6.7765999214762465,
|
| 12137 |
+
"grad_norm": 1.0972919464111328,
|
| 12138 |
+
"learning_rate": 1.2183573755234994e-05,
|
| 12139 |
+
"loss": 0.0393,
|
| 12140 |
+
"step": 17260
|
| 12141 |
+
},
|
| 12142 |
+
{
|
| 12143 |
+
"epoch": 6.780526109148017,
|
| 12144 |
+
"grad_norm": 0.3029380738735199,
|
| 12145 |
+
"learning_rate": 1.2177757096323871e-05,
|
| 12146 |
+
"loss": 0.0112,
|
| 12147 |
+
"step": 17270
|
| 12148 |
+
},
|
| 12149 |
+
{
|
| 12150 |
+
"epoch": 6.784452296819788,
|
| 12151 |
+
"grad_norm": 0.02861529216170311,
|
| 12152 |
+
"learning_rate": 1.2171940437412751e-05,
|
| 12153 |
+
"loss": 0.0099,
|
| 12154 |
+
"step": 17280
|
| 12155 |
+
},
|
| 12156 |
+
{
|
| 12157 |
+
"epoch": 6.788378484491559,
|
| 12158 |
+
"grad_norm": 0.07839078456163406,
|
| 12159 |
+
"learning_rate": 1.216612377850163e-05,
|
| 12160 |
+
"loss": 0.0058,
|
| 12161 |
+
"step": 17290
|
| 12162 |
+
},
|
| 12163 |
+
{
|
| 12164 |
+
"epoch": 6.792304672163329,
|
| 12165 |
+
"grad_norm": 2.803145170211792,
|
| 12166 |
+
"learning_rate": 1.2160307119590509e-05,
|
| 12167 |
+
"loss": 0.0211,
|
| 12168 |
+
"step": 17300
|
| 12169 |
+
},
|
| 12170 |
+
{
|
| 12171 |
+
"epoch": 6.7962308598351004,
|
| 12172 |
+
"grad_norm": 1.0837717056274414,
|
| 12173 |
+
"learning_rate": 1.2154490460679386e-05,
|
| 12174 |
+
"loss": 0.0207,
|
| 12175 |
+
"step": 17310
|
| 12176 |
+
},
|
| 12177 |
+
{
|
| 12178 |
+
"epoch": 6.800157047506871,
|
| 12179 |
+
"grad_norm": 0.001161164720542729,
|
| 12180 |
+
"learning_rate": 1.2148673801768264e-05,
|
| 12181 |
+
"loss": 0.0101,
|
| 12182 |
+
"step": 17320
|
| 12183 |
+
},
|
| 12184 |
+
{
|
| 12185 |
+
"epoch": 6.804083235178641,
|
| 12186 |
+
"grad_norm": 0.24111507833003998,
|
| 12187 |
+
"learning_rate": 1.2142857142857142e-05,
|
| 12188 |
+
"loss": 0.0041,
|
| 12189 |
+
"step": 17330
|
| 12190 |
+
},
|
| 12191 |
+
{
|
| 12192 |
+
"epoch": 6.808009422850413,
|
| 12193 |
+
"grad_norm": 1.3525302410125732,
|
| 12194 |
+
"learning_rate": 1.2137040483946024e-05,
|
| 12195 |
+
"loss": 0.0066,
|
| 12196 |
+
"step": 17340
|
| 12197 |
+
},
|
| 12198 |
+
{
|
| 12199 |
+
"epoch": 6.811935610522183,
|
| 12200 |
+
"grad_norm": 0.07989898324012756,
|
| 12201 |
+
"learning_rate": 1.2131223825034901e-05,
|
| 12202 |
+
"loss": 0.0072,
|
| 12203 |
+
"step": 17350
|
| 12204 |
+
},
|
| 12205 |
+
{
|
| 12206 |
+
"epoch": 6.8158617981939535,
|
| 12207 |
+
"grad_norm": 0.0023537969682365656,
|
| 12208 |
+
"learning_rate": 1.212540716612378e-05,
|
| 12209 |
+
"loss": 0.0116,
|
| 12210 |
+
"step": 17360
|
| 12211 |
+
},
|
| 12212 |
+
{
|
| 12213 |
+
"epoch": 6.819787985865725,
|
| 12214 |
+
"grad_norm": 0.0338248535990715,
|
| 12215 |
+
"learning_rate": 1.2119590507212657e-05,
|
| 12216 |
+
"loss": 0.0036,
|
| 12217 |
+
"step": 17370
|
| 12218 |
+
},
|
| 12219 |
+
{
|
| 12220 |
+
"epoch": 6.823714173537495,
|
| 12221 |
+
"grad_norm": 3.00838303565979,
|
| 12222 |
+
"learning_rate": 1.2113773848301537e-05,
|
| 12223 |
+
"loss": 0.0085,
|
| 12224 |
+
"step": 17380
|
| 12225 |
+
},
|
| 12226 |
+
{
|
| 12227 |
+
"epoch": 6.827640361209266,
|
| 12228 |
+
"grad_norm": 0.0010932368459179997,
|
| 12229 |
+
"learning_rate": 1.2107957189390417e-05,
|
| 12230 |
+
"loss": 0.0053,
|
| 12231 |
+
"step": 17390
|
| 12232 |
+
},
|
| 12233 |
+
{
|
| 12234 |
+
"epoch": 6.831566548881036,
|
| 12235 |
+
"grad_norm": 2.4534800052642822,
|
| 12236 |
+
"learning_rate": 1.2102140530479294e-05,
|
| 12237 |
+
"loss": 0.144,
|
| 12238 |
+
"step": 17400
|
| 12239 |
+
},
|
| 12240 |
+
{
|
| 12241 |
+
"epoch": 6.8354927365528075,
|
| 12242 |
+
"grad_norm": 1.3615754842758179,
|
| 12243 |
+
"learning_rate": 1.2096323871568172e-05,
|
| 12244 |
+
"loss": 0.0188,
|
| 12245 |
+
"step": 17410
|
| 12246 |
+
},
|
| 12247 |
+
{
|
| 12248 |
+
"epoch": 6.839418924224578,
|
| 12249 |
+
"grad_norm": 0.30835217237472534,
|
| 12250 |
+
"learning_rate": 1.209050721265705e-05,
|
| 12251 |
+
"loss": 0.0076,
|
| 12252 |
+
"step": 17420
|
| 12253 |
+
},
|
| 12254 |
+
{
|
| 12255 |
+
"epoch": 6.843345111896348,
|
| 12256 |
+
"grad_norm": 0.04610268399119377,
|
| 12257 |
+
"learning_rate": 1.208469055374593e-05,
|
| 12258 |
+
"loss": 0.0302,
|
| 12259 |
+
"step": 17430
|
| 12260 |
+
},
|
| 12261 |
+
{
|
| 12262 |
+
"epoch": 6.847271299568119,
|
| 12263 |
+
"grad_norm": 0.4700387716293335,
|
| 12264 |
+
"learning_rate": 1.2078873894834808e-05,
|
| 12265 |
+
"loss": 0.1631,
|
| 12266 |
+
"step": 17440
|
| 12267 |
+
},
|
| 12268 |
+
{
|
| 12269 |
+
"epoch": 6.85119748723989,
|
| 12270 |
+
"grad_norm": 0.0012368853203952312,
|
| 12271 |
+
"learning_rate": 1.2073057235923686e-05,
|
| 12272 |
+
"loss": 0.0765,
|
| 12273 |
+
"step": 17450
|
| 12274 |
+
},
|
| 12275 |
+
{
|
| 12276 |
+
"epoch": 6.855123674911661,
|
| 12277 |
+
"grad_norm": 0.27137330174446106,
|
| 12278 |
+
"learning_rate": 1.2067240577012565e-05,
|
| 12279 |
+
"loss": 0.0213,
|
| 12280 |
+
"step": 17460
|
| 12281 |
+
},
|
| 12282 |
+
{
|
| 12283 |
+
"epoch": 6.859049862583431,
|
| 12284 |
+
"grad_norm": 0.12336058169603348,
|
| 12285 |
+
"learning_rate": 1.2061423918101443e-05,
|
| 12286 |
+
"loss": 0.0097,
|
| 12287 |
+
"step": 17470
|
| 12288 |
+
},
|
| 12289 |
+
{
|
| 12290 |
+
"epoch": 6.862976050255202,
|
| 12291 |
+
"grad_norm": 0.41353583335876465,
|
| 12292 |
+
"learning_rate": 1.2055607259190323e-05,
|
| 12293 |
+
"loss": 0.0158,
|
| 12294 |
+
"step": 17480
|
| 12295 |
+
},
|
| 12296 |
+
{
|
| 12297 |
+
"epoch": 6.866902237926973,
|
| 12298 |
+
"grad_norm": 0.043068114668130875,
|
| 12299 |
+
"learning_rate": 1.20497906002792e-05,
|
| 12300 |
+
"loss": 0.0062,
|
| 12301 |
+
"step": 17490
|
| 12302 |
+
},
|
| 12303 |
+
{
|
| 12304 |
+
"epoch": 6.870828425598743,
|
| 12305 |
+
"grad_norm": 0.8259932398796082,
|
| 12306 |
+
"learning_rate": 1.2043973941368079e-05,
|
| 12307 |
+
"loss": 0.0082,
|
| 12308 |
+
"step": 17500
|
| 12309 |
+
},
|
| 12310 |
+
{
|
| 12311 |
+
"epoch": 6.874754613270515,
|
| 12312 |
+
"grad_norm": 0.0739266499876976,
|
| 12313 |
+
"learning_rate": 1.2038157282456957e-05,
|
| 12314 |
+
"loss": 0.1985,
|
| 12315 |
+
"step": 17510
|
| 12316 |
+
},
|
| 12317 |
+
{
|
| 12318 |
+
"epoch": 6.878680800942285,
|
| 12319 |
+
"grad_norm": 0.3620983064174652,
|
| 12320 |
+
"learning_rate": 1.2032340623545838e-05,
|
| 12321 |
+
"loss": 0.0307,
|
| 12322 |
+
"step": 17520
|
| 12323 |
+
},
|
| 12324 |
+
{
|
| 12325 |
+
"epoch": 6.8826069886140555,
|
| 12326 |
+
"grad_norm": 0.24680419266223907,
|
| 12327 |
+
"learning_rate": 1.2026523964634716e-05,
|
| 12328 |
+
"loss": 0.0104,
|
| 12329 |
+
"step": 17530
|
| 12330 |
+
},
|
| 12331 |
+
{
|
| 12332 |
+
"epoch": 6.886533176285827,
|
| 12333 |
+
"grad_norm": 0.36779218912124634,
|
| 12334 |
+
"learning_rate": 1.2020707305723594e-05,
|
| 12335 |
+
"loss": 0.0054,
|
| 12336 |
+
"step": 17540
|
| 12337 |
+
},
|
| 12338 |
+
{
|
| 12339 |
+
"epoch": 6.890459363957597,
|
| 12340 |
+
"grad_norm": 0.4611557722091675,
|
| 12341 |
+
"learning_rate": 1.2014890646812472e-05,
|
| 12342 |
+
"loss": 0.0404,
|
| 12343 |
+
"step": 17550
|
| 12344 |
+
},
|
| 12345 |
+
{
|
| 12346 |
+
"epoch": 6.894385551629368,
|
| 12347 |
+
"grad_norm": 0.23447008430957794,
|
| 12348 |
+
"learning_rate": 1.200907398790135e-05,
|
| 12349 |
+
"loss": 0.0646,
|
| 12350 |
+
"step": 17560
|
| 12351 |
+
},
|
| 12352 |
+
{
|
| 12353 |
+
"epoch": 6.898311739301139,
|
| 12354 |
+
"grad_norm": 0.12997685372829437,
|
| 12355 |
+
"learning_rate": 1.2003257328990229e-05,
|
| 12356 |
+
"loss": 0.0077,
|
| 12357 |
+
"step": 17570
|
| 12358 |
+
},
|
| 12359 |
+
{
|
| 12360 |
+
"epoch": 6.9022379269729095,
|
| 12361 |
+
"grad_norm": 0.22160975635051727,
|
| 12362 |
+
"learning_rate": 1.1997440670079109e-05,
|
| 12363 |
+
"loss": 0.0371,
|
| 12364 |
+
"step": 17580
|
| 12365 |
+
},
|
| 12366 |
+
{
|
| 12367 |
+
"epoch": 6.90616411464468,
|
| 12368 |
+
"grad_norm": 0.07754328846931458,
|
| 12369 |
+
"learning_rate": 1.1991624011167987e-05,
|
| 12370 |
+
"loss": 0.1126,
|
| 12371 |
+
"step": 17590
|
| 12372 |
+
},
|
| 12373 |
+
{
|
| 12374 |
+
"epoch": 6.91009030231645,
|
| 12375 |
+
"grad_norm": 0.1668512225151062,
|
| 12376 |
+
"learning_rate": 1.1985807352256865e-05,
|
| 12377 |
+
"loss": 0.0063,
|
| 12378 |
+
"step": 17600
|
| 12379 |
+
},
|
| 12380 |
+
{
|
| 12381 |
+
"epoch": 6.914016489988222,
|
| 12382 |
+
"grad_norm": 0.3324645459651947,
|
| 12383 |
+
"learning_rate": 1.1979990693345742e-05,
|
| 12384 |
+
"loss": 0.0797,
|
| 12385 |
+
"step": 17610
|
| 12386 |
+
},
|
| 12387 |
+
{
|
| 12388 |
+
"epoch": 6.917942677659992,
|
| 12389 |
+
"grad_norm": 0.5611400604248047,
|
| 12390 |
+
"learning_rate": 1.1974174034434622e-05,
|
| 12391 |
+
"loss": 0.0181,
|
| 12392 |
+
"step": 17620
|
| 12393 |
+
},
|
| 12394 |
+
{
|
| 12395 |
+
"epoch": 6.921868865331763,
|
| 12396 |
+
"grad_norm": 0.06364478170871735,
|
| 12397 |
+
"learning_rate": 1.19683573755235e-05,
|
| 12398 |
+
"loss": 0.028,
|
| 12399 |
+
"step": 17630
|
| 12400 |
+
},
|
| 12401 |
+
{
|
| 12402 |
+
"epoch": 6.925795053003534,
|
| 12403 |
+
"grad_norm": 0.0001713363017188385,
|
| 12404 |
+
"learning_rate": 1.1962540716612378e-05,
|
| 12405 |
+
"loss": 0.0035,
|
| 12406 |
+
"step": 17640
|
| 12407 |
+
},
|
| 12408 |
+
{
|
| 12409 |
+
"epoch": 6.929721240675304,
|
| 12410 |
+
"grad_norm": 0.04272838681936264,
|
| 12411 |
+
"learning_rate": 1.1956724057701257e-05,
|
| 12412 |
+
"loss": 0.0024,
|
| 12413 |
+
"step": 17650
|
| 12414 |
+
},
|
| 12415 |
+
{
|
| 12416 |
+
"epoch": 6.933647428347075,
|
| 12417 |
+
"grad_norm": 0.07123679667711258,
|
| 12418 |
+
"learning_rate": 1.1950907398790137e-05,
|
| 12419 |
+
"loss": 0.0014,
|
| 12420 |
+
"step": 17660
|
| 12421 |
+
},
|
| 12422 |
+
{
|
| 12423 |
+
"epoch": 6.937573616018845,
|
| 12424 |
+
"grad_norm": 0.23113000392913818,
|
| 12425 |
+
"learning_rate": 1.1945090739879015e-05,
|
| 12426 |
+
"loss": 0.002,
|
| 12427 |
+
"step": 17670
|
| 12428 |
+
},
|
| 12429 |
+
{
|
| 12430 |
+
"epoch": 6.941499803690617,
|
| 12431 |
+
"grad_norm": 0.00043736593215726316,
|
| 12432 |
+
"learning_rate": 1.1939274080967893e-05,
|
| 12433 |
+
"loss": 0.0054,
|
| 12434 |
+
"step": 17680
|
| 12435 |
+
},
|
| 12436 |
+
{
|
| 12437 |
+
"epoch": 6.945425991362387,
|
| 12438 |
+
"grad_norm": 0.027676576748490334,
|
| 12439 |
+
"learning_rate": 1.193345742205677e-05,
|
| 12440 |
+
"loss": 0.0272,
|
| 12441 |
+
"step": 17690
|
| 12442 |
+
},
|
| 12443 |
+
{
|
| 12444 |
+
"epoch": 6.9493521790341575,
|
| 12445 |
+
"grad_norm": 0.038382917642593384,
|
| 12446 |
+
"learning_rate": 1.1927640763145649e-05,
|
| 12447 |
+
"loss": 0.0207,
|
| 12448 |
+
"step": 17700
|
| 12449 |
+
},
|
| 12450 |
+
{
|
| 12451 |
+
"epoch": 6.953278366705929,
|
| 12452 |
+
"grad_norm": 0.0018801887053996325,
|
| 12453 |
+
"learning_rate": 1.192182410423453e-05,
|
| 12454 |
+
"loss": 0.0051,
|
| 12455 |
+
"step": 17710
|
| 12456 |
+
},
|
| 12457 |
+
{
|
| 12458 |
+
"epoch": 6.957204554377699,
|
| 12459 |
+
"grad_norm": 0.03147049620747566,
|
| 12460 |
+
"learning_rate": 1.1916007445323408e-05,
|
| 12461 |
+
"loss": 0.008,
|
| 12462 |
+
"step": 17720
|
| 12463 |
+
},
|
| 12464 |
+
{
|
| 12465 |
+
"epoch": 6.96113074204947,
|
| 12466 |
+
"grad_norm": 0.07576867192983627,
|
| 12467 |
+
"learning_rate": 1.1910190786412286e-05,
|
| 12468 |
+
"loss": 0.003,
|
| 12469 |
+
"step": 17730
|
| 12470 |
+
},
|
| 12471 |
+
{
|
| 12472 |
+
"epoch": 6.965056929721241,
|
| 12473 |
+
"grad_norm": 0.363186776638031,
|
| 12474 |
+
"learning_rate": 1.1904374127501164e-05,
|
| 12475 |
+
"loss": 0.0107,
|
| 12476 |
+
"step": 17740
|
| 12477 |
+
},
|
| 12478 |
+
{
|
| 12479 |
+
"epoch": 6.9689831173930115,
|
| 12480 |
+
"grad_norm": 0.026293108239769936,
|
| 12481 |
+
"learning_rate": 1.1898557468590042e-05,
|
| 12482 |
+
"loss": 0.0044,
|
| 12483 |
+
"step": 17750
|
| 12484 |
+
},
|
| 12485 |
+
{
|
| 12486 |
+
"epoch": 6.972909305064782,
|
| 12487 |
+
"grad_norm": 0.0016808859072625637,
|
| 12488 |
+
"learning_rate": 1.1892740809678921e-05,
|
| 12489 |
+
"loss": 0.0121,
|
| 12490 |
+
"step": 17760
|
| 12491 |
+
},
|
| 12492 |
+
{
|
| 12493 |
+
"epoch": 6.976835492736553,
|
| 12494 |
+
"grad_norm": 0.1285417228937149,
|
| 12495 |
+
"learning_rate": 1.1886924150767801e-05,
|
| 12496 |
+
"loss": 0.0018,
|
| 12497 |
+
"step": 17770
|
| 12498 |
+
},
|
| 12499 |
+
{
|
| 12500 |
+
"epoch": 6.980761680408324,
|
| 12501 |
+
"grad_norm": 0.04193047434091568,
|
| 12502 |
+
"learning_rate": 1.1881107491856679e-05,
|
| 12503 |
+
"loss": 0.0035,
|
| 12504 |
+
"step": 17780
|
| 12505 |
+
},
|
| 12506 |
+
{
|
| 12507 |
+
"epoch": 6.984687868080094,
|
| 12508 |
+
"grad_norm": 0.04311095550656319,
|
| 12509 |
+
"learning_rate": 1.1875290832945557e-05,
|
| 12510 |
+
"loss": 0.0071,
|
| 12511 |
+
"step": 17790
|
| 12512 |
+
},
|
| 12513 |
+
{
|
| 12514 |
+
"epoch": 6.988614055751865,
|
| 12515 |
+
"grad_norm": 0.5076810717582703,
|
| 12516 |
+
"learning_rate": 1.1869474174034436e-05,
|
| 12517 |
+
"loss": 0.0044,
|
| 12518 |
+
"step": 17800
|
| 12519 |
+
},
|
| 12520 |
+
{
|
| 12521 |
+
"epoch": 6.992540243423636,
|
| 12522 |
+
"grad_norm": 0.3999071717262268,
|
| 12523 |
+
"learning_rate": 1.1863657515123314e-05,
|
| 12524 |
+
"loss": 0.0082,
|
| 12525 |
+
"step": 17810
|
| 12526 |
+
},
|
| 12527 |
+
{
|
| 12528 |
+
"epoch": 6.996466431095406,
|
| 12529 |
+
"grad_norm": 0.3073045313358307,
|
| 12530 |
+
"learning_rate": 1.1857840856212192e-05,
|
| 12531 |
+
"loss": 0.0035,
|
| 12532 |
+
"step": 17820
|
| 12533 |
+
},
|
| 12534 |
+
{
|
| 12535 |
+
"epoch": 7.0,
|
| 12536 |
+
"eval_loss": 0.06648081541061401,
|
| 12537 |
+
"eval_runtime": 20.8442,
|
| 12538 |
+
"eval_samples_per_second": 108.615,
|
| 12539 |
+
"eval_steps_per_second": 13.577,
|
| 12540 |
+
"step": 17829
|
| 12541 |
}
|
| 12542 |
],
|
  "logging_steps": 10,
  …
        "early_stopping_threshold": 0.0
      },
      "attributes": {
+        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      …
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
+        "should_training_stop": true
      },
      "attributes": {}
    }
  },
+  "total_flos": 3.751772480567184e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
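
Taken together, the added entries record a run that logged every 10 steps through epoch 7 and then stopped: the epoch-7 evaluation (eval_loss 0.0665) evidently did not improve the monitored metric, the early-stopping patience counter reached 3, and should_training_stop was flipped to true, so this commit is the final checkpoint of the run. Below is a minimal sketch for inspecting that state after the fact. It assumes the standard transformers trainer-state layout shown in the diff and a hypothetical checkpoint path of last-checkpoint/trainer_state.json; adjust the path to your own checkpoint directory.

import json
from pathlib import Path

# Hypothetical location; point this at your own checkpoint directory.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

# Early-stopping bookkeeping, as recorded in the diff above.
callbacks = state["stateful_callbacks"]
patience = callbacks["EarlyStoppingCallback"]["attributes"]["early_stopping_patience_counter"]
stopped = callbacks["TrainerControl"]["args"]["should_training_stop"]
print(f"patience counter: {patience}, training stopped: {stopped}")

# log_history holds one record per `logging_steps` optimizer steps,
# plus one eval record per epoch; the last eval here is epoch 7.
evals = [rec for rec in state["log_history"] if "eval_loss" in rec]
print("final eval:", evals[-1])
print("total FLOs:", state["total_flos"])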