Training in progress, step 14100, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e9d5196271e33d944777c3a49fccbfa302a70e1321dd5cbd1d0f98d610c0e19
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5f25152ec3704298acc5baeda37a016ce684c5ed788f7275c1932e40edc1f8d
|
| 3 |
size 173247691
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b928d2d8033ac6bd87c58a39b741faace8bd1c6b0d070b7fad23c19520ff9f1a
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5e3d260b6ccf56bb7f54043de5aacef72dc68b9e723cd5fda1af160f114d6bb
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -96608,6 +96608,2106 @@
|
|
| 96608 |
"learning_rate": 5.893886653718317e-06,
|
| 96609 |
"loss": 0.7302,
|
| 96610 |
"step": 13800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96611 |
}
|
| 96612 |
],
|
| 96613 |
"logging_steps": 1,
|
|
@@ -96627,7 +98727,7 @@
|
|
| 96627 |
"attributes": {}
|
| 96628 |
}
|
| 96629 |
},
|
| 96630 |
-
"total_flos": 7.
|
| 96631 |
"train_batch_size": 8,
|
| 96632 |
"trial_name": null,
|
| 96633 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 16.05925925925926,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 14100,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 96608 |
"learning_rate": 5.893886653718317e-06,
|
| 96609 |
"loss": 0.7302,
|
| 96610 |
"step": 13800
|
| 96611 |
+
},
|
| 96612 |
+
{
|
| 96613 |
+
"epoch": 15.71908831908832,
|
| 96614 |
+
"grad_norm": 0.22730977833271027,
|
| 96615 |
+
"learning_rate": 5.890882155790686e-06,
|
| 96616 |
+
"loss": 0.8484,
|
| 96617 |
+
"step": 13801
|
| 96618 |
+
},
|
| 96619 |
+
{
|
| 96620 |
+
"epoch": 15.72022792022792,
|
| 96621 |
+
"grad_norm": 0.18116992712020874,
|
| 96622 |
+
"learning_rate": 5.887878321575266e-06,
|
| 96623 |
+
"loss": 0.7743,
|
| 96624 |
+
"step": 13802
|
| 96625 |
+
},
|
| 96626 |
+
{
|
| 96627 |
+
"epoch": 15.72136752136752,
|
| 96628 |
+
"grad_norm": 0.2321796417236328,
|
| 96629 |
+
"learning_rate": 5.8848751511764e-06,
|
| 96630 |
+
"loss": 0.4451,
|
| 96631 |
+
"step": 13803
|
| 96632 |
+
},
|
| 96633 |
+
{
|
| 96634 |
+
"epoch": 15.722507122507123,
|
| 96635 |
+
"grad_norm": 0.18436527252197266,
|
| 96636 |
+
"learning_rate": 5.88187264469838e-06,
|
| 96637 |
+
"loss": 0.9361,
|
| 96638 |
+
"step": 13804
|
| 96639 |
+
},
|
| 96640 |
+
{
|
| 96641 |
+
"epoch": 15.723646723646723,
|
| 96642 |
+
"grad_norm": 0.2078021615743637,
|
| 96643 |
+
"learning_rate": 5.8788708022454984e-06,
|
| 96644 |
+
"loss": 0.6999,
|
| 96645 |
+
"step": 13805
|
| 96646 |
+
},
|
| 96647 |
+
{
|
| 96648 |
+
"epoch": 15.724786324786324,
|
| 96649 |
+
"grad_norm": 0.18605077266693115,
|
| 96650 |
+
"learning_rate": 5.875869623922014e-06,
|
| 96651 |
+
"loss": 0.8222,
|
| 96652 |
+
"step": 13806
|
| 96653 |
+
},
|
| 96654 |
+
{
|
| 96655 |
+
"epoch": 15.725925925925926,
|
| 96656 |
+
"grad_norm": 0.1825038194656372,
|
| 96657 |
+
"learning_rate": 5.87286910983218e-06,
|
| 96658 |
+
"loss": 0.8306,
|
| 96659 |
+
"step": 13807
|
| 96660 |
+
},
|
| 96661 |
+
{
|
| 96662 |
+
"epoch": 15.727065527065527,
|
| 96663 |
+
"grad_norm": 0.24502849578857422,
|
| 96664 |
+
"learning_rate": 5.8698692600801905e-06,
|
| 96665 |
+
"loss": 0.7141,
|
| 96666 |
+
"step": 13808
|
| 96667 |
+
},
|
| 96668 |
+
{
|
| 96669 |
+
"epoch": 15.728205128205127,
|
| 96670 |
+
"grad_norm": 0.26166555285453796,
|
| 96671 |
+
"learning_rate": 5.866870074770253e-06,
|
| 96672 |
+
"loss": 0.5176,
|
| 96673 |
+
"step": 13809
|
| 96674 |
+
},
|
| 96675 |
+
{
|
| 96676 |
+
"epoch": 15.72934472934473,
|
| 96677 |
+
"grad_norm": 0.17108017206192017,
|
| 96678 |
+
"learning_rate": 5.863871554006534e-06,
|
| 96679 |
+
"loss": 0.6432,
|
| 96680 |
+
"step": 13810
|
| 96681 |
+
},
|
| 96682 |
+
{
|
| 96683 |
+
"epoch": 15.73048433048433,
|
| 96684 |
+
"grad_norm": 0.1834830939769745,
|
| 96685 |
+
"learning_rate": 5.860873697893179e-06,
|
| 96686 |
+
"loss": 0.6208,
|
| 96687 |
+
"step": 13811
|
| 96688 |
+
},
|
| 96689 |
+
{
|
| 96690 |
+
"epoch": 15.73162393162393,
|
| 96691 |
+
"grad_norm": 0.19852334260940552,
|
| 96692 |
+
"learning_rate": 5.857876506534313e-06,
|
| 96693 |
+
"loss": 0.5493,
|
| 96694 |
+
"step": 13812
|
| 96695 |
+
},
|
| 96696 |
+
{
|
| 96697 |
+
"epoch": 15.732763532763533,
|
| 96698 |
+
"grad_norm": 0.1938435286283493,
|
| 96699 |
+
"learning_rate": 5.854879980034039e-06,
|
| 96700 |
+
"loss": 0.7416,
|
| 96701 |
+
"step": 13813
|
| 96702 |
+
},
|
| 96703 |
+
{
|
| 96704 |
+
"epoch": 15.733903133903134,
|
| 96705 |
+
"grad_norm": 0.19227145612239838,
|
| 96706 |
+
"learning_rate": 5.851884118496432e-06,
|
| 96707 |
+
"loss": 0.6168,
|
| 96708 |
+
"step": 13814
|
| 96709 |
+
},
|
| 96710 |
+
{
|
| 96711 |
+
"epoch": 15.735042735042736,
|
| 96712 |
+
"grad_norm": 0.26434120535850525,
|
| 96713 |
+
"learning_rate": 5.848888922025553e-06,
|
| 96714 |
+
"loss": 0.4461,
|
| 96715 |
+
"step": 13815
|
| 96716 |
+
},
|
| 96717 |
+
{
|
| 96718 |
+
"epoch": 15.736182336182337,
|
| 96719 |
+
"grad_norm": 0.20051394402980804,
|
| 96720 |
+
"learning_rate": 5.845894390725421e-06,
|
| 96721 |
+
"loss": 0.5806,
|
| 96722 |
+
"step": 13816
|
| 96723 |
+
},
|
| 96724 |
+
{
|
| 96725 |
+
"epoch": 15.737321937321937,
|
| 96726 |
+
"grad_norm": 0.23545600473880768,
|
| 96727 |
+
"learning_rate": 5.842900524700051e-06,
|
| 96728 |
+
"loss": 0.6093,
|
| 96729 |
+
"step": 13817
|
| 96730 |
+
},
|
| 96731 |
+
{
|
| 96732 |
+
"epoch": 15.73846153846154,
|
| 96733 |
+
"grad_norm": 0.2062317430973053,
|
| 96734 |
+
"learning_rate": 5.839907324053425e-06,
|
| 96735 |
+
"loss": 0.7049,
|
| 96736 |
+
"step": 13818
|
| 96737 |
+
},
|
| 96738 |
+
{
|
| 96739 |
+
"epoch": 15.73960113960114,
|
| 96740 |
+
"grad_norm": 0.2594757676124573,
|
| 96741 |
+
"learning_rate": 5.836914788889519e-06,
|
| 96742 |
+
"loss": 0.6828,
|
| 96743 |
+
"step": 13819
|
| 96744 |
+
},
|
| 96745 |
+
{
|
| 96746 |
+
"epoch": 15.74074074074074,
|
| 96747 |
+
"grad_norm": 0.2455168217420578,
|
| 96748 |
+
"learning_rate": 5.8339229193122544e-06,
|
| 96749 |
+
"loss": 0.8516,
|
| 96750 |
+
"step": 13820
|
| 96751 |
+
},
|
| 96752 |
+
{
|
| 96753 |
+
"epoch": 15.741880341880343,
|
| 96754 |
+
"grad_norm": 0.2117108553647995,
|
| 96755 |
+
"learning_rate": 5.830931715425553e-06,
|
| 96756 |
+
"loss": 0.675,
|
| 96757 |
+
"step": 13821
|
| 96758 |
+
},
|
| 96759 |
+
{
|
| 96760 |
+
"epoch": 15.743019943019943,
|
| 96761 |
+
"grad_norm": 0.22628933191299438,
|
| 96762 |
+
"learning_rate": 5.827941177333307e-06,
|
| 96763 |
+
"loss": 0.6739,
|
| 96764 |
+
"step": 13822
|
| 96765 |
+
},
|
| 96766 |
+
{
|
| 96767 |
+
"epoch": 15.744159544159544,
|
| 96768 |
+
"grad_norm": 0.17668086290359497,
|
| 96769 |
+
"learning_rate": 5.824951305139387e-06,
|
| 96770 |
+
"loss": 0.7275,
|
| 96771 |
+
"step": 13823
|
| 96772 |
+
},
|
| 96773 |
+
{
|
| 96774 |
+
"epoch": 15.745299145299146,
|
| 96775 |
+
"grad_norm": 0.1669437438249588,
|
| 96776 |
+
"learning_rate": 5.821962098947642e-06,
|
| 96777 |
+
"loss": 0.957,
|
| 96778 |
+
"step": 13824
|
| 96779 |
+
},
|
| 96780 |
+
{
|
| 96781 |
+
"epoch": 15.746438746438747,
|
| 96782 |
+
"grad_norm": 0.22993691265583038,
|
| 96783 |
+
"learning_rate": 5.81897355886189e-06,
|
| 96784 |
+
"loss": 0.6483,
|
| 96785 |
+
"step": 13825
|
| 96786 |
+
},
|
| 96787 |
+
{
|
| 96788 |
+
"epoch": 15.747578347578347,
|
| 96789 |
+
"grad_norm": 0.23172104358673096,
|
| 96790 |
+
"learning_rate": 5.815985684985945e-06,
|
| 96791 |
+
"loss": 0.7056,
|
| 96792 |
+
"step": 13826
|
| 96793 |
+
},
|
| 96794 |
+
{
|
| 96795 |
+
"epoch": 15.74871794871795,
|
| 96796 |
+
"grad_norm": 0.1977315992116928,
|
| 96797 |
+
"learning_rate": 5.812998477423562e-06,
|
| 96798 |
+
"loss": 0.5988,
|
| 96799 |
+
"step": 13827
|
| 96800 |
+
},
|
| 96801 |
+
{
|
| 96802 |
+
"epoch": 15.74985754985755,
|
| 96803 |
+
"grad_norm": 0.18591248989105225,
|
| 96804 |
+
"learning_rate": 5.810011936278509e-06,
|
| 96805 |
+
"loss": 0.7036,
|
| 96806 |
+
"step": 13828
|
| 96807 |
+
},
|
| 96808 |
+
{
|
| 96809 |
+
"epoch": 15.75099715099715,
|
| 96810 |
+
"grad_norm": 0.20047298073768616,
|
| 96811 |
+
"learning_rate": 5.807026061654513e-06,
|
| 96812 |
+
"loss": 0.7494,
|
| 96813 |
+
"step": 13829
|
| 96814 |
+
},
|
| 96815 |
+
{
|
| 96816 |
+
"epoch": 15.752136752136753,
|
| 96817 |
+
"grad_norm": 0.22419284284114838,
|
| 96818 |
+
"learning_rate": 5.804040853655293e-06,
|
| 96819 |
+
"loss": 0.6049,
|
| 96820 |
+
"step": 13830
|
| 96821 |
+
},
|
| 96822 |
+
{
|
| 96823 |
+
"epoch": 15.753276353276354,
|
| 96824 |
+
"grad_norm": 0.19881680607795715,
|
| 96825 |
+
"learning_rate": 5.801056312384512e-06,
|
| 96826 |
+
"loss": 0.6976,
|
| 96827 |
+
"step": 13831
|
| 96828 |
+
},
|
| 96829 |
+
{
|
| 96830 |
+
"epoch": 15.754415954415954,
|
| 96831 |
+
"grad_norm": 0.22455157339572906,
|
| 96832 |
+
"learning_rate": 5.798072437945845e-06,
|
| 96833 |
+
"loss": 0.3943,
|
| 96834 |
+
"step": 13832
|
| 96835 |
+
},
|
| 96836 |
+
{
|
| 96837 |
+
"epoch": 15.755555555555556,
|
| 96838 |
+
"grad_norm": 0.17858223617076874,
|
| 96839 |
+
"learning_rate": 5.795089230442927e-06,
|
| 96840 |
+
"loss": 0.7836,
|
| 96841 |
+
"step": 13833
|
| 96842 |
+
},
|
| 96843 |
+
{
|
| 96844 |
+
"epoch": 15.756695156695157,
|
| 96845 |
+
"grad_norm": 0.1665424108505249,
|
| 96846 |
+
"learning_rate": 5.792106689979373e-06,
|
| 96847 |
+
"loss": 0.7137,
|
| 96848 |
+
"step": 13834
|
| 96849 |
+
},
|
| 96850 |
+
{
|
| 96851 |
+
"epoch": 15.757834757834758,
|
| 96852 |
+
"grad_norm": 0.2321743220090866,
|
| 96853 |
+
"learning_rate": 5.789124816658778e-06,
|
| 96854 |
+
"loss": 0.2724,
|
| 96855 |
+
"step": 13835
|
| 96856 |
+
},
|
| 96857 |
+
{
|
| 96858 |
+
"epoch": 15.75897435897436,
|
| 96859 |
+
"grad_norm": 0.15501980483531952,
|
| 96860 |
+
"learning_rate": 5.786143610584707e-06,
|
| 96861 |
+
"loss": 0.784,
|
| 96862 |
+
"step": 13836
|
| 96863 |
+
},
|
| 96864 |
+
{
|
| 96865 |
+
"epoch": 15.76011396011396,
|
| 96866 |
+
"grad_norm": 0.24102312326431274,
|
| 96867 |
+
"learning_rate": 5.783163071860715e-06,
|
| 96868 |
+
"loss": 0.7432,
|
| 96869 |
+
"step": 13837
|
| 96870 |
+
},
|
| 96871 |
+
{
|
| 96872 |
+
"epoch": 15.761253561253561,
|
| 96873 |
+
"grad_norm": 0.21585829555988312,
|
| 96874 |
+
"learning_rate": 5.780183200590306e-06,
|
| 96875 |
+
"loss": 0.6081,
|
| 96876 |
+
"step": 13838
|
| 96877 |
+
},
|
| 96878 |
+
{
|
| 96879 |
+
"epoch": 15.762393162393163,
|
| 96880 |
+
"grad_norm": 0.23042422533035278,
|
| 96881 |
+
"learning_rate": 5.77720399687699e-06,
|
| 96882 |
+
"loss": 0.5227,
|
| 96883 |
+
"step": 13839
|
| 96884 |
+
},
|
| 96885 |
+
{
|
| 96886 |
+
"epoch": 15.763532763532764,
|
| 96887 |
+
"grad_norm": 0.22137843072414398,
|
| 96888 |
+
"learning_rate": 5.774225460824243e-06,
|
| 96889 |
+
"loss": 0.7376,
|
| 96890 |
+
"step": 13840
|
| 96891 |
+
},
|
| 96892 |
+
{
|
| 96893 |
+
"epoch": 15.764672364672364,
|
| 96894 |
+
"grad_norm": 0.1850956231355667,
|
| 96895 |
+
"learning_rate": 5.771247592535523e-06,
|
| 96896 |
+
"loss": 0.6656,
|
| 96897 |
+
"step": 13841
|
| 96898 |
+
},
|
| 96899 |
+
{
|
| 96900 |
+
"epoch": 15.765811965811967,
|
| 96901 |
+
"grad_norm": 0.18516525626182556,
|
| 96902 |
+
"learning_rate": 5.7682703921142474e-06,
|
| 96903 |
+
"loss": 0.6501,
|
| 96904 |
+
"step": 13842
|
| 96905 |
+
},
|
| 96906 |
+
{
|
| 96907 |
+
"epoch": 15.766951566951567,
|
| 96908 |
+
"grad_norm": 0.22501075267791748,
|
| 96909 |
+
"learning_rate": 5.7652938596638286e-06,
|
| 96910 |
+
"loss": 0.7376,
|
| 96911 |
+
"step": 13843
|
| 96912 |
+
},
|
| 96913 |
+
{
|
| 96914 |
+
"epoch": 15.768091168091168,
|
| 96915 |
+
"grad_norm": 0.19035011529922485,
|
| 96916 |
+
"learning_rate": 5.762317995287641e-06,
|
| 96917 |
+
"loss": 0.615,
|
| 96918 |
+
"step": 13844
|
| 96919 |
+
},
|
| 96920 |
+
{
|
| 96921 |
+
"epoch": 15.76923076923077,
|
| 96922 |
+
"grad_norm": 0.18901343643665314,
|
| 96923 |
+
"learning_rate": 5.759342799089068e-06,
|
| 96924 |
+
"loss": 0.6742,
|
| 96925 |
+
"step": 13845
|
| 96926 |
+
},
|
| 96927 |
+
{
|
| 96928 |
+
"epoch": 15.77037037037037,
|
| 96929 |
+
"grad_norm": 0.24338462948799133,
|
| 96930 |
+
"learning_rate": 5.756368271171425e-06,
|
| 96931 |
+
"loss": 0.4775,
|
| 96932 |
+
"step": 13846
|
| 96933 |
+
},
|
| 96934 |
+
{
|
| 96935 |
+
"epoch": 15.771509971509971,
|
| 96936 |
+
"grad_norm": 0.21235759556293488,
|
| 96937 |
+
"learning_rate": 5.753394411638033e-06,
|
| 96938 |
+
"loss": 0.6555,
|
| 96939 |
+
"step": 13847
|
| 96940 |
+
},
|
| 96941 |
+
{
|
| 96942 |
+
"epoch": 15.772649572649573,
|
| 96943 |
+
"grad_norm": 0.202910378575325,
|
| 96944 |
+
"learning_rate": 5.7504212205921806e-06,
|
| 96945 |
+
"loss": 0.5172,
|
| 96946 |
+
"step": 13848
|
| 96947 |
+
},
|
| 96948 |
+
{
|
| 96949 |
+
"epoch": 15.773789173789174,
|
| 96950 |
+
"grad_norm": 0.16425685584545135,
|
| 96951 |
+
"learning_rate": 5.747448698137142e-06,
|
| 96952 |
+
"loss": 0.7835,
|
| 96953 |
+
"step": 13849
|
| 96954 |
+
},
|
| 96955 |
+
{
|
| 96956 |
+
"epoch": 15.774928774928775,
|
| 96957 |
+
"grad_norm": 0.21649502217769623,
|
| 96958 |
+
"learning_rate": 5.744476844376148e-06,
|
| 96959 |
+
"loss": 0.6296,
|
| 96960 |
+
"step": 13850
|
| 96961 |
+
},
|
| 96962 |
+
{
|
| 96963 |
+
"epoch": 15.776068376068377,
|
| 96964 |
+
"grad_norm": 0.18373684585094452,
|
| 96965 |
+
"learning_rate": 5.7415056594124274e-06,
|
| 96966 |
+
"loss": 0.7905,
|
| 96967 |
+
"step": 13851
|
| 96968 |
+
},
|
| 96969 |
+
{
|
| 96970 |
+
"epoch": 15.777207977207977,
|
| 96971 |
+
"grad_norm": 0.21412786841392517,
|
| 96972 |
+
"learning_rate": 5.738535143349178e-06,
|
| 96973 |
+
"loss": 0.7043,
|
| 96974 |
+
"step": 13852
|
| 96975 |
+
},
|
| 96976 |
+
{
|
| 96977 |
+
"epoch": 15.778347578347578,
|
| 96978 |
+
"grad_norm": 0.23520193994045258,
|
| 96979 |
+
"learning_rate": 5.735565296289574e-06,
|
| 96980 |
+
"loss": 0.472,
|
| 96981 |
+
"step": 13853
|
| 96982 |
+
},
|
| 96983 |
+
{
|
| 96984 |
+
"epoch": 15.77948717948718,
|
| 96985 |
+
"grad_norm": 0.15721645951271057,
|
| 96986 |
+
"learning_rate": 5.732596118336761e-06,
|
| 96987 |
+
"loss": 0.8033,
|
| 96988 |
+
"step": 13854
|
| 96989 |
+
},
|
| 96990 |
+
{
|
| 96991 |
+
"epoch": 15.78062678062678,
|
| 96992 |
+
"grad_norm": 0.22200199961662292,
|
| 96993 |
+
"learning_rate": 5.729627609593863e-06,
|
| 96994 |
+
"loss": 0.5786,
|
| 96995 |
+
"step": 13855
|
| 96996 |
+
},
|
| 96997 |
+
{
|
| 96998 |
+
"epoch": 15.781766381766381,
|
| 96999 |
+
"grad_norm": 0.19021391868591309,
|
| 97000 |
+
"learning_rate": 5.726659770164006e-06,
|
| 97001 |
+
"loss": 0.6212,
|
| 97002 |
+
"step": 13856
|
| 97003 |
+
},
|
| 97004 |
+
{
|
| 97005 |
+
"epoch": 15.782905982905984,
|
| 97006 |
+
"grad_norm": 0.21510300040245056,
|
| 97007 |
+
"learning_rate": 5.723692600150249e-06,
|
| 97008 |
+
"loss": 0.5331,
|
| 97009 |
+
"step": 13857
|
| 97010 |
+
},
|
| 97011 |
+
{
|
| 97012 |
+
"epoch": 15.784045584045584,
|
| 97013 |
+
"grad_norm": 0.1836748570203781,
|
| 97014 |
+
"learning_rate": 5.72072609965566e-06,
|
| 97015 |
+
"loss": 0.6963,
|
| 97016 |
+
"step": 13858
|
| 97017 |
+
},
|
| 97018 |
+
{
|
| 97019 |
+
"epoch": 15.785185185185185,
|
| 97020 |
+
"grad_norm": 0.17830480635166168,
|
| 97021 |
+
"learning_rate": 5.717760268783271e-06,
|
| 97022 |
+
"loss": 0.8189,
|
| 97023 |
+
"step": 13859
|
| 97024 |
+
},
|
| 97025 |
+
{
|
| 97026 |
+
"epoch": 15.786324786324787,
|
| 97027 |
+
"grad_norm": 0.2077512890100479,
|
| 97028 |
+
"learning_rate": 5.714795107636101e-06,
|
| 97029 |
+
"loss": 0.7427,
|
| 97030 |
+
"step": 13860
|
| 97031 |
+
},
|
| 97032 |
+
{
|
| 97033 |
+
"epoch": 15.787464387464388,
|
| 97034 |
+
"grad_norm": 0.19024313986301422,
|
| 97035 |
+
"learning_rate": 5.711830616317123e-06,
|
| 97036 |
+
"loss": 0.5893,
|
| 97037 |
+
"step": 13861
|
| 97038 |
+
},
|
| 97039 |
+
{
|
| 97040 |
+
"epoch": 15.788603988603988,
|
| 97041 |
+
"grad_norm": 0.20871800184249878,
|
| 97042 |
+
"learning_rate": 5.708866794929313e-06,
|
| 97043 |
+
"loss": 0.7435,
|
| 97044 |
+
"step": 13862
|
| 97045 |
+
},
|
| 97046 |
+
{
|
| 97047 |
+
"epoch": 15.78974358974359,
|
| 97048 |
+
"grad_norm": 0.19228936731815338,
|
| 97049 |
+
"learning_rate": 5.705903643575608e-06,
|
| 97050 |
+
"loss": 0.7723,
|
| 97051 |
+
"step": 13863
|
| 97052 |
+
},
|
| 97053 |
+
{
|
| 97054 |
+
"epoch": 15.790883190883191,
|
| 97055 |
+
"grad_norm": 0.18718524277210236,
|
| 97056 |
+
"learning_rate": 5.702941162358935e-06,
|
| 97057 |
+
"loss": 0.7106,
|
| 97058 |
+
"step": 13864
|
| 97059 |
+
},
|
| 97060 |
+
{
|
| 97061 |
+
"epoch": 15.792022792022792,
|
| 97062 |
+
"grad_norm": 0.19854268431663513,
|
| 97063 |
+
"learning_rate": 5.6999793513821785e-06,
|
| 97064 |
+
"loss": 0.6359,
|
| 97065 |
+
"step": 13865
|
| 97066 |
+
},
|
| 97067 |
+
{
|
| 97068 |
+
"epoch": 15.793162393162394,
|
| 97069 |
+
"grad_norm": 0.2403852492570877,
|
| 97070 |
+
"learning_rate": 5.697018210748206e-06,
|
| 97071 |
+
"loss": 0.4741,
|
| 97072 |
+
"step": 13866
|
| 97073 |
+
},
|
| 97074 |
+
{
|
| 97075 |
+
"epoch": 15.794301994301994,
|
| 97076 |
+
"grad_norm": 0.20986339449882507,
|
| 97077 |
+
"learning_rate": 5.694057740559889e-06,
|
| 97078 |
+
"loss": 0.7468,
|
| 97079 |
+
"step": 13867
|
| 97080 |
+
},
|
| 97081 |
+
{
|
| 97082 |
+
"epoch": 15.795441595441595,
|
| 97083 |
+
"grad_norm": 0.2499758005142212,
|
| 97084 |
+
"learning_rate": 5.691097940920029e-06,
|
| 97085 |
+
"loss": 0.4523,
|
| 97086 |
+
"step": 13868
|
| 97087 |
+
},
|
| 97088 |
+
{
|
| 97089 |
+
"epoch": 15.796581196581197,
|
| 97090 |
+
"grad_norm": 0.23681160807609558,
|
| 97091 |
+
"learning_rate": 5.688138811931437e-06,
|
| 97092 |
+
"loss": 0.5846,
|
| 97093 |
+
"step": 13869
|
| 97094 |
+
},
|
| 97095 |
+
{
|
| 97096 |
+
"epoch": 15.797720797720798,
|
| 97097 |
+
"grad_norm": 0.1891166716814041,
|
| 97098 |
+
"learning_rate": 5.685180353696895e-06,
|
| 97099 |
+
"loss": 0.6728,
|
| 97100 |
+
"step": 13870
|
| 97101 |
+
},
|
| 97102 |
+
{
|
| 97103 |
+
"epoch": 15.798860398860398,
|
| 97104 |
+
"grad_norm": 0.20599184930324554,
|
| 97105 |
+
"learning_rate": 5.682222566319159e-06,
|
| 97106 |
+
"loss": 0.8015,
|
| 97107 |
+
"step": 13871
|
| 97108 |
+
},
|
| 97109 |
+
{
|
| 97110 |
+
"epoch": 15.8,
|
| 97111 |
+
"grad_norm": 0.17965242266654968,
|
| 97112 |
+
"learning_rate": 5.679265449900953e-06,
|
| 97113 |
+
"loss": 0.88,
|
| 97114 |
+
"step": 13872
|
| 97115 |
+
},
|
| 97116 |
+
{
|
| 97117 |
+
"epoch": 15.801139601139601,
|
| 97118 |
+
"grad_norm": 0.18087612092494965,
|
| 97119 |
+
"learning_rate": 5.676309004544989e-06,
|
| 97120 |
+
"loss": 0.6112,
|
| 97121 |
+
"step": 13873
|
| 97122 |
+
},
|
| 97123 |
+
{
|
| 97124 |
+
"epoch": 15.802279202279202,
|
| 97125 |
+
"grad_norm": 0.2494555115699768,
|
| 97126 |
+
"learning_rate": 5.673353230353954e-06,
|
| 97127 |
+
"loss": 0.5585,
|
| 97128 |
+
"step": 13874
|
| 97129 |
+
},
|
| 97130 |
+
{
|
| 97131 |
+
"epoch": 15.803418803418804,
|
| 97132 |
+
"grad_norm": 0.21561256051063538,
|
| 97133 |
+
"learning_rate": 5.670398127430515e-06,
|
| 97134 |
+
"loss": 0.5682,
|
| 97135 |
+
"step": 13875
|
| 97136 |
+
},
|
| 97137 |
+
{
|
| 97138 |
+
"epoch": 15.804558404558405,
|
| 97139 |
+
"grad_norm": 0.1912786066532135,
|
| 97140 |
+
"learning_rate": 5.6674436958773e-06,
|
| 97141 |
+
"loss": 0.6802,
|
| 97142 |
+
"step": 13876
|
| 97143 |
+
},
|
| 97144 |
+
{
|
| 97145 |
+
"epoch": 15.805698005698005,
|
| 97146 |
+
"grad_norm": 0.24314413964748383,
|
| 97147 |
+
"learning_rate": 5.6644899357969235e-06,
|
| 97148 |
+
"loss": 0.5237,
|
| 97149 |
+
"step": 13877
|
| 97150 |
+
},
|
| 97151 |
+
{
|
| 97152 |
+
"epoch": 15.806837606837608,
|
| 97153 |
+
"grad_norm": 0.18922634422779083,
|
| 97154 |
+
"learning_rate": 5.661536847291998e-06,
|
| 97155 |
+
"loss": 0.7806,
|
| 97156 |
+
"step": 13878
|
| 97157 |
+
},
|
| 97158 |
+
{
|
| 97159 |
+
"epoch": 15.807977207977208,
|
| 97160 |
+
"grad_norm": 0.2381921410560608,
|
| 97161 |
+
"learning_rate": 5.658584430465072e-06,
|
| 97162 |
+
"loss": 0.5844,
|
| 97163 |
+
"step": 13879
|
| 97164 |
+
},
|
| 97165 |
+
{
|
| 97166 |
+
"epoch": 15.809116809116809,
|
| 97167 |
+
"grad_norm": 0.25195929408073425,
|
| 97168 |
+
"learning_rate": 5.655632685418699e-06,
|
| 97169 |
+
"loss": 0.4855,
|
| 97170 |
+
"step": 13880
|
| 97171 |
+
},
|
| 97172 |
+
{
|
| 97173 |
+
"epoch": 15.810256410256411,
|
| 97174 |
+
"grad_norm": 0.19405119121074677,
|
| 97175 |
+
"learning_rate": 5.652681612255398e-06,
|
| 97176 |
+
"loss": 0.6456,
|
| 97177 |
+
"step": 13881
|
| 97178 |
+
},
|
| 97179 |
+
{
|
| 97180 |
+
"epoch": 15.811396011396011,
|
| 97181 |
+
"grad_norm": 0.19623373448848724,
|
| 97182 |
+
"learning_rate": 5.64973121107768e-06,
|
| 97183 |
+
"loss": 0.5443,
|
| 97184 |
+
"step": 13882
|
| 97185 |
+
},
|
| 97186 |
+
{
|
| 97187 |
+
"epoch": 15.812535612535612,
|
| 97188 |
+
"grad_norm": 0.20332282781600952,
|
| 97189 |
+
"learning_rate": 5.646781481988e-06,
|
| 97190 |
+
"loss": 0.7967,
|
| 97191 |
+
"step": 13883
|
| 97192 |
+
},
|
| 97193 |
+
{
|
| 97194 |
+
"epoch": 15.813675213675214,
|
| 97195 |
+
"grad_norm": 0.26592984795570374,
|
| 97196 |
+
"learning_rate": 5.643832425088821e-06,
|
| 97197 |
+
"loss": 0.5693,
|
| 97198 |
+
"step": 13884
|
| 97199 |
+
},
|
| 97200 |
+
{
|
| 97201 |
+
"epoch": 15.814814814814815,
|
| 97202 |
+
"grad_norm": 0.19840383529663086,
|
| 97203 |
+
"learning_rate": 5.640884040482574e-06,
|
| 97204 |
+
"loss": 0.8236,
|
| 97205 |
+
"step": 13885
|
| 97206 |
+
},
|
| 97207 |
+
{
|
| 97208 |
+
"epoch": 15.815954415954415,
|
| 97209 |
+
"grad_norm": 0.22513839602470398,
|
| 97210 |
+
"learning_rate": 5.6379363282716675e-06,
|
| 97211 |
+
"loss": 0.6565,
|
| 97212 |
+
"step": 13886
|
| 97213 |
+
},
|
| 97214 |
+
{
|
| 97215 |
+
"epoch": 15.817094017094018,
|
| 97216 |
+
"grad_norm": 0.19240371882915497,
|
| 97217 |
+
"learning_rate": 5.63498928855847e-06,
|
| 97218 |
+
"loss": 0.6027,
|
| 97219 |
+
"step": 13887
|
| 97220 |
+
},
|
| 97221 |
+
{
|
| 97222 |
+
"epoch": 15.818233618233618,
|
| 97223 |
+
"grad_norm": 0.2948347330093384,
|
| 97224 |
+
"learning_rate": 5.632042921445352e-06,
|
| 97225 |
+
"loss": 0.7077,
|
| 97226 |
+
"step": 13888
|
| 97227 |
+
},
|
| 97228 |
+
{
|
| 97229 |
+
"epoch": 15.819373219373219,
|
| 97230 |
+
"grad_norm": 0.23926644027233124,
|
| 97231 |
+
"learning_rate": 5.629097227034635e-06,
|
| 97232 |
+
"loss": 0.4717,
|
| 97233 |
+
"step": 13889
|
| 97234 |
+
},
|
| 97235 |
+
{
|
| 97236 |
+
"epoch": 15.820512820512821,
|
| 97237 |
+
"grad_norm": 0.2090224325656891,
|
| 97238 |
+
"learning_rate": 5.626152205428656e-06,
|
| 97239 |
+
"loss": 0.5267,
|
| 97240 |
+
"step": 13890
|
| 97241 |
+
},
|
| 97242 |
+
{
|
| 97243 |
+
"epoch": 15.821652421652422,
|
| 97244 |
+
"grad_norm": 0.18229421973228455,
|
| 97245 |
+
"learning_rate": 5.6232078567296845e-06,
|
| 97246 |
+
"loss": 0.7855,
|
| 97247 |
+
"step": 13891
|
| 97248 |
+
},
|
| 97249 |
+
{
|
| 97250 |
+
"epoch": 15.822792022792022,
|
| 97251 |
+
"grad_norm": 0.18569529056549072,
|
| 97252 |
+
"learning_rate": 5.620264181039989e-06,
|
| 97253 |
+
"loss": 0.6802,
|
| 97254 |
+
"step": 13892
|
| 97255 |
+
},
|
| 97256 |
+
{
|
| 97257 |
+
"epoch": 15.823931623931625,
|
| 97258 |
+
"grad_norm": 0.20608794689178467,
|
| 97259 |
+
"learning_rate": 5.6173211784618125e-06,
|
| 97260 |
+
"loss": 0.6522,
|
| 97261 |
+
"step": 13893
|
| 97262 |
+
},
|
| 97263 |
+
{
|
| 97264 |
+
"epoch": 15.825071225071225,
|
| 97265 |
+
"grad_norm": 0.2130715250968933,
|
| 97266 |
+
"learning_rate": 5.614378849097382e-06,
|
| 97267 |
+
"loss": 0.5559,
|
| 97268 |
+
"step": 13894
|
| 97269 |
+
},
|
| 97270 |
+
{
|
| 97271 |
+
"epoch": 15.826210826210826,
|
| 97272 |
+
"grad_norm": 0.21179170906543732,
|
| 97273 |
+
"learning_rate": 5.611437193048877e-06,
|
| 97274 |
+
"loss": 0.808,
|
| 97275 |
+
"step": 13895
|
| 97276 |
+
},
|
| 97277 |
+
{
|
| 97278 |
+
"epoch": 15.827350427350428,
|
| 97279 |
+
"grad_norm": 0.21439428627490997,
|
| 97280 |
+
"learning_rate": 5.608496210418476e-06,
|
| 97281 |
+
"loss": 0.5765,
|
| 97282 |
+
"step": 13896
|
| 97283 |
+
},
|
| 97284 |
+
{
|
| 97285 |
+
"epoch": 15.828490028490029,
|
| 97286 |
+
"grad_norm": 0.16110475361347198,
|
| 97287 |
+
"learning_rate": 5.6055559013083295e-06,
|
| 97288 |
+
"loss": 0.6759,
|
| 97289 |
+
"step": 13897
|
| 97290 |
+
},
|
| 97291 |
+
{
|
| 97292 |
+
"epoch": 15.829629629629629,
|
| 97293 |
+
"grad_norm": 0.19680850207805634,
|
| 97294 |
+
"learning_rate": 5.602616265820568e-06,
|
| 97295 |
+
"loss": 0.5224,
|
| 97296 |
+
"step": 13898
|
| 97297 |
+
},
|
| 97298 |
+
{
|
| 97299 |
+
"epoch": 15.830769230769231,
|
| 97300 |
+
"grad_norm": 0.20465679466724396,
|
| 97301 |
+
"learning_rate": 5.5996773040572795e-06,
|
| 97302 |
+
"loss": 0.7224,
|
| 97303 |
+
"step": 13899
|
| 97304 |
+
},
|
| 97305 |
+
{
|
| 97306 |
+
"epoch": 15.831908831908832,
|
| 97307 |
+
"grad_norm": 0.21229791641235352,
|
| 97308 |
+
"learning_rate": 5.596739016120545e-06,
|
| 97309 |
+
"loss": 0.5391,
|
| 97310 |
+
"step": 13900
|
| 97311 |
+
},
|
| 97312 |
+
{
|
| 97313 |
+
"epoch": 15.833048433048432,
|
| 97314 |
+
"grad_norm": 0.20955872535705566,
|
| 97315 |
+
"learning_rate": 5.593801402112436e-06,
|
| 97316 |
+
"loss": 0.7048,
|
| 97317 |
+
"step": 13901
|
| 97318 |
+
},
|
| 97319 |
+
{
|
| 97320 |
+
"epoch": 15.834188034188035,
|
| 97321 |
+
"grad_norm": 0.24753613770008087,
|
| 97322 |
+
"learning_rate": 5.590864462134965e-06,
|
| 97323 |
+
"loss": 0.4146,
|
| 97324 |
+
"step": 13902
|
| 97325 |
+
},
|
| 97326 |
+
{
|
| 97327 |
+
"epoch": 15.835327635327635,
|
| 97328 |
+
"grad_norm": 0.17635095119476318,
|
| 97329 |
+
"learning_rate": 5.587928196290143e-06,
|
| 97330 |
+
"loss": 0.759,
|
| 97331 |
+
"step": 13903
|
| 97332 |
+
},
|
| 97333 |
+
{
|
| 97334 |
+
"epoch": 15.836467236467236,
|
| 97335 |
+
"grad_norm": 0.18288402259349823,
|
| 97336 |
+
"learning_rate": 5.584992604679961e-06,
|
| 97337 |
+
"loss": 0.7086,
|
| 97338 |
+
"step": 13904
|
| 97339 |
+
},
|
| 97340 |
+
{
|
| 97341 |
+
"epoch": 15.837606837606838,
|
| 97342 |
+
"grad_norm": 0.18178793787956238,
|
| 97343 |
+
"learning_rate": 5.582057687406386e-06,
|
| 97344 |
+
"loss": 0.6704,
|
| 97345 |
+
"step": 13905
|
| 97346 |
+
},
|
| 97347 |
+
{
|
| 97348 |
+
"epoch": 15.838746438746439,
|
| 97349 |
+
"grad_norm": 0.19446249306201935,
|
| 97350 |
+
"learning_rate": 5.579123444571338e-06,
|
| 97351 |
+
"loss": 0.6034,
|
| 97352 |
+
"step": 13906
|
| 97353 |
+
},
|
| 97354 |
+
{
|
| 97355 |
+
"epoch": 15.83988603988604,
|
| 97356 |
+
"grad_norm": 0.17122094333171844,
|
| 97357 |
+
"learning_rate": 5.576189876276741e-06,
|
| 97358 |
+
"loss": 0.6343,
|
| 97359 |
+
"step": 13907
|
| 97360 |
+
},
|
| 97361 |
+
{
|
| 97362 |
+
"epoch": 15.841025641025642,
|
| 97363 |
+
"grad_norm": 0.2367812544107437,
|
| 97364 |
+
"learning_rate": 5.573256982624483e-06,
|
| 97365 |
+
"loss": 0.4544,
|
| 97366 |
+
"step": 13908
|
| 97367 |
+
},
|
| 97368 |
+
{
|
| 97369 |
+
"epoch": 15.842165242165242,
|
| 97370 |
+
"grad_norm": 0.15342922508716583,
|
| 97371 |
+
"learning_rate": 5.570324763716445e-06,
|
| 97372 |
+
"loss": 0.5693,
|
| 97373 |
+
"step": 13909
|
| 97374 |
+
},
|
| 97375 |
+
{
|
| 97376 |
+
"epoch": 15.843304843304843,
|
| 97377 |
+
"grad_norm": 0.19328589737415314,
|
| 97378 |
+
"learning_rate": 5.5673932196544485e-06,
|
| 97379 |
+
"loss": 0.6786,
|
| 97380 |
+
"step": 13910
|
| 97381 |
+
},
|
| 97382 |
+
{
|
| 97383 |
+
"epoch": 15.844444444444445,
|
| 97384 |
+
"grad_norm": 0.22357133030891418,
|
| 97385 |
+
"learning_rate": 5.564462350540323e-06,
|
| 97386 |
+
"loss": 0.7917,
|
| 97387 |
+
"step": 13911
|
| 97388 |
+
},
|
| 97389 |
+
{
|
| 97390 |
+
"epoch": 15.845584045584046,
|
| 97391 |
+
"grad_norm": 0.22361674904823303,
|
| 97392 |
+
"learning_rate": 5.561532156475879e-06,
|
| 97393 |
+
"loss": 0.5247,
|
| 97394 |
+
"step": 13912
|
| 97395 |
+
},
|
| 97396 |
+
{
|
| 97397 |
+
"epoch": 15.846723646723646,
|
| 97398 |
+
"grad_norm": 0.20205283164978027,
|
| 97399 |
+
"learning_rate": 5.558602637562871e-06,
|
| 97400 |
+
"loss": 0.7483,
|
| 97401 |
+
"step": 13913
|
| 97402 |
+
},
|
| 97403 |
+
{
|
| 97404 |
+
"epoch": 15.847863247863248,
|
| 97405 |
+
"grad_norm": 0.205192431807518,
|
| 97406 |
+
"learning_rate": 5.55567379390306e-06,
|
| 97407 |
+
"loss": 0.6263,
|
| 97408 |
+
"step": 13914
|
| 97409 |
+
},
|
| 97410 |
+
{
|
| 97411 |
+
"epoch": 15.849002849002849,
|
| 97412 |
+
"grad_norm": 0.19363215565681458,
|
| 97413 |
+
"learning_rate": 5.552745625598169e-06,
|
| 97414 |
+
"loss": 0.864,
|
| 97415 |
+
"step": 13915
|
| 97416 |
+
},
|
| 97417 |
+
{
|
| 97418 |
+
"epoch": 15.85014245014245,
|
| 97419 |
+
"grad_norm": 0.24662263691425323,
|
| 97420 |
+
"learning_rate": 5.5498181327499095e-06,
|
| 97421 |
+
"loss": 0.7262,
|
| 97422 |
+
"step": 13916
|
| 97423 |
+
},
|
| 97424 |
+
{
|
| 97425 |
+
"epoch": 15.851282051282052,
|
| 97426 |
+
"grad_norm": 0.20658141374588013,
|
| 97427 |
+
"learning_rate": 5.546891315459948e-06,
|
| 97428 |
+
"loss": 0.5356,
|
| 97429 |
+
"step": 13917
|
| 97430 |
+
},
|
| 97431 |
+
{
|
| 97432 |
+
"epoch": 15.852421652421652,
|
| 97433 |
+
"grad_norm": 0.2509874999523163,
|
| 97434 |
+
"learning_rate": 5.543965173829949e-06,
|
| 97435 |
+
"loss": 0.6354,
|
| 97436 |
+
"step": 13918
|
| 97437 |
+
},
|
| 97438 |
+
{
|
| 97439 |
+
"epoch": 15.853561253561253,
|
| 97440 |
+
"grad_norm": 0.22638174891471863,
|
| 97441 |
+
"learning_rate": 5.54103970796154e-06,
|
| 97442 |
+
"loss": 0.3051,
|
| 97443 |
+
"step": 13919
|
| 97444 |
+
},
|
| 97445 |
+
{
|
| 97446 |
+
"epoch": 15.854700854700855,
|
| 97447 |
+
"grad_norm": 0.1767934411764145,
|
| 97448 |
+
"learning_rate": 5.5381149179563444e-06,
|
| 97449 |
+
"loss": 0.839,
|
| 97450 |
+
"step": 13920
|
| 97451 |
+
},
|
| 97452 |
+
{
|
| 97453 |
+
"epoch": 15.855840455840456,
|
| 97454 |
+
"grad_norm": 0.16481101512908936,
|
| 97455 |
+
"learning_rate": 5.5351908039159295e-06,
|
| 97456 |
+
"loss": 0.7852,
|
| 97457 |
+
"step": 13921
|
| 97458 |
+
},
|
| 97459 |
+
{
|
| 97460 |
+
"epoch": 15.856980056980056,
|
| 97461 |
+
"grad_norm": 0.22488468885421753,
|
| 97462 |
+
"learning_rate": 5.53226736594186e-06,
|
| 97463 |
+
"loss": 0.5728,
|
| 97464 |
+
"step": 13922
|
| 97465 |
+
},
|
| 97466 |
+
{
|
| 97467 |
+
"epoch": 15.858119658119659,
|
| 97468 |
+
"grad_norm": 0.2066558301448822,
|
| 97469 |
+
"learning_rate": 5.529344604135689e-06,
|
| 97470 |
+
"loss": 0.764,
|
| 97471 |
+
"step": 13923
|
| 97472 |
+
},
|
| 97473 |
+
{
|
| 97474 |
+
"epoch": 15.85925925925926,
|
| 97475 |
+
"grad_norm": 0.18836940824985504,
|
| 97476 |
+
"learning_rate": 5.526422518598928e-06,
|
| 97477 |
+
"loss": 0.6421,
|
| 97478 |
+
"step": 13924
|
| 97479 |
+
},
|
| 97480 |
+
{
|
| 97481 |
+
"epoch": 15.86039886039886,
|
| 97482 |
+
"grad_norm": 0.1548687368631363,
|
| 97483 |
+
"learning_rate": 5.523501109433063e-06,
|
| 97484 |
+
"loss": 0.5461,
|
| 97485 |
+
"step": 13925
|
| 97486 |
+
},
|
| 97487 |
+
{
|
| 97488 |
+
"epoch": 15.861538461538462,
|
| 97489 |
+
"grad_norm": 0.18227815628051758,
|
| 97490 |
+
"learning_rate": 5.520580376739562e-06,
|
| 97491 |
+
"loss": 0.5217,
|
| 97492 |
+
"step": 13926
|
| 97493 |
+
},
|
| 97494 |
+
{
|
| 97495 |
+
"epoch": 15.862678062678063,
|
| 97496 |
+
"grad_norm": 0.18795578181743622,
|
| 97497 |
+
"learning_rate": 5.5176603206198746e-06,
|
| 97498 |
+
"loss": 0.7182,
|
| 97499 |
+
"step": 13927
|
| 97500 |
+
},
|
| 97501 |
+
{
|
| 97502 |
+
"epoch": 15.863817663817663,
|
| 97503 |
+
"grad_norm": 0.1918874830007553,
|
| 97504 |
+
"learning_rate": 5.514740941175428e-06,
|
| 97505 |
+
"loss": 0.7007,
|
| 97506 |
+
"step": 13928
|
| 97507 |
+
},
|
| 97508 |
+
{
|
| 97509 |
+
"epoch": 15.864957264957265,
|
| 97510 |
+
"grad_norm": 0.18768425285816193,
|
| 97511 |
+
"learning_rate": 5.5118222385076056e-06,
|
| 97512 |
+
"loss": 0.7876,
|
| 97513 |
+
"step": 13929
|
| 97514 |
+
},
|
| 97515 |
+
{
|
| 97516 |
+
"epoch": 15.866096866096866,
|
| 97517 |
+
"grad_norm": 0.19266986846923828,
|
| 97518 |
+
"learning_rate": 5.508904212717789e-06,
|
| 97519 |
+
"loss": 0.5271,
|
| 97520 |
+
"step": 13930
|
| 97521 |
+
},
|
| 97522 |
+
{
|
| 97523 |
+
"epoch": 15.867236467236467,
|
| 97524 |
+
"grad_norm": 0.2516765594482422,
|
| 97525 |
+
"learning_rate": 5.5059868639073305e-06,
|
| 97526 |
+
"loss": 0.5491,
|
| 97527 |
+
"step": 13931
|
| 97528 |
+
},
|
| 97529 |
+
{
|
| 97530 |
+
"epoch": 15.868376068376069,
|
| 97531 |
+
"grad_norm": 0.260698139667511,
|
| 97532 |
+
"learning_rate": 5.5030701921775645e-06,
|
| 97533 |
+
"loss": 0.7285,
|
| 97534 |
+
"step": 13932
|
| 97535 |
+
},
|
| 97536 |
+
{
|
| 97537 |
+
"epoch": 15.86951566951567,
|
| 97538 |
+
"grad_norm": 0.17928585410118103,
|
| 97539 |
+
"learning_rate": 5.5001541976297724e-06,
|
| 97540 |
+
"loss": 0.605,
|
| 97541 |
+
"step": 13933
|
| 97542 |
+
},
|
| 97543 |
+
{
|
| 97544 |
+
"epoch": 15.87065527065527,
|
| 97545 |
+
"grad_norm": 0.20798265933990479,
|
| 97546 |
+
"learning_rate": 5.497238880365258e-06,
|
| 97547 |
+
"loss": 0.6186,
|
| 97548 |
+
"step": 13934
|
| 97549 |
+
},
|
| 97550 |
+
{
|
| 97551 |
+
"epoch": 15.871794871794872,
|
| 97552 |
+
"grad_norm": 0.19151423871517181,
|
| 97553 |
+
"learning_rate": 5.494324240485277e-06,
|
| 97554 |
+
"loss": 0.6788,
|
| 97555 |
+
"step": 13935
|
| 97556 |
+
},
|
| 97557 |
+
{
|
| 97558 |
+
"epoch": 15.872934472934473,
|
| 97559 |
+
"grad_norm": 0.216191828250885,
|
| 97560 |
+
"learning_rate": 5.4914102780910474e-06,
|
| 97561 |
+
"loss": 0.7635,
|
| 97562 |
+
"step": 13936
|
| 97563 |
+
},
|
| 97564 |
+
{
|
| 97565 |
+
"epoch": 15.874074074074073,
|
| 97566 |
+
"grad_norm": 0.2170773595571518,
|
| 97567 |
+
"learning_rate": 5.4884969932837895e-06,
|
| 97568 |
+
"loss": 0.7002,
|
| 97569 |
+
"step": 13937
|
| 97570 |
+
},
|
| 97571 |
+
{
|
| 97572 |
+
"epoch": 15.875213675213676,
|
| 97573 |
+
"grad_norm": 0.201252281665802,
|
| 97574 |
+
"learning_rate": 5.485584386164688e-06,
|
| 97575 |
+
"loss": 0.7075,
|
| 97576 |
+
"step": 13938
|
| 97577 |
+
},
|
| 97578 |
+
{
|
| 97579 |
+
"epoch": 15.876353276353276,
|
| 97580 |
+
"grad_norm": 0.2165941447019577,
|
| 97581 |
+
"learning_rate": 5.482672456834911e-06,
|
| 97582 |
+
"loss": 0.6854,
|
| 97583 |
+
"step": 13939
|
| 97584 |
+
},
|
| 97585 |
+
{
|
| 97586 |
+
"epoch": 15.877492877492877,
|
| 97587 |
+
"grad_norm": 0.22835934162139893,
|
| 97588 |
+
"learning_rate": 5.479761205395587e-06,
|
| 97589 |
+
"loss": 0.4414,
|
| 97590 |
+
"step": 13940
|
| 97591 |
+
},
|
| 97592 |
+
{
|
| 97593 |
+
"epoch": 15.878632478632479,
|
| 97594 |
+
"grad_norm": 0.23190470039844513,
|
| 97595 |
+
"learning_rate": 5.476850631947836e-06,
|
| 97596 |
+
"loss": 0.5443,
|
| 97597 |
+
"step": 13941
|
| 97598 |
+
},
|
| 97599 |
+
{
|
| 97600 |
+
"epoch": 15.87977207977208,
|
| 97601 |
+
"grad_norm": 0.22439834475517273,
|
| 97602 |
+
"learning_rate": 5.47394073659275e-06,
|
| 97603 |
+
"loss": 0.3856,
|
| 97604 |
+
"step": 13942
|
| 97605 |
+
},
|
| 97606 |
+
{
|
| 97607 |
+
"epoch": 15.88091168091168,
|
| 97608 |
+
"grad_norm": 0.2105487734079361,
|
| 97609 |
+
"learning_rate": 5.471031519431408e-06,
|
| 97610 |
+
"loss": 0.7456,
|
| 97611 |
+
"step": 13943
|
| 97612 |
+
},
|
| 97613 |
+
{
|
| 97614 |
+
"epoch": 15.882051282051282,
|
| 97615 |
+
"grad_norm": 0.18805000185966492,
|
| 97616 |
+
"learning_rate": 5.468122980564833e-06,
|
| 97617 |
+
"loss": 0.7202,
|
| 97618 |
+
"step": 13944
|
| 97619 |
+
},
|
| 97620 |
+
{
|
| 97621 |
+
"epoch": 15.883190883190883,
|
| 97622 |
+
"grad_norm": 0.2164195030927658,
|
| 97623 |
+
"learning_rate": 5.465215120094067e-06,
|
| 97624 |
+
"loss": 0.5933,
|
| 97625 |
+
"step": 13945
|
| 97626 |
+
},
|
| 97627 |
+
{
|
| 97628 |
+
"epoch": 15.884330484330484,
|
| 97629 |
+
"grad_norm": 0.1648697853088379,
|
| 97630 |
+
"learning_rate": 5.462307938120103e-06,
|
| 97631 |
+
"loss": 0.7291,
|
| 97632 |
+
"step": 13946
|
| 97633 |
+
},
|
| 97634 |
+
{
|
| 97635 |
+
"epoch": 15.885470085470086,
|
| 97636 |
+
"grad_norm": 0.2595181465148926,
|
| 97637 |
+
"learning_rate": 5.459401434743911e-06,
|
| 97638 |
+
"loss": 0.4453,
|
| 97639 |
+
"step": 13947
|
| 97640 |
+
},
|
| 97641 |
+
{
|
| 97642 |
+
"epoch": 15.886609686609686,
|
| 97643 |
+
"grad_norm": 0.18670688569545746,
|
| 97644 |
+
"learning_rate": 5.456495610066442e-06,
|
| 97645 |
+
"loss": 0.6382,
|
| 97646 |
+
"step": 13948
|
| 97647 |
+
},
|
| 97648 |
+
{
|
| 97649 |
+
"epoch": 15.887749287749287,
|
| 97650 |
+
"grad_norm": 0.20756377279758453,
|
| 97651 |
+
"learning_rate": 5.4535904641886265e-06,
|
| 97652 |
+
"loss": 0.808,
|
| 97653 |
+
"step": 13949
|
| 97654 |
+
},
|
| 97655 |
+
{
|
| 97656 |
+
"epoch": 15.88888888888889,
|
| 97657 |
+
"grad_norm": 0.2046612799167633,
|
| 97658 |
+
"learning_rate": 5.450685997211375e-06,
|
| 97659 |
+
"loss": 0.6016,
|
| 97660 |
+
"step": 13950
|
| 97661 |
+
},
|
| 97662 |
+
{
|
| 97663 |
+
"epoch": 15.89002849002849,
|
| 97664 |
+
"grad_norm": 0.24006542563438416,
|
| 97665 |
+
"learning_rate": 5.44778220923555e-06,
|
| 97666 |
+
"loss": 0.5609,
|
| 97667 |
+
"step": 13951
|
| 97668 |
+
},
|
| 97669 |
+
{
|
| 97670 |
+
"epoch": 15.89116809116809,
|
| 97671 |
+
"grad_norm": 0.1908271312713623,
|
| 97672 |
+
"learning_rate": 5.444879100362019e-06,
|
| 97673 |
+
"loss": 0.9716,
|
| 97674 |
+
"step": 13952
|
| 97675 |
+
},
|
| 97676 |
+
{
|
| 97677 |
+
"epoch": 15.892307692307693,
|
| 97678 |
+
"grad_norm": 0.19450271129608154,
|
| 97679 |
+
"learning_rate": 5.441976670691615e-06,
|
| 97680 |
+
"loss": 0.6022,
|
| 97681 |
+
"step": 13953
|
| 97682 |
+
},
|
| 97683 |
+
{
|
| 97684 |
+
"epoch": 15.893447293447293,
|
| 97685 |
+
"grad_norm": 0.23146270215511322,
|
| 97686 |
+
"learning_rate": 5.439074920325149e-06,
|
| 97687 |
+
"loss": 0.6238,
|
| 97688 |
+
"step": 13954
|
| 97689 |
+
},
|
| 97690 |
+
{
|
| 97691 |
+
"epoch": 15.894586894586894,
|
| 97692 |
+
"grad_norm": 0.2440861463546753,
|
| 97693 |
+
"learning_rate": 5.436173849363393e-06,
|
| 97694 |
+
"loss": 0.5213,
|
| 97695 |
+
"step": 13955
|
| 97696 |
+
},
|
| 97697 |
+
{
|
| 97698 |
+
"epoch": 15.895726495726496,
|
| 97699 |
+
"grad_norm": 0.1997850388288498,
|
| 97700 |
+
"learning_rate": 5.433273457907126e-06,
|
| 97701 |
+
"loss": 0.7222,
|
| 97702 |
+
"step": 13956
|
| 97703 |
+
},
|
| 97704 |
+
{
|
| 97705 |
+
"epoch": 15.896866096866097,
|
| 97706 |
+
"grad_norm": 0.21447142958641052,
|
| 97707 |
+
"learning_rate": 5.430373746057088e-06,
|
| 97708 |
+
"loss": 0.664,
|
| 97709 |
+
"step": 13957
|
| 97710 |
+
},
|
| 97711 |
+
{
|
| 97712 |
+
"epoch": 15.898005698005697,
|
| 97713 |
+
"grad_norm": 0.22905848920345306,
|
| 97714 |
+
"learning_rate": 5.42747471391398e-06,
|
| 97715 |
+
"loss": 0.8078,
|
| 97716 |
+
"step": 13958
|
| 97717 |
+
},
|
| 97718 |
+
{
|
| 97719 |
+
"epoch": 15.8991452991453,
|
| 97720 |
+
"grad_norm": 0.22671881318092346,
|
| 97721 |
+
"learning_rate": 5.424576361578499e-06,
|
| 97722 |
+
"loss": 0.6527,
|
| 97723 |
+
"step": 13959
|
| 97724 |
+
},
|
| 97725 |
+
{
|
| 97726 |
+
"epoch": 15.9002849002849,
|
| 97727 |
+
"grad_norm": 0.18757790327072144,
|
| 97728 |
+
"learning_rate": 5.421678689151313e-06,
|
| 97729 |
+
"loss": 0.6938,
|
| 97730 |
+
"step": 13960
|
| 97731 |
+
},
|
| 97732 |
+
{
|
| 97733 |
+
"epoch": 15.9014245014245,
|
| 97734 |
+
"grad_norm": 0.21530726552009583,
|
| 97735 |
+
"learning_rate": 5.418781696733074e-06,
|
| 97736 |
+
"loss": 0.6772,
|
| 97737 |
+
"step": 13961
|
| 97738 |
+
},
|
| 97739 |
+
{
|
| 97740 |
+
"epoch": 15.902564102564103,
|
| 97741 |
+
"grad_norm": 0.21243935823440552,
|
| 97742 |
+
"learning_rate": 5.415885384424388e-06,
|
| 97743 |
+
"loss": 0.5114,
|
| 97744 |
+
"step": 13962
|
| 97745 |
+
},
|
| 97746 |
+
{
|
| 97747 |
+
"epoch": 15.903703703703703,
|
| 97748 |
+
"grad_norm": 0.2314883917570114,
|
| 97749 |
+
"learning_rate": 5.412989752325862e-06,
|
| 97750 |
+
"loss": 0.55,
|
| 97751 |
+
"step": 13963
|
| 97752 |
+
},
|
| 97753 |
+
{
|
| 97754 |
+
"epoch": 15.904843304843304,
|
| 97755 |
+
"grad_norm": 0.16625399887561798,
|
| 97756 |
+
"learning_rate": 5.410094800538062e-06,
|
| 97757 |
+
"loss": 0.7069,
|
| 97758 |
+
"step": 13964
|
| 97759 |
+
},
|
| 97760 |
+
{
|
| 97761 |
+
"epoch": 15.905982905982906,
|
| 97762 |
+
"grad_norm": 0.24676908552646637,
|
| 97763 |
+
"learning_rate": 5.407200529161552e-06,
|
| 97764 |
+
"loss": 0.5306,
|
| 97765 |
+
"step": 13965
|
| 97766 |
+
},
|
| 97767 |
+
{
|
| 97768 |
+
"epoch": 15.907122507122507,
|
| 97769 |
+
"grad_norm": 0.1859494149684906,
|
| 97770 |
+
"learning_rate": 5.404306938296832e-06,
|
| 97771 |
+
"loss": 0.6454,
|
| 97772 |
+
"step": 13966
|
| 97773 |
+
},
|
| 97774 |
+
{
|
| 97775 |
+
"epoch": 15.908262108262107,
|
| 97776 |
+
"grad_norm": 0.18464453518390656,
|
| 97777 |
+
"learning_rate": 5.4014140280444296e-06,
|
| 97778 |
+
"loss": 0.9086,
|
| 97779 |
+
"step": 13967
|
| 97780 |
+
},
|
| 97781 |
+
{
|
| 97782 |
+
"epoch": 15.90940170940171,
|
| 97783 |
+
"grad_norm": 0.2452298402786255,
|
| 97784 |
+
"learning_rate": 5.398521798504813e-06,
|
| 97785 |
+
"loss": 0.6533,
|
| 97786 |
+
"step": 13968
|
| 97787 |
+
},
|
| 97788 |
+
{
|
| 97789 |
+
"epoch": 15.91054131054131,
|
| 97790 |
+
"grad_norm": 0.19522514939308167,
|
| 97791 |
+
"learning_rate": 5.3956302497784466e-06,
|
| 97792 |
+
"loss": 0.6949,
|
| 97793 |
+
"step": 13969
|
| 97794 |
+
},
|
| 97795 |
+
{
|
| 97796 |
+
"epoch": 15.91168091168091,
|
| 97797 |
+
"grad_norm": 0.23665203154087067,
|
| 97798 |
+
"learning_rate": 5.392739381965744e-06,
|
| 97799 |
+
"loss": 0.5759,
|
| 97800 |
+
"step": 13970
|
| 97801 |
+
},
|
| 97802 |
+
{
|
| 97803 |
+
"epoch": 15.912820512820513,
|
| 97804 |
+
"grad_norm": 0.1601344347000122,
|
| 97805 |
+
"learning_rate": 5.389849195167127e-06,
|
| 97806 |
+
"loss": 0.6828,
|
| 97807 |
+
"step": 13971
|
| 97808 |
+
},
|
| 97809 |
+
{
|
| 97810 |
+
"epoch": 15.913960113960114,
|
| 97811 |
+
"grad_norm": 0.23100726306438446,
|
| 97812 |
+
"learning_rate": 5.386959689482973e-06,
|
| 97813 |
+
"loss": 0.4421,
|
| 97814 |
+
"step": 13972
|
| 97815 |
+
},
|
| 97816 |
+
{
|
| 97817 |
+
"epoch": 15.915099715099714,
|
| 97818 |
+
"grad_norm": 0.19434283673763275,
|
| 97819 |
+
"learning_rate": 5.384070865013652e-06,
|
| 97820 |
+
"loss": 0.7509,
|
| 97821 |
+
"step": 13973
|
| 97822 |
+
},
|
| 97823 |
+
{
|
| 97824 |
+
"epoch": 15.916239316239317,
|
| 97825 |
+
"grad_norm": 0.25062304735183716,
|
| 97826 |
+
"learning_rate": 5.3811827218594874e-06,
|
| 97827 |
+
"loss": 0.3491,
|
| 97828 |
+
"step": 13974
|
| 97829 |
+
},
|
| 97830 |
+
{
|
| 97831 |
+
"epoch": 15.917378917378917,
|
| 97832 |
+
"grad_norm": 0.18781554698944092,
|
| 97833 |
+
"learning_rate": 5.3782952601208e-06,
|
| 97834 |
+
"loss": 0.7871,
|
| 97835 |
+
"step": 13975
|
| 97836 |
+
},
|
| 97837 |
+
{
|
| 97838 |
+
"epoch": 15.918518518518518,
|
| 97839 |
+
"grad_norm": 0.20198141038417816,
|
| 97840 |
+
"learning_rate": 5.3754084798978754e-06,
|
| 97841 |
+
"loss": 0.5347,
|
| 97842 |
+
"step": 13976
|
| 97843 |
+
},
|
| 97844 |
+
{
|
| 97845 |
+
"epoch": 15.91965811965812,
|
| 97846 |
+
"grad_norm": 0.2023342251777649,
|
| 97847 |
+
"learning_rate": 5.372522381290984e-06,
|
| 97848 |
+
"loss": 0.777,
|
| 97849 |
+
"step": 13977
|
| 97850 |
+
},
|
| 97851 |
+
{
|
| 97852 |
+
"epoch": 15.92079772079772,
|
| 97853 |
+
"grad_norm": 0.17312119901180267,
|
| 97854 |
+
"learning_rate": 5.3696369644003654e-06,
|
| 97855 |
+
"loss": 0.5332,
|
| 97856 |
+
"step": 13978
|
| 97857 |
+
},
|
| 97858 |
+
{
|
| 97859 |
+
"epoch": 15.921937321937321,
|
| 97860 |
+
"grad_norm": 0.2182493656873703,
|
| 97861 |
+
"learning_rate": 5.366752229326241e-06,
|
| 97862 |
+
"loss": 0.6611,
|
| 97863 |
+
"step": 13979
|
| 97864 |
+
},
|
| 97865 |
+
{
|
| 97866 |
+
"epoch": 15.923076923076923,
|
| 97867 |
+
"grad_norm": 0.21295364201068878,
|
| 97868 |
+
"learning_rate": 5.363868176168807e-06,
|
| 97869 |
+
"loss": 0.6564,
|
| 97870 |
+
"step": 13980
|
| 97871 |
+
},
|
| 97872 |
+
{
|
| 97873 |
+
"epoch": 15.924216524216524,
|
| 97874 |
+
"grad_norm": 0.24037398397922516,
|
| 97875 |
+
"learning_rate": 5.360984805028227e-06,
|
| 97876 |
+
"loss": 0.5228,
|
| 97877 |
+
"step": 13981
|
| 97878 |
+
},
|
| 97879 |
+
{
|
| 97880 |
+
"epoch": 15.925356125356124,
|
| 97881 |
+
"grad_norm": 0.19931496679782867,
|
| 97882 |
+
"learning_rate": 5.3581021160046486e-06,
|
| 97883 |
+
"loss": 0.5939,
|
| 97884 |
+
"step": 13982
|
| 97885 |
+
},
|
| 97886 |
+
{
|
| 97887 |
+
"epoch": 15.926495726495727,
|
| 97888 |
+
"grad_norm": 0.19876810908317566,
|
| 97889 |
+
"learning_rate": 5.355220109198203e-06,
|
| 97890 |
+
"loss": 0.5636,
|
| 97891 |
+
"step": 13983
|
| 97892 |
+
},
|
| 97893 |
+
{
|
| 97894 |
+
"epoch": 15.927635327635327,
|
| 97895 |
+
"grad_norm": 0.19912245869636536,
|
| 97896 |
+
"learning_rate": 5.352338784708991e-06,
|
| 97897 |
+
"loss": 0.404,
|
| 97898 |
+
"step": 13984
|
| 97899 |
+
},
|
| 97900 |
+
{
|
| 97901 |
+
"epoch": 15.928774928774928,
|
| 97902 |
+
"grad_norm": 0.2745014429092407,
|
| 97903 |
+
"learning_rate": 5.349458142637076e-06,
|
| 97904 |
+
"loss": 0.4361,
|
| 97905 |
+
"step": 13985
|
| 97906 |
+
},
|
| 97907 |
+
{
|
| 97908 |
+
"epoch": 15.92991452991453,
|
| 97909 |
+
"grad_norm": 0.19565151631832123,
|
| 97910 |
+
"learning_rate": 5.34657818308252e-06,
|
| 97911 |
+
"loss": 0.7867,
|
| 97912 |
+
"step": 13986
|
| 97913 |
+
},
|
| 97914 |
+
{
|
| 97915 |
+
"epoch": 15.93105413105413,
|
| 97916 |
+
"grad_norm": 0.1679268777370453,
|
| 97917 |
+
"learning_rate": 5.343698906145353e-06,
|
| 97918 |
+
"loss": 0.6697,
|
| 97919 |
+
"step": 13987
|
| 97920 |
+
},
|
| 97921 |
+
{
|
| 97922 |
+
"epoch": 15.932193732193731,
|
| 97923 |
+
"grad_norm": 0.17869971692562103,
|
| 97924 |
+
"learning_rate": 5.340820311925576e-06,
|
| 97925 |
+
"loss": 0.7855,
|
| 97926 |
+
"step": 13988
|
| 97927 |
+
},
|
| 97928 |
+
{
|
| 97929 |
+
"epoch": 15.933333333333334,
|
| 97930 |
+
"grad_norm": 0.19192154705524445,
|
| 97931 |
+
"learning_rate": 5.337942400523174e-06,
|
| 97932 |
+
"loss": 0.7875,
|
| 97933 |
+
"step": 13989
|
| 97934 |
+
},
|
| 97935 |
+
{
|
| 97936 |
+
"epoch": 15.934472934472934,
|
| 97937 |
+
"grad_norm": 0.27600088715553284,
|
| 97938 |
+
"learning_rate": 5.335065172038101e-06,
|
| 97939 |
+
"loss": 0.4982,
|
| 97940 |
+
"step": 13990
|
| 97941 |
+
},
|
| 97942 |
+
{
|
| 97943 |
+
"epoch": 15.935612535612536,
|
| 97944 |
+
"grad_norm": 0.21197514235973358,
|
| 97945 |
+
"learning_rate": 5.3321886265703035e-06,
|
| 97946 |
+
"loss": 0.6903,
|
| 97947 |
+
"step": 13991
|
| 97948 |
+
},
|
| 97949 |
+
{
|
| 97950 |
+
"epoch": 15.936752136752137,
|
| 97951 |
+
"grad_norm": 0.20699726045131683,
|
| 97952 |
+
"learning_rate": 5.329312764219671e-06,
|
| 97953 |
+
"loss": 0.642,
|
| 97954 |
+
"step": 13992
|
| 97955 |
+
},
|
| 97956 |
+
{
|
| 97957 |
+
"epoch": 15.937891737891738,
|
| 97958 |
+
"grad_norm": 0.1790648102760315,
|
| 97959 |
+
"learning_rate": 5.326437585086102e-06,
|
| 97960 |
+
"loss": 0.8265,
|
| 97961 |
+
"step": 13993
|
| 97962 |
+
},
|
| 97963 |
+
{
|
| 97964 |
+
"epoch": 15.93903133903134,
|
| 97965 |
+
"grad_norm": 0.2005932480096817,
|
| 97966 |
+
"learning_rate": 5.323563089269459e-06,
|
| 97967 |
+
"loss": 0.6435,
|
| 97968 |
+
"step": 13994
|
| 97969 |
+
},
|
| 97970 |
+
{
|
| 97971 |
+
"epoch": 15.94017094017094,
|
| 97972 |
+
"grad_norm": 0.22714604437351227,
|
| 97973 |
+
"learning_rate": 5.320689276869586e-06,
|
| 97974 |
+
"loss": 0.3956,
|
| 97975 |
+
"step": 13995
|
| 97976 |
+
},
|
| 97977 |
+
{
|
| 97978 |
+
"epoch": 15.941310541310541,
|
| 97979 |
+
"grad_norm": 0.23463019728660583,
|
| 97980 |
+
"learning_rate": 5.317816147986287e-06,
|
| 97981 |
+
"loss": 0.7648,
|
| 97982 |
+
"step": 13996
|
| 97983 |
+
},
|
| 97984 |
+
{
|
| 97985 |
+
"epoch": 15.942450142450143,
|
| 97986 |
+
"grad_norm": 0.20150230824947357,
|
| 97987 |
+
"learning_rate": 5.314943702719361e-06,
|
| 97988 |
+
"loss": 0.7364,
|
| 97989 |
+
"step": 13997
|
| 97990 |
+
},
|
| 97991 |
+
{
|
| 97992 |
+
"epoch": 15.943589743589744,
|
| 97993 |
+
"grad_norm": 0.19925189018249512,
|
| 97994 |
+
"learning_rate": 5.312071941168572e-06,
|
| 97995 |
+
"loss": 0.6879,
|
| 97996 |
+
"step": 13998
|
| 97997 |
+
},
|
| 97998 |
+
{
|
| 97999 |
+
"epoch": 15.944729344729344,
|
| 98000 |
+
"grad_norm": 0.22107404470443726,
|
| 98001 |
+
"learning_rate": 5.309200863433667e-06,
|
| 98002 |
+
"loss": 0.5548,
|
| 98003 |
+
"step": 13999
|
| 98004 |
+
},
|
| 98005 |
+
{
|
| 98006 |
+
"epoch": 15.945868945868947,
|
| 98007 |
+
"grad_norm": 0.15994355082511902,
|
| 98008 |
+
"learning_rate": 5.3063304696143655e-06,
|
| 98009 |
+
"loss": 0.5801,
|
| 98010 |
+
"step": 14000
|
| 98011 |
+
},
|
| 98012 |
+
{
|
| 98013 |
+
"epoch": 15.947008547008547,
|
| 98014 |
+
"grad_norm": 0.17757205665111542,
|
| 98015 |
+
"learning_rate": 5.303460759810366e-06,
|
| 98016 |
+
"loss": 0.7727,
|
| 98017 |
+
"step": 14001
|
| 98018 |
+
},
|
| 98019 |
+
{
|
| 98020 |
+
"epoch": 15.948148148148148,
|
| 98021 |
+
"grad_norm": 0.2089131623506546,
|
| 98022 |
+
"learning_rate": 5.300591734121338e-06,
|
| 98023 |
+
"loss": 0.5178,
|
| 98024 |
+
"step": 14002
|
| 98025 |
+
},
|
| 98026 |
+
{
|
| 98027 |
+
"epoch": 15.94928774928775,
|
| 98028 |
+
"grad_norm": 0.1872301995754242,
|
| 98029 |
+
"learning_rate": 5.297723392646942e-06,
|
| 98030 |
+
"loss": 0.7741,
|
| 98031 |
+
"step": 14003
|
| 98032 |
+
},
|
| 98033 |
+
{
|
| 98034 |
+
"epoch": 15.95042735042735,
|
| 98035 |
+
"grad_norm": 0.18554730713367462,
|
| 98036 |
+
"learning_rate": 5.294855735486784e-06,
|
| 98037 |
+
"loss": 0.7561,
|
| 98038 |
+
"step": 14004
|
| 98039 |
+
},
|
| 98040 |
+
{
|
| 98041 |
+
"epoch": 15.951566951566951,
|
| 98042 |
+
"grad_norm": 0.16955998539924622,
|
| 98043 |
+
"learning_rate": 5.291988762740477e-06,
|
| 98044 |
+
"loss": 0.7384,
|
| 98045 |
+
"step": 14005
|
| 98046 |
+
},
|
| 98047 |
+
{
|
| 98048 |
+
"epoch": 15.952706552706553,
|
| 98049 |
+
"grad_norm": 0.18044869601726532,
|
| 98050 |
+
"learning_rate": 5.289122474507599e-06,
|
| 98051 |
+
"loss": 0.7132,
|
| 98052 |
+
"step": 14006
|
| 98053 |
+
},
|
| 98054 |
+
{
|
| 98055 |
+
"epoch": 15.953846153846154,
|
| 98056 |
+
"grad_norm": 0.26764407753944397,
|
| 98057 |
+
"learning_rate": 5.286256870887707e-06,
|
| 98058 |
+
"loss": 0.4791,
|
| 98059 |
+
"step": 14007
|
| 98060 |
+
},
|
| 98061 |
+
{
|
| 98062 |
+
"epoch": 15.954985754985755,
|
| 98063 |
+
"grad_norm": 0.19423027336597443,
|
| 98064 |
+
"learning_rate": 5.283391951980324e-06,
|
| 98065 |
+
"loss": 0.5809,
|
| 98066 |
+
"step": 14008
|
| 98067 |
+
},
|
| 98068 |
+
{
|
| 98069 |
+
"epoch": 15.956125356125357,
|
| 98070 |
+
"grad_norm": 0.2263379991054535,
|
| 98071 |
+
"learning_rate": 5.280527717884956e-06,
|
| 98072 |
+
"loss": 0.5008,
|
| 98073 |
+
"step": 14009
|
| 98074 |
+
},
|
| 98075 |
+
{
|
| 98076 |
+
"epoch": 15.957264957264957,
|
| 98077 |
+
"grad_norm": 0.25239741802215576,
|
| 98078 |
+
"learning_rate": 5.277664168701088e-06,
|
| 98079 |
+
"loss": 0.6367,
|
| 98080 |
+
"step": 14010
|
| 98081 |
+
},
|
| 98082 |
+
{
|
| 98083 |
+
"epoch": 15.958404558404558,
|
| 98084 |
+
"grad_norm": 0.16616038978099823,
|
| 98085 |
+
"learning_rate": 5.274801304528182e-06,
|
| 98086 |
+
"loss": 0.7751,
|
| 98087 |
+
"step": 14011
|
| 98088 |
+
},
|
| 98089 |
+
{
|
| 98090 |
+
"epoch": 15.95954415954416,
|
| 98091 |
+
"grad_norm": 0.23653560876846313,
|
| 98092 |
+
"learning_rate": 5.271939125465672e-06,
|
| 98093 |
+
"loss": 0.6919,
|
| 98094 |
+
"step": 14012
|
| 98095 |
+
},
|
| 98096 |
+
{
|
| 98097 |
+
"epoch": 15.96068376068376,
|
| 98098 |
+
"grad_norm": 0.23039647936820984,
|
| 98099 |
+
"learning_rate": 5.269077631612967e-06,
|
| 98100 |
+
"loss": 0.6409,
|
| 98101 |
+
"step": 14013
|
| 98102 |
+
},
|
| 98103 |
+
{
|
| 98104 |
+
"epoch": 15.961823361823361,
|
| 98105 |
+
"grad_norm": 0.1734488606452942,
|
| 98106 |
+
"learning_rate": 5.2662168230694645e-06,
|
| 98107 |
+
"loss": 0.8209,
|
| 98108 |
+
"step": 14014
|
| 98109 |
+
},
|
| 98110 |
+
{
|
| 98111 |
+
"epoch": 15.962962962962964,
|
| 98112 |
+
"grad_norm": 0.22865897417068481,
|
| 98113 |
+
"learning_rate": 5.263356699934513e-06,
|
| 98114 |
+
"loss": 0.3679,
|
| 98115 |
+
"step": 14015
|
| 98116 |
+
},
|
| 98117 |
+
{
|
| 98118 |
+
"epoch": 15.964102564102564,
|
| 98119 |
+
"grad_norm": 0.20063550770282745,
|
| 98120 |
+
"learning_rate": 5.260497262307456e-06,
|
| 98121 |
+
"loss": 0.7736,
|
| 98122 |
+
"step": 14016
|
| 98123 |
+
},
|
| 98124 |
+
{
|
| 98125 |
+
"epoch": 15.965242165242165,
|
| 98126 |
+
"grad_norm": 0.17283949255943298,
|
| 98127 |
+
"learning_rate": 5.2576385102876155e-06,
|
| 98128 |
+
"loss": 0.7293,
|
| 98129 |
+
"step": 14017
|
| 98130 |
+
},
|
| 98131 |
+
{
|
| 98132 |
+
"epoch": 15.966381766381767,
|
| 98133 |
+
"grad_norm": 0.30130085349082947,
|
| 98134 |
+
"learning_rate": 5.254780443974289e-06,
|
| 98135 |
+
"loss": 0.53,
|
| 98136 |
+
"step": 14018
|
| 98137 |
+
},
|
| 98138 |
+
{
|
| 98139 |
+
"epoch": 15.967521367521368,
|
| 98140 |
+
"grad_norm": 0.22147002816200256,
|
| 98141 |
+
"learning_rate": 5.2519230634667295e-06,
|
| 98142 |
+
"loss": 0.4588,
|
| 98143 |
+
"step": 14019
|
| 98144 |
+
},
|
| 98145 |
+
{
|
| 98146 |
+
"epoch": 15.968660968660968,
|
| 98147 |
+
"grad_norm": 0.21945025026798248,
|
| 98148 |
+
"learning_rate": 5.249066368864189e-06,
|
| 98149 |
+
"loss": 0.4553,
|
| 98150 |
+
"step": 14020
|
| 98151 |
+
},
|
| 98152 |
+
{
|
| 98153 |
+
"epoch": 15.96980056980057,
|
| 98154 |
+
"grad_norm": 0.22448524832725525,
|
| 98155 |
+
"learning_rate": 5.246210360265888e-06,
|
| 98156 |
+
"loss": 0.4898,
|
| 98157 |
+
"step": 14021
|
| 98158 |
+
},
|
| 98159 |
+
{
|
| 98160 |
+
"epoch": 15.970940170940171,
|
| 98161 |
+
"grad_norm": 0.21025843918323517,
|
| 98162 |
+
"learning_rate": 5.243355037771028e-06,
|
| 98163 |
+
"loss": 0.554,
|
| 98164 |
+
"step": 14022
|
| 98165 |
+
},
|
| 98166 |
+
{
|
| 98167 |
+
"epoch": 15.972079772079772,
|
| 98168 |
+
"grad_norm": 0.315996915102005,
|
| 98169 |
+
"learning_rate": 5.240500401478774e-06,
|
| 98170 |
+
"loss": 0.5518,
|
| 98171 |
+
"step": 14023
|
| 98172 |
+
},
|
| 98173 |
+
{
|
| 98174 |
+
"epoch": 15.973219373219374,
|
| 98175 |
+
"grad_norm": 0.22453878819942474,
|
| 98176 |
+
"learning_rate": 5.237646451488282e-06,
|
| 98177 |
+
"loss": 0.6244,
|
| 98178 |
+
"step": 14024
|
| 98179 |
+
},
|
| 98180 |
+
{
|
| 98181 |
+
"epoch": 15.974358974358974,
|
| 98182 |
+
"grad_norm": 0.22095011174678802,
|
| 98183 |
+
"learning_rate": 5.234793187898682e-06,
|
| 98184 |
+
"loss": 0.7162,
|
| 98185 |
+
"step": 14025
|
| 98186 |
+
},
|
| 98187 |
+
{
|
| 98188 |
+
"epoch": 15.975498575498575,
|
| 98189 |
+
"grad_norm": 0.20376168191432953,
|
| 98190 |
+
"learning_rate": 5.231940610809063e-06,
|
| 98191 |
+
"loss": 0.563,
|
| 98192 |
+
"step": 14026
|
| 98193 |
+
},
|
| 98194 |
+
{
|
| 98195 |
+
"epoch": 15.976638176638177,
|
| 98196 |
+
"grad_norm": 0.21002911031246185,
|
| 98197 |
+
"learning_rate": 5.229088720318507e-06,
|
| 98198 |
+
"loss": 0.5615,
|
| 98199 |
+
"step": 14027
|
| 98200 |
+
},
|
| 98201 |
+
{
|
| 98202 |
+
"epoch": 15.977777777777778,
|
| 98203 |
+
"grad_norm": 0.22033274173736572,
|
| 98204 |
+
"learning_rate": 5.226237516526072e-06,
|
| 98205 |
+
"loss": 0.6323,
|
| 98206 |
+
"step": 14028
|
| 98207 |
+
},
|
| 98208 |
+
{
|
| 98209 |
+
"epoch": 15.978917378917378,
|
| 98210 |
+
"grad_norm": 0.24790626764297485,
|
| 98211 |
+
"learning_rate": 5.223386999530791e-06,
|
| 98212 |
+
"loss": 0.518,
|
| 98213 |
+
"step": 14029
|
| 98214 |
+
},
|
| 98215 |
+
{
|
| 98216 |
+
"epoch": 15.98005698005698,
|
| 98217 |
+
"grad_norm": 0.17082878947257996,
|
| 98218 |
+
"learning_rate": 5.2205371694316606e-06,
|
| 98219 |
+
"loss": 0.7464,
|
| 98220 |
+
"step": 14030
|
| 98221 |
+
},
|
| 98222 |
+
{
|
| 98223 |
+
"epoch": 15.981196581196581,
|
| 98224 |
+
"grad_norm": 0.2568652331829071,
|
| 98225 |
+
"learning_rate": 5.217688026327666e-06,
|
| 98226 |
+
"loss": 0.6016,
|
| 98227 |
+
"step": 14031
|
| 98228 |
+
},
|
| 98229 |
+
{
|
| 98230 |
+
"epoch": 15.982336182336182,
|
| 98231 |
+
"grad_norm": 0.17630797624588013,
|
| 98232 |
+
"learning_rate": 5.21483957031777e-06,
|
| 98233 |
+
"loss": 0.5679,
|
| 98234 |
+
"step": 14032
|
| 98235 |
+
},
|
| 98236 |
+
{
|
| 98237 |
+
"epoch": 15.983475783475784,
|
| 98238 |
+
"grad_norm": 0.2258668839931488,
|
| 98239 |
+
"learning_rate": 5.2119918015009036e-06,
|
| 98240 |
+
"loss": 0.5935,
|
| 98241 |
+
"step": 14033
|
| 98242 |
+
},
|
| 98243 |
+
{
|
| 98244 |
+
"epoch": 15.984615384615385,
|
| 98245 |
+
"grad_norm": 0.18470749258995056,
|
| 98246 |
+
"learning_rate": 5.209144719975981e-06,
|
| 98247 |
+
"loss": 0.8318,
|
| 98248 |
+
"step": 14034
|
| 98249 |
+
},
|
| 98250 |
+
{
|
| 98251 |
+
"epoch": 15.985754985754985,
|
| 98252 |
+
"grad_norm": 0.19650182127952576,
|
| 98253 |
+
"learning_rate": 5.206298325841885e-06,
|
| 98254 |
+
"loss": 0.7677,
|
| 98255 |
+
"step": 14035
|
| 98256 |
+
},
|
| 98257 |
+
{
|
| 98258 |
+
"epoch": 15.986894586894588,
|
| 98259 |
+
"grad_norm": 0.18926526606082916,
|
| 98260 |
+
"learning_rate": 5.203452619197488e-06,
|
| 98261 |
+
"loss": 0.7555,
|
| 98262 |
+
"step": 14036
|
| 98263 |
+
},
|
| 98264 |
+
{
|
| 98265 |
+
"epoch": 15.988034188034188,
|
| 98266 |
+
"grad_norm": 0.24393440783023834,
|
| 98267 |
+
"learning_rate": 5.200607600141619e-06,
|
| 98268 |
+
"loss": 0.5547,
|
| 98269 |
+
"step": 14037
|
| 98270 |
+
},
|
| 98271 |
+
{
|
| 98272 |
+
"epoch": 15.989173789173789,
|
| 98273 |
+
"grad_norm": 0.2429366260766983,
|
| 98274 |
+
"learning_rate": 5.197763268773093e-06,
|
| 98275 |
+
"loss": 0.4874,
|
| 98276 |
+
"step": 14038
|
| 98277 |
+
},
|
| 98278 |
+
{
|
| 98279 |
+
"epoch": 15.990313390313391,
|
| 98280 |
+
"grad_norm": 0.19000403583049774,
|
| 98281 |
+
"learning_rate": 5.194919625190706e-06,
|
| 98282 |
+
"loss": 0.6184,
|
| 98283 |
+
"step": 14039
|
| 98284 |
+
},
|
| 98285 |
+
{
|
| 98286 |
+
"epoch": 15.991452991452991,
|
| 98287 |
+
"grad_norm": 0.21088244020938873,
|
| 98288 |
+
"learning_rate": 5.192076669493231e-06,
|
| 98289 |
+
"loss": 0.4815,
|
| 98290 |
+
"step": 14040
|
| 98291 |
+
},
|
| 98292 |
+
{
|
| 98293 |
+
"epoch": 15.992592592592592,
|
| 98294 |
+
"grad_norm": 0.17279838025569916,
|
| 98295 |
+
"learning_rate": 5.1892344017794e-06,
|
| 98296 |
+
"loss": 0.5564,
|
| 98297 |
+
"step": 14041
|
| 98298 |
+
},
|
| 98299 |
+
{
|
| 98300 |
+
"epoch": 15.993732193732194,
|
| 98301 |
+
"grad_norm": 0.24155011773109436,
|
| 98302 |
+
"learning_rate": 5.186392822147934e-06,
|
| 98303 |
+
"loss": 0.6469,
|
| 98304 |
+
"step": 14042
|
| 98305 |
+
},
|
| 98306 |
+
{
|
| 98307 |
+
"epoch": 15.994871794871795,
|
| 98308 |
+
"grad_norm": 0.23470929265022278,
|
| 98309 |
+
"learning_rate": 5.1835519306975305e-06,
|
| 98310 |
+
"loss": 0.5868,
|
| 98311 |
+
"step": 14043
|
| 98312 |
+
},
|
| 98313 |
+
{
|
| 98314 |
+
"epoch": 15.996011396011395,
|
| 98315 |
+
"grad_norm": 0.17550434172153473,
|
| 98316 |
+
"learning_rate": 5.180711727526877e-06,
|
| 98317 |
+
"loss": 0.6681,
|
| 98318 |
+
"step": 14044
|
| 98319 |
+
},
|
| 98320 |
+
{
|
| 98321 |
+
"epoch": 15.997150997150998,
|
| 98322 |
+
"grad_norm": 0.188474640250206,
|
| 98323 |
+
"learning_rate": 5.1778722127346e-06,
|
| 98324 |
+
"loss": 0.7394,
|
| 98325 |
+
"step": 14045
|
| 98326 |
+
},
|
| 98327 |
+
{
|
| 98328 |
+
"epoch": 15.998290598290598,
|
| 98329 |
+
"grad_norm": 0.27023807168006897,
|
| 98330 |
+
"learning_rate": 5.1750333864193315e-06,
|
| 98331 |
+
"loss": 0.4689,
|
| 98332 |
+
"step": 14046
|
| 98333 |
+
},
|
| 98334 |
+
{
|
| 98335 |
+
"epoch": 15.999430199430199,
|
| 98336 |
+
"grad_norm": 0.2202572524547577,
|
| 98337 |
+
"learning_rate": 5.1721952486796736e-06,
|
| 98338 |
+
"loss": 0.7047,
|
| 98339 |
+
"step": 14047
|
| 98340 |
+
},
|
| 98341 |
+
{
|
| 98342 |
+
"epoch": 16.0,
|
| 98343 |
+
"grad_norm": 0.3407000005245209,
|
| 98344 |
+
"learning_rate": 5.169357799614208e-06,
|
| 98345 |
+
"loss": 1.1228,
|
| 98346 |
+
"step": 14048
|
| 98347 |
+
},
|
| 98348 |
+
{
|
| 98349 |
+
"epoch": 16.001139601139602,
|
| 98350 |
+
"grad_norm": 0.17774610221385956,
|
| 98351 |
+
"learning_rate": 5.166521039321473e-06,
|
| 98352 |
+
"loss": 0.6193,
|
| 98353 |
+
"step": 14049
|
| 98354 |
+
},
|
| 98355 |
+
{
|
| 98356 |
+
"epoch": 16.0022792022792,
|
| 98357 |
+
"grad_norm": 0.1734650731086731,
|
| 98358 |
+
"learning_rate": 5.163684967900007e-06,
|
| 98359 |
+
"loss": 0.8355,
|
| 98360 |
+
"step": 14050
|
| 98361 |
+
},
|
| 98362 |
+
{
|
| 98363 |
+
"epoch": 16.003418803418803,
|
| 98364 |
+
"grad_norm": 0.18855872750282288,
|
| 98365 |
+
"learning_rate": 5.16084958544831e-06,
|
| 98366 |
+
"loss": 0.595,
|
| 98367 |
+
"step": 14051
|
| 98368 |
+
},
|
| 98369 |
+
{
|
| 98370 |
+
"epoch": 16.004558404558406,
|
| 98371 |
+
"grad_norm": 0.17589649558067322,
|
| 98372 |
+
"learning_rate": 5.1580148920648715e-06,
|
| 98373 |
+
"loss": 0.932,
|
| 98374 |
+
"step": 14052
|
| 98375 |
+
},
|
| 98376 |
+
{
|
| 98377 |
+
"epoch": 16.005698005698004,
|
| 98378 |
+
"grad_norm": 0.1905054897069931,
|
| 98379 |
+
"learning_rate": 5.155180887848135e-06,
|
| 98380 |
+
"loss": 0.6708,
|
| 98381 |
+
"step": 14053
|
| 98382 |
+
},
|
| 98383 |
+
{
|
| 98384 |
+
"epoch": 16.006837606837607,
|
| 98385 |
+
"grad_norm": 0.203451007604599,
|
| 98386 |
+
"learning_rate": 5.152347572896535e-06,
|
| 98387 |
+
"loss": 0.6731,
|
| 98388 |
+
"step": 14054
|
| 98389 |
+
},
|
| 98390 |
+
{
|
| 98391 |
+
"epoch": 16.00797720797721,
|
| 98392 |
+
"grad_norm": 0.1736338585615158,
|
| 98393 |
+
"learning_rate": 5.149514947308495e-06,
|
| 98394 |
+
"loss": 0.7534,
|
| 98395 |
+
"step": 14055
|
| 98396 |
+
},
|
| 98397 |
+
{
|
| 98398 |
+
"epoch": 16.009116809116808,
|
| 98399 |
+
"grad_norm": 0.18473488092422485,
|
| 98400 |
+
"learning_rate": 5.146683011182388e-06,
|
| 98401 |
+
"loss": 0.5515,
|
| 98402 |
+
"step": 14056
|
| 98403 |
+
},
|
| 98404 |
+
{
|
| 98405 |
+
"epoch": 16.01025641025641,
|
| 98406 |
+
"grad_norm": 0.19602778553962708,
|
| 98407 |
+
"learning_rate": 5.143851764616572e-06,
|
| 98408 |
+
"loss": 0.4613,
|
| 98409 |
+
"step": 14057
|
| 98410 |
+
},
|
| 98411 |
+
{
|
| 98412 |
+
"epoch": 16.011396011396013,
|
| 98413 |
+
"grad_norm": 0.1879139393568039,
|
| 98414 |
+
"learning_rate": 5.14102120770939e-06,
|
| 98415 |
+
"loss": 0.5354,
|
| 98416 |
+
"step": 14058
|
| 98417 |
+
},
|
| 98418 |
+
{
|
| 98419 |
+
"epoch": 16.01253561253561,
|
| 98420 |
+
"grad_norm": 0.21041598916053772,
|
| 98421 |
+
"learning_rate": 5.138191340559162e-06,
|
| 98422 |
+
"loss": 0.4618,
|
| 98423 |
+
"step": 14059
|
| 98424 |
+
},
|
| 98425 |
+
{
|
| 98426 |
+
"epoch": 16.013675213675214,
|
| 98427 |
+
"grad_norm": 0.19878196716308594,
|
| 98428 |
+
"learning_rate": 5.1353621632641625e-06,
|
| 98429 |
+
"loss": 0.581,
|
| 98430 |
+
"step": 14060
|
| 98431 |
+
},
|
| 98432 |
+
{
|
| 98433 |
+
"epoch": 16.014814814814816,
|
| 98434 |
+
"grad_norm": 0.21880239248275757,
|
| 98435 |
+
"learning_rate": 5.13253367592266e-06,
|
| 98436 |
+
"loss": 0.6558,
|
| 98437 |
+
"step": 14061
|
| 98438 |
+
},
|
| 98439 |
+
{
|
| 98440 |
+
"epoch": 16.015954415954415,
|
| 98441 |
+
"grad_norm": 0.21958042681217194,
|
| 98442 |
+
"learning_rate": 5.129705878632901e-06,
|
| 98443 |
+
"loss": 0.6628,
|
| 98444 |
+
"step": 14062
|
| 98445 |
+
},
|
| 98446 |
+
{
|
| 98447 |
+
"epoch": 16.017094017094017,
|
| 98448 |
+
"grad_norm": 0.1946631819009781,
|
| 98449 |
+
"learning_rate": 5.126878771493107e-06,
|
| 98450 |
+
"loss": 0.7045,
|
| 98451 |
+
"step": 14063
|
| 98452 |
+
},
|
| 98453 |
+
{
|
| 98454 |
+
"epoch": 16.01823361823362,
|
| 98455 |
+
"grad_norm": 0.19736795127391815,
|
| 98456 |
+
"learning_rate": 5.124052354601458e-06,
|
| 98457 |
+
"loss": 0.9372,
|
| 98458 |
+
"step": 14064
|
| 98459 |
+
},
|
| 98460 |
+
{
|
| 98461 |
+
"epoch": 16.019373219373218,
|
| 98462 |
+
"grad_norm": 0.17857308685779572,
|
| 98463 |
+
"learning_rate": 5.1212266280561225e-06,
|
| 98464 |
+
"loss": 0.5775,
|
| 98465 |
+
"step": 14065
|
| 98466 |
+
},
|
| 98467 |
+
{
|
| 98468 |
+
"epoch": 16.02051282051282,
|
| 98469 |
+
"grad_norm": 0.1766318827867508,
|
| 98470 |
+
"learning_rate": 5.118401591955269e-06,
|
| 98471 |
+
"loss": 0.77,
|
| 98472 |
+
"step": 14066
|
| 98473 |
+
},
|
| 98474 |
+
{
|
| 98475 |
+
"epoch": 16.021652421652423,
|
| 98476 |
+
"grad_norm": 0.16231514513492584,
|
| 98477 |
+
"learning_rate": 5.115577246396991e-06,
|
| 98478 |
+
"loss": 0.6573,
|
| 98479 |
+
"step": 14067
|
| 98480 |
+
},
|
| 98481 |
+
{
|
| 98482 |
+
"epoch": 16.02279202279202,
|
| 98483 |
+
"grad_norm": 0.21469400823116302,
|
| 98484 |
+
"learning_rate": 5.112753591479402e-06,
|
| 98485 |
+
"loss": 0.5812,
|
| 98486 |
+
"step": 14068
|
| 98487 |
+
},
|
| 98488 |
+
{
|
| 98489 |
+
"epoch": 16.023931623931624,
|
| 98490 |
+
"grad_norm": 0.1972481906414032,
|
| 98491 |
+
"learning_rate": 5.109930627300569e-06,
|
| 98492 |
+
"loss": 0.7897,
|
| 98493 |
+
"step": 14069
|
| 98494 |
+
},
|
| 98495 |
+
{
|
| 98496 |
+
"epoch": 16.025071225071226,
|
| 98497 |
+
"grad_norm": 0.20298215746879578,
|
| 98498 |
+
"learning_rate": 5.107108353958551e-06,
|
| 98499 |
+
"loss": 0.4743,
|
| 98500 |
+
"step": 14070
|
| 98501 |
+
},
|
| 98502 |
+
{
|
| 98503 |
+
"epoch": 16.026210826210825,
|
| 98504 |
+
"grad_norm": 0.20357541739940643,
|
| 98505 |
+
"learning_rate": 5.104286771551356e-06,
|
| 98506 |
+
"loss": 0.5591,
|
| 98507 |
+
"step": 14071
|
| 98508 |
+
},
|
| 98509 |
+
{
|
| 98510 |
+
"epoch": 16.027350427350427,
|
| 98511 |
+
"grad_norm": 0.24694040417671204,
|
| 98512 |
+
"learning_rate": 5.101465880176998e-06,
|
| 98513 |
+
"loss": 0.6901,
|
| 98514 |
+
"step": 14072
|
| 98515 |
+
},
|
| 98516 |
+
{
|
| 98517 |
+
"epoch": 16.02849002849003,
|
| 98518 |
+
"grad_norm": 0.19539234042167664,
|
| 98519 |
+
"learning_rate": 5.098645679933451e-06,
|
| 98520 |
+
"loss": 0.7029,
|
| 98521 |
+
"step": 14073
|
| 98522 |
+
},
|
| 98523 |
+
{
|
| 98524 |
+
"epoch": 16.02962962962963,
|
| 98525 |
+
"grad_norm": 0.19871358573436737,
|
| 98526 |
+
"learning_rate": 5.095826170918674e-06,
|
| 98527 |
+
"loss": 0.7805,
|
| 98528 |
+
"step": 14074
|
| 98529 |
+
},
|
| 98530 |
+
{
|
| 98531 |
+
"epoch": 16.03076923076923,
|
| 98532 |
+
"grad_norm": 0.17123480141162872,
|
| 98533 |
+
"learning_rate": 5.093007353230584e-06,
|
| 98534 |
+
"loss": 0.5841,
|
| 98535 |
+
"step": 14075
|
| 98536 |
+
},
|
| 98537 |
+
{
|
| 98538 |
+
"epoch": 16.031908831908833,
|
| 98539 |
+
"grad_norm": 0.18302345275878906,
|
| 98540 |
+
"learning_rate": 5.090189226967085e-06,
|
| 98541 |
+
"loss": 0.5956,
|
| 98542 |
+
"step": 14076
|
| 98543 |
+
},
|
| 98544 |
+
{
|
| 98545 |
+
"epoch": 16.03304843304843,
|
| 98546 |
+
"grad_norm": 0.1640847623348236,
|
| 98547 |
+
"learning_rate": 5.087371792226084e-06,
|
| 98548 |
+
"loss": 0.8549,
|
| 98549 |
+
"step": 14077
|
| 98550 |
+
},
|
| 98551 |
+
{
|
| 98552 |
+
"epoch": 16.034188034188034,
|
| 98553 |
+
"grad_norm": 0.22754384577274323,
|
| 98554 |
+
"learning_rate": 5.08455504910541e-06,
|
| 98555 |
+
"loss": 0.6512,
|
| 98556 |
+
"step": 14078
|
| 98557 |
+
},
|
| 98558 |
+
{
|
| 98559 |
+
"epoch": 16.035327635327636,
|
| 98560 |
+
"grad_norm": 0.2154824435710907,
|
| 98561 |
+
"learning_rate": 5.081738997702909e-06,
|
| 98562 |
+
"loss": 0.6456,
|
| 98563 |
+
"step": 14079
|
| 98564 |
+
},
|
| 98565 |
+
{
|
| 98566 |
+
"epoch": 16.036467236467235,
|
| 98567 |
+
"grad_norm": 0.19186750054359436,
|
| 98568 |
+
"learning_rate": 5.078923638116387e-06,
|
| 98569 |
+
"loss": 0.7609,
|
| 98570 |
+
"step": 14080
|
| 98571 |
+
},
|
| 98572 |
+
{
|
| 98573 |
+
"epoch": 16.037606837606837,
|
| 98574 |
+
"grad_norm": 0.21435517072677612,
|
| 98575 |
+
"learning_rate": 5.07610897044363e-06,
|
| 98576 |
+
"loss": 0.6266,
|
| 98577 |
+
"step": 14081
|
| 98578 |
+
},
|
| 98579 |
+
{
|
| 98580 |
+
"epoch": 16.03874643874644,
|
| 98581 |
+
"grad_norm": 0.25843796133995056,
|
| 98582 |
+
"learning_rate": 5.073294994782407e-06,
|
| 98583 |
+
"loss": 0.3241,
|
| 98584 |
+
"step": 14082
|
| 98585 |
+
},
|
| 98586 |
+
{
|
| 98587 |
+
"epoch": 16.03988603988604,
|
| 98588 |
+
"grad_norm": 0.2305968701839447,
|
| 98589 |
+
"learning_rate": 5.070481711230441e-06,
|
| 98590 |
+
"loss": 0.646,
|
| 98591 |
+
"step": 14083
|
| 98592 |
+
},
|
| 98593 |
+
{
|
| 98594 |
+
"epoch": 16.04102564102564,
|
| 98595 |
+
"grad_norm": 0.1847716122865677,
|
| 98596 |
+
"learning_rate": 5.0676691198854485e-06,
|
| 98597 |
+
"loss": 0.7921,
|
| 98598 |
+
"step": 14084
|
| 98599 |
+
},
|
| 98600 |
+
{
|
| 98601 |
+
"epoch": 16.042165242165243,
|
| 98602 |
+
"grad_norm": 0.19992592930793762,
|
| 98603 |
+
"learning_rate": 5.0648572208451235e-06,
|
| 98604 |
+
"loss": 0.4748,
|
| 98605 |
+
"step": 14085
|
| 98606 |
+
},
|
| 98607 |
+
{
|
| 98608 |
+
"epoch": 16.043304843304842,
|
| 98609 |
+
"grad_norm": 0.22528088092803955,
|
| 98610 |
+
"learning_rate": 5.062046014207136e-06,
|
| 98611 |
+
"loss": 0.4691,
|
| 98612 |
+
"step": 14086
|
| 98613 |
+
},
|
| 98614 |
+
{
|
| 98615 |
+
"epoch": 16.044444444444444,
|
| 98616 |
+
"grad_norm": 0.23095327615737915,
|
| 98617 |
+
"learning_rate": 5.059235500069106e-06,
|
| 98618 |
+
"loss": 0.6832,
|
| 98619 |
+
"step": 14087
|
| 98620 |
+
},
|
| 98621 |
+
{
|
| 98622 |
+
"epoch": 16.045584045584047,
|
| 98623 |
+
"grad_norm": 0.2332068383693695,
|
| 98624 |
+
"learning_rate": 5.056425678528673e-06,
|
| 98625 |
+
"loss": 0.3386,
|
| 98626 |
+
"step": 14088
|
| 98627 |
+
},
|
| 98628 |
+
{
|
| 98629 |
+
"epoch": 16.046723646723645,
|
| 98630 |
+
"grad_norm": 0.16111916303634644,
|
| 98631 |
+
"learning_rate": 5.053616549683427e-06,
|
| 98632 |
+
"loss": 0.7517,
|
| 98633 |
+
"step": 14089
|
| 98634 |
+
},
|
| 98635 |
+
{
|
| 98636 |
+
"epoch": 16.047863247863248,
|
| 98637 |
+
"grad_norm": 0.2254699319601059,
|
| 98638 |
+
"learning_rate": 5.050808113630925e-06,
|
| 98639 |
+
"loss": 0.6548,
|
| 98640 |
+
"step": 14090
|
| 98641 |
+
},
|
| 98642 |
+
{
|
| 98643 |
+
"epoch": 16.04900284900285,
|
| 98644 |
+
"grad_norm": 0.21958674490451813,
|
| 98645 |
+
"learning_rate": 5.048000370468717e-06,
|
| 98646 |
+
"loss": 0.5918,
|
| 98647 |
+
"step": 14091
|
| 98648 |
+
},
|
| 98649 |
+
{
|
| 98650 |
+
"epoch": 16.05014245014245,
|
| 98651 |
+
"grad_norm": 0.1744617074728012,
|
| 98652 |
+
"learning_rate": 5.045193320294323e-06,
|
| 98653 |
+
"loss": 0.7799,
|
| 98654 |
+
"step": 14092
|
| 98655 |
+
},
|
| 98656 |
+
{
|
| 98657 |
+
"epoch": 16.05128205128205,
|
| 98658 |
+
"grad_norm": 0.22298863530158997,
|
| 98659 |
+
"learning_rate": 5.0423869632052475e-06,
|
| 98660 |
+
"loss": 0.4798,
|
| 98661 |
+
"step": 14093
|
| 98662 |
+
},
|
| 98663 |
+
{
|
| 98664 |
+
"epoch": 16.052421652421653,
|
| 98665 |
+
"grad_norm": 0.22837935388088226,
|
| 98666 |
+
"learning_rate": 5.0395812992989535e-06,
|
| 98667 |
+
"loss": 0.5823,
|
| 98668 |
+
"step": 14094
|
| 98669 |
+
},
|
| 98670 |
+
{
|
| 98671 |
+
"epoch": 16.053561253561252,
|
| 98672 |
+
"grad_norm": 0.21403367817401886,
|
| 98673 |
+
"learning_rate": 5.0367763286728875e-06,
|
| 98674 |
+
"loss": 0.6041,
|
| 98675 |
+
"step": 14095
|
| 98676 |
+
},
|
| 98677 |
+
{
|
| 98678 |
+
"epoch": 16.054700854700855,
|
| 98679 |
+
"grad_norm": 0.21587662398815155,
|
| 98680 |
+
"learning_rate": 5.033972051424482e-06,
|
| 98681 |
+
"loss": 0.3409,
|
| 98682 |
+
"step": 14096
|
| 98683 |
+
},
|
| 98684 |
+
{
|
| 98685 |
+
"epoch": 16.055840455840457,
|
| 98686 |
+
"grad_norm": 0.18038514256477356,
|
| 98687 |
+
"learning_rate": 5.03116846765114e-06,
|
| 98688 |
+
"loss": 0.7024,
|
| 98689 |
+
"step": 14097
|
| 98690 |
+
},
|
| 98691 |
+
{
|
| 98692 |
+
"epoch": 16.056980056980056,
|
| 98693 |
+
"grad_norm": 0.2202543467283249,
|
| 98694 |
+
"learning_rate": 5.028365577450217e-06,
|
| 98695 |
+
"loss": 0.507,
|
| 98696 |
+
"step": 14098
|
| 98697 |
+
},
|
| 98698 |
+
{
|
| 98699 |
+
"epoch": 16.058119658119658,
|
| 98700 |
+
"grad_norm": 0.22714479267597198,
|
| 98701 |
+
"learning_rate": 5.025563380919088e-06,
|
| 98702 |
+
"loss": 0.2714,
|
| 98703 |
+
"step": 14099
|
| 98704 |
+
},
|
| 98705 |
+
{
|
| 98706 |
+
"epoch": 16.05925925925926,
|
| 98707 |
+
"grad_norm": 0.18912501633167267,
|
| 98708 |
+
"learning_rate": 5.02276187815508e-06,
|
| 98709 |
+
"loss": 0.7436,
|
| 98710 |
+
"step": 14100
|
| 98711 |
}
|
| 98712 |
],
|
| 98713 |
"logging_steps": 1,
|
|
|
|
| 98727 |
"attributes": {}
|
| 98728 |
}
|
| 98729 |
},
|
| 98730 |
+
"total_flos": 7.883484268832244e+19,
|
| 98731 |
"train_batch_size": 8,
|
| 98732 |
"trial_name": null,
|
| 98733 |
"trial_params": null
|