Training in progress, step 23500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000555808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e68365d634df885e2ccb22b9e0405446541877ee8e62ec1302fff49e3c3e0cc
|
| 3 |
size 1000555808
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1101500939
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:effbf62d20f9fb94ae8370692cd3bad3dc2ec649330b3346c28933ce1ebd69a0
|
| 3 |
size 1101500939
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a320e77ba3fa666257876a31b13bcbef1414b563002c39ac7bf5917ce16d1c33
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9db26897e091a59cb4b7d079801a4421ecd4005778946ddd5ab1ef08a85c9d29
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 1000,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -163076,6 +163076,1630 @@
|
|
| 163076 |
"eval_samples_per_second": 18.815,
|
| 163077 |
"eval_steps_per_second": 4.704,
|
| 163078 |
"step": 23268
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163079 |
}
|
| 163080 |
],
|
| 163081 |
"logging_steps": 1,
|
|
@@ -163089,13 +164713,13 @@
|
|
| 163089 |
"should_epoch_stop": false,
|
| 163090 |
"should_evaluate": false,
|
| 163091 |
"should_log": false,
|
| 163092 |
-
"should_save":
|
| 163093 |
"should_training_stop": false
|
| 163094 |
},
|
| 163095 |
"attributes": {}
|
| 163096 |
}
|
| 163097 |
},
|
| 163098 |
-
"total_flos": 1.
|
| 163099 |
"train_batch_size": 8,
|
| 163100 |
"trial_name": null,
|
| 163101 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.5529411764705883,
|
| 6 |
"eval_steps": 1000,
|
| 7 |
+
"global_step": 23500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 163076 |
"eval_samples_per_second": 18.815,
|
| 163077 |
"eval_steps_per_second": 4.704,
|
| 163078 |
"step": 23268
|
| 163079 |
+
},
|
| 163080 |
+
{
|
| 163081 |
+
"epoch": 0.5475058823529412,
|
| 163082 |
+
"grad_norm": 0.13671875,
|
| 163083 |
+
"learning_rate": 0.015470152610441767,
|
| 163084 |
+
"loss": 0.6689,
|
| 163085 |
+
"step": 23269
|
| 163086 |
+
},
|
| 163087 |
+
{
|
| 163088 |
+
"epoch": 0.5475294117647059,
|
| 163089 |
+
"grad_norm": 0.1416015625,
|
| 163090 |
+
"learning_rate": 0.01546935341365462,
|
| 163091 |
+
"loss": 1.1151,
|
| 163092 |
+
"step": 23270
|
| 163093 |
+
},
|
| 163094 |
+
{
|
| 163095 |
+
"epoch": 0.5475529411764706,
|
| 163096 |
+
"grad_norm": 0.12060546875,
|
| 163097 |
+
"learning_rate": 0.015468554216867469,
|
| 163098 |
+
"loss": 1.0712,
|
| 163099 |
+
"step": 23271
|
| 163100 |
+
},
|
| 163101 |
+
{
|
| 163102 |
+
"epoch": 0.5475764705882353,
|
| 163103 |
+
"grad_norm": 0.140625,
|
| 163104 |
+
"learning_rate": 0.01546775502008032,
|
| 163105 |
+
"loss": 0.6743,
|
| 163106 |
+
"step": 23272
|
| 163107 |
+
},
|
| 163108 |
+
{
|
| 163109 |
+
"epoch": 0.5476,
|
| 163110 |
+
"grad_norm": 0.126953125,
|
| 163111 |
+
"learning_rate": 0.015466955823293172,
|
| 163112 |
+
"loss": 1.1874,
|
| 163113 |
+
"step": 23273
|
| 163114 |
+
},
|
| 163115 |
+
{
|
| 163116 |
+
"epoch": 0.5476235294117647,
|
| 163117 |
+
"grad_norm": 0.1357421875,
|
| 163118 |
+
"learning_rate": 0.015466156626506024,
|
| 163119 |
+
"loss": 0.9292,
|
| 163120 |
+
"step": 23274
|
| 163121 |
+
},
|
| 163122 |
+
{
|
| 163123 |
+
"epoch": 0.5476470588235294,
|
| 163124 |
+
"grad_norm": 0.1484375,
|
| 163125 |
+
"learning_rate": 0.015465357429718874,
|
| 163126 |
+
"loss": 0.9797,
|
| 163127 |
+
"step": 23275
|
| 163128 |
+
},
|
| 163129 |
+
{
|
| 163130 |
+
"epoch": 0.5476705882352941,
|
| 163131 |
+
"grad_norm": 0.1376953125,
|
| 163132 |
+
"learning_rate": 0.01546455823293173,
|
| 163133 |
+
"loss": 1.0146,
|
| 163134 |
+
"step": 23276
|
| 163135 |
+
},
|
| 163136 |
+
{
|
| 163137 |
+
"epoch": 0.5476941176470588,
|
| 163138 |
+
"grad_norm": 0.1396484375,
|
| 163139 |
+
"learning_rate": 0.015463759036144577,
|
| 163140 |
+
"loss": 0.8917,
|
| 163141 |
+
"step": 23277
|
| 163142 |
+
},
|
| 163143 |
+
{
|
| 163144 |
+
"epoch": 0.5477176470588235,
|
| 163145 |
+
"grad_norm": 0.140625,
|
| 163146 |
+
"learning_rate": 0.01546295983935743,
|
| 163147 |
+
"loss": 0.8178,
|
| 163148 |
+
"step": 23278
|
| 163149 |
+
},
|
| 163150 |
+
{
|
| 163151 |
+
"epoch": 0.5477411764705883,
|
| 163152 |
+
"grad_norm": 0.1298828125,
|
| 163153 |
+
"learning_rate": 0.015462160642570283,
|
| 163154 |
+
"loss": 1.1993,
|
| 163155 |
+
"step": 23279
|
| 163156 |
+
},
|
| 163157 |
+
{
|
| 163158 |
+
"epoch": 0.5477647058823529,
|
| 163159 |
+
"grad_norm": 0.134765625,
|
| 163160 |
+
"learning_rate": 0.015461361445783132,
|
| 163161 |
+
"loss": 1.132,
|
| 163162 |
+
"step": 23280
|
| 163163 |
+
},
|
| 163164 |
+
{
|
| 163165 |
+
"epoch": 0.5477882352941177,
|
| 163166 |
+
"grad_norm": 0.134765625,
|
| 163167 |
+
"learning_rate": 0.015460562248995984,
|
| 163168 |
+
"loss": 1.1385,
|
| 163169 |
+
"step": 23281
|
| 163170 |
+
},
|
| 163171 |
+
{
|
| 163172 |
+
"epoch": 0.5478117647058823,
|
| 163173 |
+
"grad_norm": 0.1337890625,
|
| 163174 |
+
"learning_rate": 0.015459763052208836,
|
| 163175 |
+
"loss": 1.0586,
|
| 163176 |
+
"step": 23282
|
| 163177 |
+
},
|
| 163178 |
+
{
|
| 163179 |
+
"epoch": 0.5478352941176471,
|
| 163180 |
+
"grad_norm": 0.1259765625,
|
| 163181 |
+
"learning_rate": 0.015458963855421688,
|
| 163182 |
+
"loss": 1.2273,
|
| 163183 |
+
"step": 23283
|
| 163184 |
+
},
|
| 163185 |
+
{
|
| 163186 |
+
"epoch": 0.5478588235294117,
|
| 163187 |
+
"grad_norm": 0.126953125,
|
| 163188 |
+
"learning_rate": 0.01545816465863454,
|
| 163189 |
+
"loss": 0.9406,
|
| 163190 |
+
"step": 23284
|
| 163191 |
+
},
|
| 163192 |
+
{
|
| 163193 |
+
"epoch": 0.5478823529411765,
|
| 163194 |
+
"grad_norm": 0.138671875,
|
| 163195 |
+
"learning_rate": 0.015457365461847391,
|
| 163196 |
+
"loss": 0.8373,
|
| 163197 |
+
"step": 23285
|
| 163198 |
+
},
|
| 163199 |
+
{
|
| 163200 |
+
"epoch": 0.5479058823529411,
|
| 163201 |
+
"grad_norm": 0.138671875,
|
| 163202 |
+
"learning_rate": 0.01545656626506024,
|
| 163203 |
+
"loss": 0.8457,
|
| 163204 |
+
"step": 23286
|
| 163205 |
+
},
|
| 163206 |
+
{
|
| 163207 |
+
"epoch": 0.5479294117647059,
|
| 163208 |
+
"grad_norm": 0.1494140625,
|
| 163209 |
+
"learning_rate": 0.015455767068273093,
|
| 163210 |
+
"loss": 0.5616,
|
| 163211 |
+
"step": 23287
|
| 163212 |
+
},
|
| 163213 |
+
{
|
| 163214 |
+
"epoch": 0.5479529411764706,
|
| 163215 |
+
"grad_norm": 0.1318359375,
|
| 163216 |
+
"learning_rate": 0.015454967871485942,
|
| 163217 |
+
"loss": 0.9273,
|
| 163218 |
+
"step": 23288
|
| 163219 |
+
},
|
| 163220 |
+
{
|
| 163221 |
+
"epoch": 0.5479764705882353,
|
| 163222 |
+
"grad_norm": 0.138671875,
|
| 163223 |
+
"learning_rate": 0.015454168674698794,
|
| 163224 |
+
"loss": 0.785,
|
| 163225 |
+
"step": 23289
|
| 163226 |
+
},
|
| 163227 |
+
{
|
| 163228 |
+
"epoch": 0.548,
|
| 163229 |
+
"grad_norm": 0.12353515625,
|
| 163230 |
+
"learning_rate": 0.015453369477911648,
|
| 163231 |
+
"loss": 1.3134,
|
| 163232 |
+
"step": 23290
|
| 163233 |
+
},
|
| 163234 |
+
{
|
| 163235 |
+
"epoch": 0.5480235294117647,
|
| 163236 |
+
"grad_norm": 0.12109375,
|
| 163237 |
+
"learning_rate": 0.0154525702811245,
|
| 163238 |
+
"loss": 1.2729,
|
| 163239 |
+
"step": 23291
|
| 163240 |
+
},
|
| 163241 |
+
{
|
| 163242 |
+
"epoch": 0.5480470588235294,
|
| 163243 |
+
"grad_norm": 0.1376953125,
|
| 163244 |
+
"learning_rate": 0.01545177108433735,
|
| 163245 |
+
"loss": 0.9418,
|
| 163246 |
+
"step": 23292
|
| 163247 |
+
},
|
| 163248 |
+
{
|
| 163249 |
+
"epoch": 0.5480705882352941,
|
| 163250 |
+
"grad_norm": 0.14453125,
|
| 163251 |
+
"learning_rate": 0.0154509718875502,
|
| 163252 |
+
"loss": 0.9342,
|
| 163253 |
+
"step": 23293
|
| 163254 |
+
},
|
| 163255 |
+
{
|
| 163256 |
+
"epoch": 0.5480941176470588,
|
| 163257 |
+
"grad_norm": 0.12255859375,
|
| 163258 |
+
"learning_rate": 0.01545017269076305,
|
| 163259 |
+
"loss": 1.0404,
|
| 163260 |
+
"step": 23294
|
| 163261 |
+
},
|
| 163262 |
+
{
|
| 163263 |
+
"epoch": 0.5481176470588235,
|
| 163264 |
+
"grad_norm": 0.142578125,
|
| 163265 |
+
"learning_rate": 0.015449373493975902,
|
| 163266 |
+
"loss": 0.6449,
|
| 163267 |
+
"step": 23295
|
| 163268 |
+
},
|
| 163269 |
+
{
|
| 163270 |
+
"epoch": 0.5481411764705882,
|
| 163271 |
+
"grad_norm": 0.12353515625,
|
| 163272 |
+
"learning_rate": 0.015448574297188756,
|
| 163273 |
+
"loss": 1.2413,
|
| 163274 |
+
"step": 23296
|
| 163275 |
+
},
|
| 163276 |
+
{
|
| 163277 |
+
"epoch": 0.5481647058823529,
|
| 163278 |
+
"grad_norm": 0.1494140625,
|
| 163279 |
+
"learning_rate": 0.01544777510040161,
|
| 163280 |
+
"loss": 0.6388,
|
| 163281 |
+
"step": 23297
|
| 163282 |
+
},
|
| 163283 |
+
{
|
| 163284 |
+
"epoch": 0.5481882352941176,
|
| 163285 |
+
"grad_norm": 0.126953125,
|
| 163286 |
+
"learning_rate": 0.015446975903614458,
|
| 163287 |
+
"loss": 0.6571,
|
| 163288 |
+
"step": 23298
|
| 163289 |
+
},
|
| 163290 |
+
{
|
| 163291 |
+
"epoch": 0.5482117647058824,
|
| 163292 |
+
"grad_norm": 0.12353515625,
|
| 163293 |
+
"learning_rate": 0.015446176706827311,
|
| 163294 |
+
"loss": 1.2032,
|
| 163295 |
+
"step": 23299
|
| 163296 |
+
},
|
| 163297 |
+
{
|
| 163298 |
+
"epoch": 0.548235294117647,
|
| 163299 |
+
"grad_norm": 0.130859375,
|
| 163300 |
+
"learning_rate": 0.015445377510040159,
|
| 163301 |
+
"loss": 1.1818,
|
| 163302 |
+
"step": 23300
|
| 163303 |
+
},
|
| 163304 |
+
{
|
| 163305 |
+
"epoch": 0.5482588235294118,
|
| 163306 |
+
"grad_norm": 0.1357421875,
|
| 163307 |
+
"learning_rate": 0.015444578313253013,
|
| 163308 |
+
"loss": 1.1244,
|
| 163309 |
+
"step": 23301
|
| 163310 |
+
},
|
| 163311 |
+
{
|
| 163312 |
+
"epoch": 0.5482823529411764,
|
| 163313 |
+
"grad_norm": 0.1494140625,
|
| 163314 |
+
"learning_rate": 0.015443779116465864,
|
| 163315 |
+
"loss": 0.7766,
|
| 163316 |
+
"step": 23302
|
| 163317 |
+
},
|
| 163318 |
+
{
|
| 163319 |
+
"epoch": 0.5483058823529412,
|
| 163320 |
+
"grad_norm": 0.140625,
|
| 163321 |
+
"learning_rate": 0.015442979919678718,
|
| 163322 |
+
"loss": 0.9023,
|
| 163323 |
+
"step": 23303
|
| 163324 |
+
},
|
| 163325 |
+
{
|
| 163326 |
+
"epoch": 0.5483294117647058,
|
| 163327 |
+
"grad_norm": 0.1328125,
|
| 163328 |
+
"learning_rate": 0.015442180722891568,
|
| 163329 |
+
"loss": 1.1,
|
| 163330 |
+
"step": 23304
|
| 163331 |
+
},
|
| 163332 |
+
{
|
| 163333 |
+
"epoch": 0.5483529411764706,
|
| 163334 |
+
"grad_norm": 0.1259765625,
|
| 163335 |
+
"learning_rate": 0.01544138152610442,
|
| 163336 |
+
"loss": 1.152,
|
| 163337 |
+
"step": 23305
|
| 163338 |
+
},
|
| 163339 |
+
{
|
| 163340 |
+
"epoch": 0.5483764705882352,
|
| 163341 |
+
"grad_norm": 0.13671875,
|
| 163342 |
+
"learning_rate": 0.01544058232931727,
|
| 163343 |
+
"loss": 1.0426,
|
| 163344 |
+
"step": 23306
|
| 163345 |
+
},
|
| 163346 |
+
{
|
| 163347 |
+
"epoch": 0.5484,
|
| 163348 |
+
"grad_norm": 0.1474609375,
|
| 163349 |
+
"learning_rate": 0.015439783132530121,
|
| 163350 |
+
"loss": 0.5905,
|
| 163351 |
+
"step": 23307
|
| 163352 |
+
},
|
| 163353 |
+
{
|
| 163354 |
+
"epoch": 0.5484235294117648,
|
| 163355 |
+
"grad_norm": 0.1298828125,
|
| 163356 |
+
"learning_rate": 0.015438983935742973,
|
| 163357 |
+
"loss": 1.378,
|
| 163358 |
+
"step": 23308
|
| 163359 |
+
},
|
| 163360 |
+
{
|
| 163361 |
+
"epoch": 0.5484470588235294,
|
| 163362 |
+
"grad_norm": 0.1435546875,
|
| 163363 |
+
"learning_rate": 0.015438184738955823,
|
| 163364 |
+
"loss": 0.7097,
|
| 163365 |
+
"step": 23309
|
| 163366 |
+
},
|
| 163367 |
+
{
|
| 163368 |
+
"epoch": 0.5484705882352942,
|
| 163369 |
+
"grad_norm": 0.138671875,
|
| 163370 |
+
"learning_rate": 0.015437385542168676,
|
| 163371 |
+
"loss": 1.0572,
|
| 163372 |
+
"step": 23310
|
| 163373 |
+
},
|
| 163374 |
+
{
|
| 163375 |
+
"epoch": 0.5484941176470588,
|
| 163376 |
+
"grad_norm": 0.1337890625,
|
| 163377 |
+
"learning_rate": 0.015436586345381528,
|
| 163378 |
+
"loss": 0.9243,
|
| 163379 |
+
"step": 23311
|
| 163380 |
+
},
|
| 163381 |
+
{
|
| 163382 |
+
"epoch": 0.5485176470588236,
|
| 163383 |
+
"grad_norm": 0.134765625,
|
| 163384 |
+
"learning_rate": 0.015435787148594378,
|
| 163385 |
+
"loss": 1.0168,
|
| 163386 |
+
"step": 23312
|
| 163387 |
+
},
|
| 163388 |
+
{
|
| 163389 |
+
"epoch": 0.5485411764705882,
|
| 163390 |
+
"grad_norm": 0.134765625,
|
| 163391 |
+
"learning_rate": 0.01543498795180723,
|
| 163392 |
+
"loss": 1.1851,
|
| 163393 |
+
"step": 23313
|
| 163394 |
+
},
|
| 163395 |
+
{
|
| 163396 |
+
"epoch": 0.548564705882353,
|
| 163397 |
+
"grad_norm": 0.1337890625,
|
| 163398 |
+
"learning_rate": 0.01543418875502008,
|
| 163399 |
+
"loss": 1.062,
|
| 163400 |
+
"step": 23314
|
| 163401 |
+
},
|
| 163402 |
+
{
|
| 163403 |
+
"epoch": 0.5485882352941176,
|
| 163404 |
+
"grad_norm": 0.1357421875,
|
| 163405 |
+
"learning_rate": 0.01543338955823293,
|
| 163406 |
+
"loss": 0.9744,
|
| 163407 |
+
"step": 23315
|
| 163408 |
+
},
|
| 163409 |
+
{
|
| 163410 |
+
"epoch": 0.5486117647058824,
|
| 163411 |
+
"grad_norm": 0.142578125,
|
| 163412 |
+
"learning_rate": 0.015432590361445783,
|
| 163413 |
+
"loss": 0.6391,
|
| 163414 |
+
"step": 23316
|
| 163415 |
+
},
|
| 163416 |
+
{
|
| 163417 |
+
"epoch": 0.5486352941176471,
|
| 163418 |
+
"grad_norm": 0.12890625,
|
| 163419 |
+
"learning_rate": 0.015431791164658636,
|
| 163420 |
+
"loss": 1.0453,
|
| 163421 |
+
"step": 23317
|
| 163422 |
+
},
|
| 163423 |
+
{
|
| 163424 |
+
"epoch": 0.5486588235294118,
|
| 163425 |
+
"grad_norm": 0.12451171875,
|
| 163426 |
+
"learning_rate": 0.015430991967871488,
|
| 163427 |
+
"loss": 1.0803,
|
| 163428 |
+
"step": 23318
|
| 163429 |
+
},
|
| 163430 |
+
{
|
| 163431 |
+
"epoch": 0.5486823529411765,
|
| 163432 |
+
"grad_norm": 0.12255859375,
|
| 163433 |
+
"learning_rate": 0.015430192771084338,
|
| 163434 |
+
"loss": 0.9948,
|
| 163435 |
+
"step": 23319
|
| 163436 |
+
},
|
| 163437 |
+
{
|
| 163438 |
+
"epoch": 0.5487058823529412,
|
| 163439 |
+
"grad_norm": 0.1435546875,
|
| 163440 |
+
"learning_rate": 0.015429393574297191,
|
| 163441 |
+
"loss": 0.6367,
|
| 163442 |
+
"step": 23320
|
| 163443 |
+
},
|
| 163444 |
+
{
|
| 163445 |
+
"epoch": 0.5487294117647059,
|
| 163446 |
+
"grad_norm": 0.138671875,
|
| 163447 |
+
"learning_rate": 0.01542859437751004,
|
| 163448 |
+
"loss": 0.8535,
|
| 163449 |
+
"step": 23321
|
| 163450 |
+
},
|
| 163451 |
+
{
|
| 163452 |
+
"epoch": 0.5487529411764706,
|
| 163453 |
+
"grad_norm": 0.134765625,
|
| 163454 |
+
"learning_rate": 0.015427795180722893,
|
| 163455 |
+
"loss": 1.2507,
|
| 163456 |
+
"step": 23322
|
| 163457 |
+
},
|
| 163458 |
+
{
|
| 163459 |
+
"epoch": 0.5487764705882353,
|
| 163460 |
+
"grad_norm": 0.1298828125,
|
| 163461 |
+
"learning_rate": 0.01542699598393574,
|
| 163462 |
+
"loss": 1.1702,
|
| 163463 |
+
"step": 23323
|
| 163464 |
+
},
|
| 163465 |
+
{
|
| 163466 |
+
"epoch": 0.5488,
|
| 163467 |
+
"grad_norm": 0.134765625,
|
| 163468 |
+
"learning_rate": 0.015426196787148596,
|
| 163469 |
+
"loss": 0.891,
|
| 163470 |
+
"step": 23324
|
| 163471 |
+
},
|
| 163472 |
+
{
|
| 163473 |
+
"epoch": 0.5488235294117647,
|
| 163474 |
+
"grad_norm": 0.140625,
|
| 163475 |
+
"learning_rate": 0.015425397590361448,
|
| 163476 |
+
"loss": 0.6905,
|
| 163477 |
+
"step": 23325
|
| 163478 |
+
},
|
| 163479 |
+
{
|
| 163480 |
+
"epoch": 0.5488470588235295,
|
| 163481 |
+
"grad_norm": 0.1298828125,
|
| 163482 |
+
"learning_rate": 0.015424598393574298,
|
| 163483 |
+
"loss": 1.1199,
|
| 163484 |
+
"step": 23326
|
| 163485 |
+
},
|
| 163486 |
+
{
|
| 163487 |
+
"epoch": 0.5488705882352941,
|
| 163488 |
+
"grad_norm": 0.12353515625,
|
| 163489 |
+
"learning_rate": 0.01542379919678715,
|
| 163490 |
+
"loss": 1.1141,
|
| 163491 |
+
"step": 23327
|
| 163492 |
+
},
|
| 163493 |
+
{
|
| 163494 |
+
"epoch": 0.5488941176470589,
|
| 163495 |
+
"grad_norm": 0.1396484375,
|
| 163496 |
+
"learning_rate": 0.015423000000000001,
|
| 163497 |
+
"loss": 0.9771,
|
| 163498 |
+
"step": 23328
|
| 163499 |
+
},
|
| 163500 |
+
{
|
| 163501 |
+
"epoch": 0.5489176470588235,
|
| 163502 |
+
"grad_norm": 0.1279296875,
|
| 163503 |
+
"learning_rate": 0.015422200803212851,
|
| 163504 |
+
"loss": 1.0492,
|
| 163505 |
+
"step": 23329
|
| 163506 |
+
},
|
| 163507 |
+
{
|
| 163508 |
+
"epoch": 0.5489411764705883,
|
| 163509 |
+
"grad_norm": 0.12890625,
|
| 163510 |
+
"learning_rate": 0.015421401606425703,
|
| 163511 |
+
"loss": 0.9894,
|
| 163512 |
+
"step": 23330
|
| 163513 |
+
},
|
| 163514 |
+
{
|
| 163515 |
+
"epoch": 0.5489647058823529,
|
| 163516 |
+
"grad_norm": 0.1298828125,
|
| 163517 |
+
"learning_rate": 0.015420602409638556,
|
| 163518 |
+
"loss": 1.1257,
|
| 163519 |
+
"step": 23331
|
| 163520 |
+
},
|
| 163521 |
+
{
|
| 163522 |
+
"epoch": 0.5489882352941177,
|
| 163523 |
+
"grad_norm": 0.134765625,
|
| 163524 |
+
"learning_rate": 0.015419803212851406,
|
| 163525 |
+
"loss": 0.8951,
|
| 163526 |
+
"step": 23332
|
| 163527 |
+
},
|
| 163528 |
+
{
|
| 163529 |
+
"epoch": 0.5490117647058823,
|
| 163530 |
+
"grad_norm": 0.150390625,
|
| 163531 |
+
"learning_rate": 0.015419004016064258,
|
| 163532 |
+
"loss": 0.6524,
|
| 163533 |
+
"step": 23333
|
| 163534 |
+
},
|
| 163535 |
+
{
|
| 163536 |
+
"epoch": 0.5490352941176471,
|
| 163537 |
+
"grad_norm": 0.1318359375,
|
| 163538 |
+
"learning_rate": 0.01541820481927711,
|
| 163539 |
+
"loss": 0.8458,
|
| 163540 |
+
"step": 23334
|
| 163541 |
+
},
|
| 163542 |
+
{
|
| 163543 |
+
"epoch": 0.5490588235294117,
|
| 163544 |
+
"grad_norm": 0.138671875,
|
| 163545 |
+
"learning_rate": 0.01541740562248996,
|
| 163546 |
+
"loss": 0.8328,
|
| 163547 |
+
"step": 23335
|
| 163548 |
+
},
|
| 163549 |
+
{
|
| 163550 |
+
"epoch": 0.5490823529411765,
|
| 163551 |
+
"grad_norm": 0.142578125,
|
| 163552 |
+
"learning_rate": 0.015416606425702811,
|
| 163553 |
+
"loss": 0.7816,
|
| 163554 |
+
"step": 23336
|
| 163555 |
+
},
|
| 163556 |
+
{
|
| 163557 |
+
"epoch": 0.5491058823529412,
|
| 163558 |
+
"grad_norm": 0.130859375,
|
| 163559 |
+
"learning_rate": 0.01541580722891566,
|
| 163560 |
+
"loss": 0.9673,
|
| 163561 |
+
"step": 23337
|
| 163562 |
+
},
|
| 163563 |
+
{
|
| 163564 |
+
"epoch": 0.5491294117647059,
|
| 163565 |
+
"grad_norm": 0.1396484375,
|
| 163566 |
+
"learning_rate": 0.015415008032128514,
|
| 163567 |
+
"loss": 0.7678,
|
| 163568 |
+
"step": 23338
|
| 163569 |
+
},
|
| 163570 |
+
{
|
| 163571 |
+
"epoch": 0.5491529411764706,
|
| 163572 |
+
"grad_norm": 0.13671875,
|
| 163573 |
+
"learning_rate": 0.015414208835341366,
|
| 163574 |
+
"loss": 1.0191,
|
| 163575 |
+
"step": 23339
|
| 163576 |
+
},
|
| 163577 |
+
{
|
| 163578 |
+
"epoch": 0.5491764705882353,
|
| 163579 |
+
"grad_norm": 0.125,
|
| 163580 |
+
"learning_rate": 0.015413409638554216,
|
| 163581 |
+
"loss": 1.3162,
|
| 163582 |
+
"step": 23340
|
| 163583 |
+
},
|
| 163584 |
+
{
|
| 163585 |
+
"epoch": 0.5492,
|
| 163586 |
+
"grad_norm": 0.126953125,
|
| 163587 |
+
"learning_rate": 0.01541261044176707,
|
| 163588 |
+
"loss": 1.1242,
|
| 163589 |
+
"step": 23341
|
| 163590 |
+
},
|
| 163591 |
+
{
|
| 163592 |
+
"epoch": 0.5492235294117647,
|
| 163593 |
+
"grad_norm": 0.1474609375,
|
| 163594 |
+
"learning_rate": 0.01541181124497992,
|
| 163595 |
+
"loss": 0.9067,
|
| 163596 |
+
"step": 23342
|
| 163597 |
+
},
|
| 163598 |
+
{
|
| 163599 |
+
"epoch": 0.5492470588235294,
|
| 163600 |
+
"grad_norm": 0.1328125,
|
| 163601 |
+
"learning_rate": 0.015411012048192773,
|
| 163602 |
+
"loss": 0.8857,
|
| 163603 |
+
"step": 23343
|
| 163604 |
+
},
|
| 163605 |
+
{
|
| 163606 |
+
"epoch": 0.5492705882352941,
|
| 163607 |
+
"grad_norm": 0.134765625,
|
| 163608 |
+
"learning_rate": 0.015410212851405621,
|
| 163609 |
+
"loss": 1.1175,
|
| 163610 |
+
"step": 23344
|
| 163611 |
+
},
|
| 163612 |
+
{
|
| 163613 |
+
"epoch": 0.5492941176470588,
|
| 163614 |
+
"grad_norm": 0.126953125,
|
| 163615 |
+
"learning_rate": 0.015409413654618476,
|
| 163616 |
+
"loss": 0.9772,
|
| 163617 |
+
"step": 23345
|
| 163618 |
+
},
|
| 163619 |
+
{
|
| 163620 |
+
"epoch": 0.5493176470588236,
|
| 163621 |
+
"grad_norm": 0.1513671875,
|
| 163622 |
+
"learning_rate": 0.015408614457831328,
|
| 163623 |
+
"loss": 0.7615,
|
| 163624 |
+
"step": 23346
|
| 163625 |
+
},
|
| 163626 |
+
{
|
| 163627 |
+
"epoch": 0.5493411764705882,
|
| 163628 |
+
"grad_norm": 0.1455078125,
|
| 163629 |
+
"learning_rate": 0.015407815261044178,
|
| 163630 |
+
"loss": 0.5724,
|
| 163631 |
+
"step": 23347
|
| 163632 |
+
},
|
| 163633 |
+
{
|
| 163634 |
+
"epoch": 0.549364705882353,
|
| 163635 |
+
"grad_norm": 0.1298828125,
|
| 163636 |
+
"learning_rate": 0.01540701606425703,
|
| 163637 |
+
"loss": 1.2035,
|
| 163638 |
+
"step": 23348
|
| 163639 |
+
},
|
| 163640 |
+
{
|
| 163641 |
+
"epoch": 0.5493882352941176,
|
| 163642 |
+
"grad_norm": 0.12255859375,
|
| 163643 |
+
"learning_rate": 0.01540621686746988,
|
| 163644 |
+
"loss": 0.9656,
|
| 163645 |
+
"step": 23349
|
| 163646 |
+
},
|
| 163647 |
+
{
|
| 163648 |
+
"epoch": 0.5494117647058824,
|
| 163649 |
+
"grad_norm": 0.13671875,
|
| 163650 |
+
"learning_rate": 0.015405417670682731,
|
| 163651 |
+
"loss": 0.9868,
|
| 163652 |
+
"step": 23350
|
| 163653 |
+
},
|
| 163654 |
+
{
|
| 163655 |
+
"epoch": 0.549435294117647,
|
| 163656 |
+
"grad_norm": 0.12255859375,
|
| 163657 |
+
"learning_rate": 0.015404618473895583,
|
| 163658 |
+
"loss": 0.9911,
|
| 163659 |
+
"step": 23351
|
| 163660 |
+
},
|
| 163661 |
+
{
|
| 163662 |
+
"epoch": 0.5494588235294118,
|
| 163663 |
+
"grad_norm": 0.1435546875,
|
| 163664 |
+
"learning_rate": 0.015403819277108434,
|
| 163665 |
+
"loss": 0.73,
|
| 163666 |
+
"step": 23352
|
| 163667 |
+
},
|
| 163668 |
+
{
|
| 163669 |
+
"epoch": 0.5494823529411764,
|
| 163670 |
+
"grad_norm": 0.142578125,
|
| 163671 |
+
"learning_rate": 0.015403020080321286,
|
| 163672 |
+
"loss": 0.9137,
|
| 163673 |
+
"step": 23353
|
| 163674 |
+
},
|
| 163675 |
+
{
|
| 163676 |
+
"epoch": 0.5495058823529412,
|
| 163677 |
+
"grad_norm": 0.1279296875,
|
| 163678 |
+
"learning_rate": 0.015402220883534138,
|
| 163679 |
+
"loss": 1.2795,
|
| 163680 |
+
"step": 23354
|
| 163681 |
+
},
|
| 163682 |
+
{
|
| 163683 |
+
"epoch": 0.5495294117647059,
|
| 163684 |
+
"grad_norm": 0.1455078125,
|
| 163685 |
+
"learning_rate": 0.015401421686746988,
|
| 163686 |
+
"loss": 0.8397,
|
| 163687 |
+
"step": 23355
|
| 163688 |
+
},
|
| 163689 |
+
{
|
| 163690 |
+
"epoch": 0.5495529411764706,
|
| 163691 |
+
"grad_norm": 0.142578125,
|
| 163692 |
+
"learning_rate": 0.01540062248995984,
|
| 163693 |
+
"loss": 0.8133,
|
| 163694 |
+
"step": 23356
|
| 163695 |
+
},
|
| 163696 |
+
{
|
| 163697 |
+
"epoch": 0.5495764705882353,
|
| 163698 |
+
"grad_norm": 0.13671875,
|
| 163699 |
+
"learning_rate": 0.01539982329317269,
|
| 163700 |
+
"loss": 0.7689,
|
| 163701 |
+
"step": 23357
|
| 163702 |
+
},
|
| 163703 |
+
{
|
| 163704 |
+
"epoch": 0.5496,
|
| 163705 |
+
"grad_norm": 0.146484375,
|
| 163706 |
+
"learning_rate": 0.015399024096385541,
|
| 163707 |
+
"loss": 1.1101,
|
| 163708 |
+
"step": 23358
|
| 163709 |
+
},
|
| 163710 |
+
{
|
| 163711 |
+
"epoch": 0.5496235294117647,
|
| 163712 |
+
"grad_norm": 0.140625,
|
| 163713 |
+
"learning_rate": 0.015398224899598394,
|
| 163714 |
+
"loss": 0.8795,
|
| 163715 |
+
"step": 23359
|
| 163716 |
+
},
|
| 163717 |
+
{
|
| 163718 |
+
"epoch": 0.5496470588235294,
|
| 163719 |
+
"grad_norm": 0.1357421875,
|
| 163720 |
+
"learning_rate": 0.015397425702811246,
|
| 163721 |
+
"loss": 0.8949,
|
| 163722 |
+
"step": 23360
|
| 163723 |
+
},
|
| 163724 |
+
{
|
| 163725 |
+
"epoch": 0.5496705882352941,
|
| 163726 |
+
"grad_norm": 0.134765625,
|
| 163727 |
+
"learning_rate": 0.015396626506024096,
|
| 163728 |
+
"loss": 1.0301,
|
| 163729 |
+
"step": 23361
|
| 163730 |
+
},
|
| 163731 |
+
{
|
| 163732 |
+
"epoch": 0.5496941176470588,
|
| 163733 |
+
"grad_norm": 0.1318359375,
|
| 163734 |
+
"learning_rate": 0.015395827309236948,
|
| 163735 |
+
"loss": 0.8817,
|
| 163736 |
+
"step": 23362
|
| 163737 |
+
},
|
| 163738 |
+
{
|
| 163739 |
+
"epoch": 0.5497176470588235,
|
| 163740 |
+
"grad_norm": 0.126953125,
|
| 163741 |
+
"learning_rate": 0.015395028112449798,
|
| 163742 |
+
"loss": 1.0944,
|
| 163743 |
+
"step": 23363
|
| 163744 |
+
},
|
| 163745 |
+
{
|
| 163746 |
+
"epoch": 0.5497411764705883,
|
| 163747 |
+
"grad_norm": 0.125,
|
| 163748 |
+
"learning_rate": 0.015394228915662651,
|
| 163749 |
+
"loss": 0.8911,
|
| 163750 |
+
"step": 23364
|
| 163751 |
+
},
|
| 163752 |
+
{
|
| 163753 |
+
"epoch": 0.5497647058823529,
|
| 163754 |
+
"grad_norm": 0.1328125,
|
| 163755 |
+
"learning_rate": 0.015393429718875501,
|
| 163756 |
+
"loss": 1.1571,
|
| 163757 |
+
"step": 23365
|
| 163758 |
+
},
|
| 163759 |
+
{
|
| 163760 |
+
"epoch": 0.5497882352941177,
|
| 163761 |
+
"grad_norm": 0.1259765625,
|
| 163762 |
+
"learning_rate": 0.015392630522088356,
|
| 163763 |
+
"loss": 1.2269,
|
| 163764 |
+
"step": 23366
|
| 163765 |
+
},
|
| 163766 |
+
{
|
| 163767 |
+
"epoch": 0.5498117647058823,
|
| 163768 |
+
"grad_norm": 0.1259765625,
|
| 163769 |
+
"learning_rate": 0.015391831325301204,
|
| 163770 |
+
"loss": 1.0186,
|
| 163771 |
+
"step": 23367
|
| 163772 |
+
},
|
| 163773 |
+
{
|
| 163774 |
+
"epoch": 0.5498352941176471,
|
| 163775 |
+
"grad_norm": 0.150390625,
|
| 163776 |
+
"learning_rate": 0.015391032128514058,
|
| 163777 |
+
"loss": 0.9914,
|
| 163778 |
+
"step": 23368
|
| 163779 |
+
},
|
| 163780 |
+
{
|
| 163781 |
+
"epoch": 0.5498588235294117,
|
| 163782 |
+
"grad_norm": 0.12353515625,
|
| 163783 |
+
"learning_rate": 0.01539023293172691,
|
| 163784 |
+
"loss": 0.9478,
|
| 163785 |
+
"step": 23369
|
| 163786 |
+
},
|
| 163787 |
+
{
|
| 163788 |
+
"epoch": 0.5498823529411765,
|
| 163789 |
+
"grad_norm": 0.134765625,
|
| 163790 |
+
"learning_rate": 0.01538943373493976,
|
| 163791 |
+
"loss": 0.8703,
|
| 163792 |
+
"step": 23370
|
| 163793 |
+
},
|
| 163794 |
+
{
|
| 163795 |
+
"epoch": 0.5499058823529411,
|
| 163796 |
+
"grad_norm": 0.1279296875,
|
| 163797 |
+
"learning_rate": 0.015388634538152611,
|
| 163798 |
+
"loss": 1.0288,
|
| 163799 |
+
"step": 23371
|
| 163800 |
+
},
|
| 163801 |
+
{
|
| 163802 |
+
"epoch": 0.5499294117647059,
|
| 163803 |
+
"grad_norm": 0.1376953125,
|
| 163804 |
+
"learning_rate": 0.015387835341365464,
|
| 163805 |
+
"loss": 0.7829,
|
| 163806 |
+
"step": 23372
|
| 163807 |
+
},
|
| 163808 |
+
{
|
| 163809 |
+
"epoch": 0.5499529411764705,
|
| 163810 |
+
"grad_norm": 0.1298828125,
|
| 163811 |
+
"learning_rate": 0.015387036144578314,
|
| 163812 |
+
"loss": 1.3358,
|
| 163813 |
+
"step": 23373
|
| 163814 |
+
},
|
| 163815 |
+
{
|
| 163816 |
+
"epoch": 0.5499764705882353,
|
| 163817 |
+
"grad_norm": 0.1328125,
|
| 163818 |
+
"learning_rate": 0.015386236947791166,
|
| 163819 |
+
"loss": 0.8077,
|
| 163820 |
+
"step": 23374
|
| 163821 |
+
},
|
| 163822 |
+
{
|
| 163823 |
+
"epoch": 0.55,
|
| 163824 |
+
"grad_norm": 0.142578125,
|
| 163825 |
+
"learning_rate": 0.015385437751004016,
|
| 163826 |
+
"loss": 0.9076,
|
| 163827 |
+
"step": 23375
|
| 163828 |
+
},
|
| 163829 |
+
{
|
| 163830 |
+
"epoch": 0.5500235294117647,
|
| 163831 |
+
"grad_norm": 0.1376953125,
|
| 163832 |
+
"learning_rate": 0.015384638554216868,
|
| 163833 |
+
"loss": 0.8985,
|
| 163834 |
+
"step": 23376
|
| 163835 |
+
},
|
| 163836 |
+
{
|
| 163837 |
+
"epoch": 0.5500470588235294,
|
| 163838 |
+
"grad_norm": 0.1337890625,
|
| 163839 |
+
"learning_rate": 0.01538383935742972,
|
| 163840 |
+
"loss": 0.9738,
|
| 163841 |
+
"step": 23377
|
| 163842 |
+
},
|
| 163843 |
+
{
|
| 163844 |
+
"epoch": 0.5500705882352941,
|
| 163845 |
+
"grad_norm": 0.134765625,
|
| 163846 |
+
"learning_rate": 0.01538304016064257,
|
| 163847 |
+
"loss": 0.642,
|
| 163848 |
+
"step": 23378
|
| 163849 |
+
},
|
| 163850 |
+
{
|
| 163851 |
+
"epoch": 0.5500941176470588,
|
| 163852 |
+
"grad_norm": 0.12451171875,
|
| 163853 |
+
"learning_rate": 0.015382240963855423,
|
| 163854 |
+
"loss": 1.0596,
|
| 163855 |
+
"step": 23379
|
| 163856 |
+
},
|
| 163857 |
+
{
|
| 163858 |
+
"epoch": 0.5501176470588235,
|
| 163859 |
+
"grad_norm": 0.140625,
|
| 163860 |
+
"learning_rate": 0.015381441767068274,
|
| 163861 |
+
"loss": 0.7747,
|
| 163862 |
+
"step": 23380
|
| 163863 |
+
},
|
| 163864 |
+
{
|
| 163865 |
+
"epoch": 0.5501411764705882,
|
| 163866 |
+
"grad_norm": 0.1337890625,
|
| 163867 |
+
"learning_rate": 0.015380642570281124,
|
| 163868 |
+
"loss": 0.8205,
|
| 163869 |
+
"step": 23381
|
| 163870 |
+
},
|
| 163871 |
+
{
|
| 163872 |
+
"epoch": 0.5501647058823529,
|
| 163873 |
+
"grad_norm": 0.1328125,
|
| 163874 |
+
"learning_rate": 0.015379843373493976,
|
| 163875 |
+
"loss": 0.9117,
|
| 163876 |
+
"step": 23382
|
| 163877 |
+
},
|
| 163878 |
+
{
|
| 163879 |
+
"epoch": 0.5501882352941176,
|
| 163880 |
+
"grad_norm": 0.146484375,
|
| 163881 |
+
"learning_rate": 0.015379044176706826,
|
| 163882 |
+
"loss": 0.7029,
|
| 163883 |
+
"step": 23383
|
| 163884 |
+
},
|
| 163885 |
+
{
|
| 163886 |
+
"epoch": 0.5502117647058824,
|
| 163887 |
+
"grad_norm": 0.1435546875,
|
| 163888 |
+
"learning_rate": 0.015378244979919678,
|
| 163889 |
+
"loss": 0.9076,
|
| 163890 |
+
"step": 23384
|
| 163891 |
+
},
|
| 163892 |
+
{
|
| 163893 |
+
"epoch": 0.550235294117647,
|
| 163894 |
+
"grad_norm": 0.130859375,
|
| 163895 |
+
"learning_rate": 0.015377445783132531,
|
| 163896 |
+
"loss": 0.9651,
|
| 163897 |
+
"step": 23385
|
| 163898 |
+
},
|
| 163899 |
+
{
|
| 163900 |
+
"epoch": 0.5502588235294118,
|
| 163901 |
+
"grad_norm": 0.13671875,
|
| 163902 |
+
"learning_rate": 0.015376646586345383,
|
| 163903 |
+
"loss": 0.7064,
|
| 163904 |
+
"step": 23386
|
| 163905 |
+
},
|
| 163906 |
+
{
|
| 163907 |
+
"epoch": 0.5502823529411764,
|
| 163908 |
+
"grad_norm": 0.12451171875,
|
| 163909 |
+
"learning_rate": 0.015375847389558234,
|
| 163910 |
+
"loss": 1.2383,
|
| 163911 |
+
"step": 23387
|
| 163912 |
+
},
|
| 163913 |
+
{
|
| 163914 |
+
"epoch": 0.5503058823529412,
|
| 163915 |
+
"grad_norm": 0.142578125,
|
| 163916 |
+
"learning_rate": 0.015375048192771084,
|
| 163917 |
+
"loss": 0.8459,
|
| 163918 |
+
"step": 23388
|
| 163919 |
+
},
|
| 163920 |
+
{
|
| 163921 |
+
"epoch": 0.5503294117647058,
|
| 163922 |
+
"grad_norm": 0.1328125,
|
| 163923 |
+
"learning_rate": 0.015374248995983938,
|
| 163924 |
+
"loss": 0.8261,
|
| 163925 |
+
"step": 23389
|
| 163926 |
+
},
|
| 163927 |
+
{
|
| 163928 |
+
"epoch": 0.5503529411764706,
|
| 163929 |
+
"grad_norm": 0.1376953125,
|
| 163930 |
+
"learning_rate": 0.015373449799196786,
|
| 163931 |
+
"loss": 0.6572,
|
| 163932 |
+
"step": 23390
|
| 163933 |
+
},
|
| 163934 |
+
{
|
| 163935 |
+
"epoch": 0.5503764705882352,
|
| 163936 |
+
"grad_norm": 0.140625,
|
| 163937 |
+
"learning_rate": 0.01537265060240964,
|
| 163938 |
+
"loss": 1.0145,
|
| 163939 |
+
"step": 23391
|
| 163940 |
+
},
|
| 163941 |
+
{
|
| 163942 |
+
"epoch": 0.5504,
|
| 163943 |
+
"grad_norm": 0.1435546875,
|
| 163944 |
+
"learning_rate": 0.01537185140562249,
|
| 163945 |
+
"loss": 0.76,
|
| 163946 |
+
"step": 23392
|
| 163947 |
+
},
|
| 163948 |
+
{
|
| 163949 |
+
"epoch": 0.5504235294117648,
|
| 163950 |
+
"grad_norm": 0.142578125,
|
| 163951 |
+
"learning_rate": 0.015371052208835343,
|
| 163952 |
+
"loss": 0.7933,
|
| 163953 |
+
"step": 23393
|
| 163954 |
+
},
|
| 163955 |
+
{
|
| 163956 |
+
"epoch": 0.5504470588235294,
|
| 163957 |
+
"grad_norm": 0.13671875,
|
| 163958 |
+
"learning_rate": 0.015370253012048194,
|
| 163959 |
+
"loss": 0.8352,
|
| 163960 |
+
"step": 23394
|
| 163961 |
+
},
|
| 163962 |
+
{
|
| 163963 |
+
"epoch": 0.5504705882352942,
|
| 163964 |
+
"grad_norm": 0.134765625,
|
| 163965 |
+
"learning_rate": 0.015369453815261046,
|
| 163966 |
+
"loss": 0.6511,
|
| 163967 |
+
"step": 23395
|
| 163968 |
+
},
|
| 163969 |
+
{
|
| 163970 |
+
"epoch": 0.5504941176470588,
|
| 163971 |
+
"grad_norm": 0.1455078125,
|
| 163972 |
+
"learning_rate": 0.015368654618473896,
|
| 163973 |
+
"loss": 1.0089,
|
| 163974 |
+
"step": 23396
|
| 163975 |
+
},
|
| 163976 |
+
{
|
| 163977 |
+
"epoch": 0.5505176470588236,
|
| 163978 |
+
"grad_norm": 0.14453125,
|
| 163979 |
+
"learning_rate": 0.015367855421686748,
|
| 163980 |
+
"loss": 0.5324,
|
| 163981 |
+
"step": 23397
|
| 163982 |
+
},
|
| 163983 |
+
{
|
| 163984 |
+
"epoch": 0.5505411764705882,
|
| 163985 |
+
"grad_norm": 0.126953125,
|
| 163986 |
+
"learning_rate": 0.015367056224899598,
|
| 163987 |
+
"loss": 1.1988,
|
| 163988 |
+
"step": 23398
|
| 163989 |
+
},
|
| 163990 |
+
{
|
| 163991 |
+
"epoch": 0.550564705882353,
|
| 163992 |
+
"grad_norm": 0.130859375,
|
| 163993 |
+
"learning_rate": 0.01536625702811245,
|
| 163994 |
+
"loss": 0.7709,
|
| 163995 |
+
"step": 23399
|
| 163996 |
+
},
|
| 163997 |
+
{
|
| 163998 |
+
"epoch": 0.5505882352941176,
|
| 163999 |
+
"grad_norm": 0.1318359375,
|
| 164000 |
+
"learning_rate": 0.015365457831325303,
|
| 164001 |
+
"loss": 1.0202,
|
| 164002 |
+
"step": 23400
|
| 164003 |
+
},
|
| 164004 |
+
{
|
| 164005 |
+
"epoch": 0.5506117647058824,
|
| 164006 |
+
"grad_norm": 0.134765625,
|
| 164007 |
+
"learning_rate": 0.015364658634538153,
|
| 164008 |
+
"loss": 1.0951,
|
| 164009 |
+
"step": 23401
|
| 164010 |
+
},
|
| 164011 |
+
{
|
| 164012 |
+
"epoch": 0.5506352941176471,
|
| 164013 |
+
"grad_norm": 0.146484375,
|
| 164014 |
+
"learning_rate": 0.015363859437751004,
|
| 164015 |
+
"loss": 0.8101,
|
| 164016 |
+
"step": 23402
|
| 164017 |
+
},
|
| 164018 |
+
{
|
| 164019 |
+
"epoch": 0.5506588235294118,
|
| 164020 |
+
"grad_norm": 0.1279296875,
|
| 164021 |
+
"learning_rate": 0.015363060240963856,
|
| 164022 |
+
"loss": 1.2484,
|
| 164023 |
+
"step": 23403
|
| 164024 |
+
},
|
| 164025 |
+
{
|
| 164026 |
+
"epoch": 0.5506823529411765,
|
| 164027 |
+
"grad_norm": 0.1435546875,
|
| 164028 |
+
"learning_rate": 0.015362261044176706,
|
| 164029 |
+
"loss": 0.5794,
|
| 164030 |
+
"step": 23404
|
| 164031 |
+
},
|
| 164032 |
+
{
|
| 164033 |
+
"epoch": 0.5507058823529412,
|
| 164034 |
+
"grad_norm": 0.1318359375,
|
| 164035 |
+
"learning_rate": 0.015361461847389558,
|
| 164036 |
+
"loss": 0.9699,
|
| 164037 |
+
"step": 23405
|
| 164038 |
+
},
|
| 164039 |
+
{
|
| 164040 |
+
"epoch": 0.5507294117647059,
|
| 164041 |
+
"grad_norm": 0.1279296875,
|
| 164042 |
+
"learning_rate": 0.015360662650602408,
|
| 164043 |
+
"loss": 1.2109,
|
| 164044 |
+
"step": 23406
|
| 164045 |
+
},
|
| 164046 |
+
{
|
| 164047 |
+
"epoch": 0.5507529411764706,
|
| 164048 |
+
"grad_norm": 0.1298828125,
|
| 164049 |
+
"learning_rate": 0.015359863453815261,
|
| 164050 |
+
"loss": 0.8731,
|
| 164051 |
+
"step": 23407
|
| 164052 |
+
},
|
| 164053 |
+
{
|
| 164054 |
+
"epoch": 0.5507764705882353,
|
| 164055 |
+
"grad_norm": 0.1376953125,
|
| 164056 |
+
"learning_rate": 0.015359064257028114,
|
| 164057 |
+
"loss": 0.9394,
|
| 164058 |
+
"step": 23408
|
| 164059 |
+
},
|
| 164060 |
+
{
|
| 164061 |
+
"epoch": 0.5508,
|
| 164062 |
+
"grad_norm": 0.1376953125,
|
| 164063 |
+
"learning_rate": 0.015358265060240963,
|
| 164064 |
+
"loss": 0.9475,
|
| 164065 |
+
"step": 23409
|
| 164066 |
+
},
|
| 164067 |
+
{
|
| 164068 |
+
"epoch": 0.5508235294117647,
|
| 164069 |
+
"grad_norm": 0.1279296875,
|
| 164070 |
+
"learning_rate": 0.015357465863453816,
|
| 164071 |
+
"loss": 1.1674,
|
| 164072 |
+
"step": 23410
|
| 164073 |
+
},
|
| 164074 |
+
{
|
| 164075 |
+
"epoch": 0.5508470588235294,
|
| 164076 |
+
"grad_norm": 0.1396484375,
|
| 164077 |
+
"learning_rate": 0.015356666666666666,
|
| 164078 |
+
"loss": 1.0475,
|
| 164079 |
+
"step": 23411
|
| 164080 |
+
},
|
| 164081 |
+
{
|
| 164082 |
+
"epoch": 0.5508705882352941,
|
| 164083 |
+
"grad_norm": 0.138671875,
|
| 164084 |
+
"learning_rate": 0.01535586746987952,
|
| 164085 |
+
"loss": 0.7187,
|
| 164086 |
+
"step": 23412
|
| 164087 |
+
},
|
| 164088 |
+
{
|
| 164089 |
+
"epoch": 0.5508941176470589,
|
| 164090 |
+
"grad_norm": 0.1533203125,
|
| 164091 |
+
"learning_rate": 0.015355068273092368,
|
| 164092 |
+
"loss": 0.4491,
|
| 164093 |
+
"step": 23413
|
| 164094 |
+
},
|
| 164095 |
+
{
|
| 164096 |
+
"epoch": 0.5509176470588235,
|
| 164097 |
+
"grad_norm": 0.130859375,
|
| 164098 |
+
"learning_rate": 0.015354269076305223,
|
| 164099 |
+
"loss": 0.9911,
|
| 164100 |
+
"step": 23414
|
| 164101 |
+
},
|
| 164102 |
+
{
|
| 164103 |
+
"epoch": 0.5509411764705883,
|
| 164104 |
+
"grad_norm": 0.1474609375,
|
| 164105 |
+
"learning_rate": 0.015353469879518074,
|
| 164106 |
+
"loss": 1.0572,
|
| 164107 |
+
"step": 23415
|
| 164108 |
+
},
|
| 164109 |
+
{
|
| 164110 |
+
"epoch": 0.5509647058823529,
|
| 164111 |
+
"grad_norm": 0.1396484375,
|
| 164112 |
+
"learning_rate": 0.015352670682730924,
|
| 164113 |
+
"loss": 0.8708,
|
| 164114 |
+
"step": 23416
|
| 164115 |
+
},
|
| 164116 |
+
{
|
| 164117 |
+
"epoch": 0.5509882352941177,
|
| 164118 |
+
"grad_norm": 0.1357421875,
|
| 164119 |
+
"learning_rate": 0.015351871485943776,
|
| 164120 |
+
"loss": 0.8104,
|
| 164121 |
+
"step": 23417
|
| 164122 |
+
},
|
| 164123 |
+
{
|
| 164124 |
+
"epoch": 0.5510117647058823,
|
| 164125 |
+
"grad_norm": 0.125,
|
| 164126 |
+
"learning_rate": 0.015351072289156626,
|
| 164127 |
+
"loss": 1.1572,
|
| 164128 |
+
"step": 23418
|
| 164129 |
+
},
|
| 164130 |
+
{
|
| 164131 |
+
"epoch": 0.5510352941176471,
|
| 164132 |
+
"grad_norm": 0.150390625,
|
| 164133 |
+
"learning_rate": 0.015350273092369478,
|
| 164134 |
+
"loss": 0.6726,
|
| 164135 |
+
"step": 23419
|
| 164136 |
+
},
|
| 164137 |
+
{
|
| 164138 |
+
"epoch": 0.5510588235294117,
|
| 164139 |
+
"grad_norm": 0.1259765625,
|
| 164140 |
+
"learning_rate": 0.01534947389558233,
|
| 164141 |
+
"loss": 1.2323,
|
| 164142 |
+
"step": 23420
|
| 164143 |
+
},
|
| 164144 |
+
{
|
| 164145 |
+
"epoch": 0.5510823529411765,
|
| 164146 |
+
"grad_norm": 0.130859375,
|
| 164147 |
+
"learning_rate": 0.015348674698795183,
|
| 164148 |
+
"loss": 1.0056,
|
| 164149 |
+
"step": 23421
|
| 164150 |
+
},
|
| 164151 |
+
{
|
| 164152 |
+
"epoch": 0.5511058823529412,
|
| 164153 |
+
"grad_norm": 0.130859375,
|
| 164154 |
+
"learning_rate": 0.015347875502008033,
|
| 164155 |
+
"loss": 1.0776,
|
| 164156 |
+
"step": 23422
|
| 164157 |
+
},
|
| 164158 |
+
{
|
| 164159 |
+
"epoch": 0.5511294117647059,
|
| 164160 |
+
"grad_norm": 0.140625,
|
| 164161 |
+
"learning_rate": 0.015347076305220884,
|
| 164162 |
+
"loss": 0.6647,
|
| 164163 |
+
"step": 23423
|
| 164164 |
+
},
|
| 164165 |
+
{
|
| 164166 |
+
"epoch": 0.5511529411764706,
|
| 164167 |
+
"grad_norm": 0.1259765625,
|
| 164168 |
+
"learning_rate": 0.015346277108433734,
|
| 164169 |
+
"loss": 1.1149,
|
| 164170 |
+
"step": 23424
|
| 164171 |
+
},
|
| 164172 |
+
{
|
| 164173 |
+
"epoch": 0.5511764705882353,
|
| 164174 |
+
"grad_norm": 0.1494140625,
|
| 164175 |
+
"learning_rate": 0.015345477911646586,
|
| 164176 |
+
"loss": 0.5988,
|
| 164177 |
+
"step": 23425
|
| 164178 |
+
},
|
| 164179 |
+
{
|
| 164180 |
+
"epoch": 0.5512,
|
| 164181 |
+
"grad_norm": 0.1484375,
|
| 164182 |
+
"learning_rate": 0.015344678714859438,
|
| 164183 |
+
"loss": 0.5964,
|
| 164184 |
+
"step": 23426
|
| 164185 |
+
},
|
| 164186 |
+
{
|
| 164187 |
+
"epoch": 0.5512235294117647,
|
| 164188 |
+
"grad_norm": 0.1298828125,
|
| 164189 |
+
"learning_rate": 0.015343879518072288,
|
| 164190 |
+
"loss": 0.9213,
|
| 164191 |
+
"step": 23427
|
| 164192 |
+
},
|
| 164193 |
+
{
|
| 164194 |
+
"epoch": 0.5512470588235294,
|
| 164195 |
+
"grad_norm": 0.1669921875,
|
| 164196 |
+
"learning_rate": 0.015343080321285141,
|
| 164197 |
+
"loss": 1.0182,
|
| 164198 |
+
"step": 23428
|
| 164199 |
+
},
|
| 164200 |
+
{
|
| 164201 |
+
"epoch": 0.5512705882352941,
|
| 164202 |
+
"grad_norm": 0.1484375,
|
| 164203 |
+
"learning_rate": 0.015342281124497993,
|
| 164204 |
+
"loss": 0.5726,
|
| 164205 |
+
"step": 23429
|
| 164206 |
+
},
|
| 164207 |
+
{
|
| 164208 |
+
"epoch": 0.5512941176470588,
|
| 164209 |
+
"grad_norm": 0.15234375,
|
| 164210 |
+
"learning_rate": 0.015341481927710843,
|
| 164211 |
+
"loss": 0.708,
|
| 164212 |
+
"step": 23430
|
| 164213 |
+
},
|
| 164214 |
+
{
|
| 164215 |
+
"epoch": 0.5513176470588236,
|
| 164216 |
+
"grad_norm": 0.13671875,
|
| 164217 |
+
"learning_rate": 0.015340682730923696,
|
| 164218 |
+
"loss": 0.7392,
|
| 164219 |
+
"step": 23431
|
| 164220 |
+
},
|
| 164221 |
+
{
|
| 164222 |
+
"epoch": 0.5513411764705882,
|
| 164223 |
+
"grad_norm": 0.1416015625,
|
| 164224 |
+
"learning_rate": 0.015339883534136544,
|
| 164225 |
+
"loss": 0.7377,
|
| 164226 |
+
"step": 23432
|
| 164227 |
+
},
|
| 164228 |
+
{
|
| 164229 |
+
"epoch": 0.551364705882353,
|
| 164230 |
+
"grad_norm": 0.11572265625,
|
| 164231 |
+
"learning_rate": 0.015339084337349398,
|
| 164232 |
+
"loss": 1.1039,
|
| 164233 |
+
"step": 23433
|
| 164234 |
+
},
|
| 164235 |
+
{
|
| 164236 |
+
"epoch": 0.5513882352941176,
|
| 164237 |
+
"grad_norm": 0.146484375,
|
| 164238 |
+
"learning_rate": 0.015338285140562248,
|
| 164239 |
+
"loss": 0.7521,
|
| 164240 |
+
"step": 23434
|
| 164241 |
+
},
|
| 164242 |
+
{
|
| 164243 |
+
"epoch": 0.5514117647058824,
|
| 164244 |
+
"grad_norm": 0.13671875,
|
| 164245 |
+
"learning_rate": 0.015337485943775103,
|
| 164246 |
+
"loss": 0.9875,
|
| 164247 |
+
"step": 23435
|
| 164248 |
+
},
|
| 164249 |
+
{
|
| 164250 |
+
"epoch": 0.551435294117647,
|
| 164251 |
+
"grad_norm": 0.13671875,
|
| 164252 |
+
"learning_rate": 0.015336686746987953,
|
| 164253 |
+
"loss": 0.968,
|
| 164254 |
+
"step": 23436
|
| 164255 |
+
},
|
| 164256 |
+
{
|
| 164257 |
+
"epoch": 0.5514588235294118,
|
| 164258 |
+
"grad_norm": 0.12890625,
|
| 164259 |
+
"learning_rate": 0.015335887550200804,
|
| 164260 |
+
"loss": 1.0024,
|
| 164261 |
+
"step": 23437
|
| 164262 |
+
},
|
| 164263 |
+
{
|
| 164264 |
+
"epoch": 0.5514823529411764,
|
| 164265 |
+
"grad_norm": 0.130859375,
|
| 164266 |
+
"learning_rate": 0.015335088353413656,
|
| 164267 |
+
"loss": 1.2719,
|
| 164268 |
+
"step": 23438
|
| 164269 |
+
},
|
| 164270 |
+
{
|
| 164271 |
+
"epoch": 0.5515058823529412,
|
| 164272 |
+
"grad_norm": 0.134765625,
|
| 164273 |
+
"learning_rate": 0.015334289156626506,
|
| 164274 |
+
"loss": 1.0496,
|
| 164275 |
+
"step": 23439
|
| 164276 |
+
},
|
| 164277 |
+
{
|
| 164278 |
+
"epoch": 0.5515294117647059,
|
| 164279 |
+
"grad_norm": 0.1474609375,
|
| 164280 |
+
"learning_rate": 0.015333489959839358,
|
| 164281 |
+
"loss": 0.5874,
|
| 164282 |
+
"step": 23440
|
| 164283 |
+
},
|
| 164284 |
+
{
|
| 164285 |
+
"epoch": 0.5515529411764706,
|
| 164286 |
+
"grad_norm": 0.1318359375,
|
| 164287 |
+
"learning_rate": 0.015332690763052211,
|
| 164288 |
+
"loss": 0.931,
|
| 164289 |
+
"step": 23441
|
| 164290 |
+
},
|
| 164291 |
+
{
|
| 164292 |
+
"epoch": 0.5515764705882353,
|
| 164293 |
+
"grad_norm": 0.134765625,
|
| 164294 |
+
"learning_rate": 0.015331891566265061,
|
| 164295 |
+
"loss": 0.5616,
|
| 164296 |
+
"step": 23442
|
| 164297 |
+
},
|
| 164298 |
+
{
|
| 164299 |
+
"epoch": 0.5516,
|
| 164300 |
+
"grad_norm": 0.12890625,
|
| 164301 |
+
"learning_rate": 0.015331092369477913,
|
| 164302 |
+
"loss": 1.0999,
|
| 164303 |
+
"step": 23443
|
| 164304 |
+
},
|
| 164305 |
+
{
|
| 164306 |
+
"epoch": 0.5516235294117647,
|
| 164307 |
+
"grad_norm": 0.1298828125,
|
| 164308 |
+
"learning_rate": 0.015330293172690763,
|
| 164309 |
+
"loss": 1.1041,
|
| 164310 |
+
"step": 23444
|
| 164311 |
+
},
|
| 164312 |
+
{
|
| 164313 |
+
"epoch": 0.5516470588235294,
|
| 164314 |
+
"grad_norm": 0.1748046875,
|
| 164315 |
+
"learning_rate": 0.015329493975903614,
|
| 164316 |
+
"loss": 0.7844,
|
| 164317 |
+
"step": 23445
|
| 164318 |
+
},
|
| 164319 |
+
{
|
| 164320 |
+
"epoch": 0.5516705882352941,
|
| 164321 |
+
"grad_norm": 0.146484375,
|
| 164322 |
+
"learning_rate": 0.015328694779116466,
|
| 164323 |
+
"loss": 0.7037,
|
| 164324 |
+
"step": 23446
|
| 164325 |
+
},
|
| 164326 |
+
{
|
| 164327 |
+
"epoch": 0.5516941176470588,
|
| 164328 |
+
"grad_norm": 0.142578125,
|
| 164329 |
+
"learning_rate": 0.015327895582329316,
|
| 164330 |
+
"loss": 0.6985,
|
| 164331 |
+
"step": 23447
|
| 164332 |
+
},
|
| 164333 |
+
{
|
| 164334 |
+
"epoch": 0.5517176470588235,
|
| 164335 |
+
"grad_norm": 0.1201171875,
|
| 164336 |
+
"learning_rate": 0.01532709638554217,
|
| 164337 |
+
"loss": 1.3469,
|
| 164338 |
+
"step": 23448
|
| 164339 |
+
},
|
| 164340 |
+
{
|
| 164341 |
+
"epoch": 0.5517411764705883,
|
| 164342 |
+
"grad_norm": 0.1650390625,
|
| 164343 |
+
"learning_rate": 0.015326297188755021,
|
| 164344 |
+
"loss": 0.9338,
|
| 164345 |
+
"step": 23449
|
| 164346 |
+
},
|
| 164347 |
+
{
|
| 164348 |
+
"epoch": 0.5517647058823529,
|
| 164349 |
+
"grad_norm": 0.1416015625,
|
| 164350 |
+
"learning_rate": 0.015325497991967871,
|
| 164351 |
+
"loss": 0.7578,
|
| 164352 |
+
"step": 23450
|
| 164353 |
+
},
|
| 164354 |
+
{
|
| 164355 |
+
"epoch": 0.5517882352941177,
|
| 164356 |
+
"grad_norm": 0.1328125,
|
| 164357 |
+
"learning_rate": 0.015324698795180723,
|
| 164358 |
+
"loss": 0.668,
|
| 164359 |
+
"step": 23451
|
| 164360 |
+
},
|
| 164361 |
+
{
|
| 164362 |
+
"epoch": 0.5518117647058823,
|
| 164363 |
+
"grad_norm": 0.11376953125,
|
| 164364 |
+
"learning_rate": 0.015323899598393574,
|
| 164365 |
+
"loss": 1.1693,
|
| 164366 |
+
"step": 23452
|
| 164367 |
+
},
|
| 164368 |
+
{
|
| 164369 |
+
"epoch": 0.5518352941176471,
|
| 164370 |
+
"grad_norm": 0.1328125,
|
| 164371 |
+
"learning_rate": 0.015323100401606424,
|
| 164372 |
+
"loss": 0.9964,
|
| 164373 |
+
"step": 23453
|
| 164374 |
+
},
|
| 164375 |
+
{
|
| 164376 |
+
"epoch": 0.5518588235294117,
|
| 164377 |
+
"grad_norm": 0.12451171875,
|
| 164378 |
+
"learning_rate": 0.015322301204819278,
|
| 164379 |
+
"loss": 1.3084,
|
| 164380 |
+
"step": 23454
|
| 164381 |
+
},
|
| 164382 |
+
{
|
| 164383 |
+
"epoch": 0.5518823529411765,
|
| 164384 |
+
"grad_norm": 0.150390625,
|
| 164385 |
+
"learning_rate": 0.01532150200803213,
|
| 164386 |
+
"loss": 0.8761,
|
| 164387 |
+
"step": 23455
|
| 164388 |
+
},
|
| 164389 |
+
{
|
| 164390 |
+
"epoch": 0.5519058823529411,
|
| 164391 |
+
"grad_norm": 0.1435546875,
|
| 164392 |
+
"learning_rate": 0.015320702811244981,
|
| 164393 |
+
"loss": 0.7761,
|
| 164394 |
+
"step": 23456
|
| 164395 |
+
},
|
| 164396 |
+
{
|
| 164397 |
+
"epoch": 0.5519294117647059,
|
| 164398 |
+
"grad_norm": 0.1376953125,
|
| 164399 |
+
"learning_rate": 0.015319903614457831,
|
| 164400 |
+
"loss": 0.9035,
|
| 164401 |
+
"step": 23457
|
| 164402 |
+
},
|
| 164403 |
+
{
|
| 164404 |
+
"epoch": 0.5519529411764705,
|
| 164405 |
+
"grad_norm": 0.1396484375,
|
| 164406 |
+
"learning_rate": 0.015319104417670685,
|
| 164407 |
+
"loss": 0.9632,
|
| 164408 |
+
"step": 23458
|
| 164409 |
+
},
|
| 164410 |
+
{
|
| 164411 |
+
"epoch": 0.5519764705882353,
|
| 164412 |
+
"grad_norm": 0.1298828125,
|
| 164413 |
+
"learning_rate": 0.015318305220883534,
|
| 164414 |
+
"loss": 0.9451,
|
| 164415 |
+
"step": 23459
|
| 164416 |
+
},
|
| 164417 |
+
{
|
| 164418 |
+
"epoch": 0.552,
|
| 164419 |
+
"grad_norm": 0.123046875,
|
| 164420 |
+
"learning_rate": 0.015317506024096386,
|
| 164421 |
+
"loss": 1.181,
|
| 164422 |
+
"step": 23460
|
| 164423 |
+
},
|
| 164424 |
+
{
|
| 164425 |
+
"epoch": 0.5520235294117647,
|
| 164426 |
+
"grad_norm": 0.14453125,
|
| 164427 |
+
"learning_rate": 0.015316706827309236,
|
| 164428 |
+
"loss": 0.6543,
|
| 164429 |
+
"step": 23461
|
| 164430 |
+
},
|
| 164431 |
+
{
|
| 164432 |
+
"epoch": 0.5520470588235294,
|
| 164433 |
+
"grad_norm": 0.14453125,
|
| 164434 |
+
"learning_rate": 0.01531590763052209,
|
| 164435 |
+
"loss": 0.6403,
|
| 164436 |
+
"step": 23462
|
| 164437 |
+
},
|
| 164438 |
+
{
|
| 164439 |
+
"epoch": 0.5520705882352941,
|
| 164440 |
+
"grad_norm": 0.14453125,
|
| 164441 |
+
"learning_rate": 0.015315108433734941,
|
| 164442 |
+
"loss": 0.8436,
|
| 164443 |
+
"step": 23463
|
| 164444 |
+
},
|
| 164445 |
+
{
|
| 164446 |
+
"epoch": 0.5520941176470588,
|
| 164447 |
+
"grad_norm": 0.1328125,
|
| 164448 |
+
"learning_rate": 0.015314309236947793,
|
| 164449 |
+
"loss": 0.7905,
|
| 164450 |
+
"step": 23464
|
| 164451 |
+
},
|
| 164452 |
+
{
|
| 164453 |
+
"epoch": 0.5521176470588235,
|
| 164454 |
+
"grad_norm": 0.1298828125,
|
| 164455 |
+
"learning_rate": 0.015313510040160643,
|
| 164456 |
+
"loss": 0.9583,
|
| 164457 |
+
"step": 23465
|
| 164458 |
+
},
|
| 164459 |
+
{
|
| 164460 |
+
"epoch": 0.5521411764705882,
|
| 164461 |
+
"grad_norm": 0.12255859375,
|
| 164462 |
+
"learning_rate": 0.015312710843373495,
|
| 164463 |
+
"loss": 0.9479,
|
| 164464 |
+
"step": 23466
|
| 164465 |
+
},
|
| 164466 |
+
{
|
| 164467 |
+
"epoch": 0.5521647058823529,
|
| 164468 |
+
"grad_norm": 0.138671875,
|
| 164469 |
+
"learning_rate": 0.015311911646586344,
|
| 164470 |
+
"loss": 0.9805,
|
| 164471 |
+
"step": 23467
|
| 164472 |
+
},
|
| 164473 |
+
{
|
| 164474 |
+
"epoch": 0.5521882352941176,
|
| 164475 |
+
"grad_norm": 0.126953125,
|
| 164476 |
+
"learning_rate": 0.015311112449799196,
|
| 164477 |
+
"loss": 0.8195,
|
| 164478 |
+
"step": 23468
|
| 164479 |
+
},
|
| 164480 |
+
{
|
| 164481 |
+
"epoch": 0.5522117647058824,
|
| 164482 |
+
"grad_norm": 0.13671875,
|
| 164483 |
+
"learning_rate": 0.01531031325301205,
|
| 164484 |
+
"loss": 0.9965,
|
| 164485 |
+
"step": 23469
|
| 164486 |
+
},
|
| 164487 |
+
{
|
| 164488 |
+
"epoch": 0.552235294117647,
|
| 164489 |
+
"grad_norm": 0.1201171875,
|
| 164490 |
+
"learning_rate": 0.0153095140562249,
|
| 164491 |
+
"loss": 1.1262,
|
| 164492 |
+
"step": 23470
|
| 164493 |
+
},
|
| 164494 |
+
{
|
| 164495 |
+
"epoch": 0.5522588235294118,
|
| 164496 |
+
"grad_norm": 0.1396484375,
|
| 164497 |
+
"learning_rate": 0.015308714859437751,
|
| 164498 |
+
"loss": 0.7896,
|
| 164499 |
+
"step": 23471
|
| 164500 |
+
},
|
| 164501 |
+
{
|
| 164502 |
+
"epoch": 0.5522823529411764,
|
| 164503 |
+
"grad_norm": 0.142578125,
|
| 164504 |
+
"learning_rate": 0.015307915662650603,
|
| 164505 |
+
"loss": 0.6436,
|
| 164506 |
+
"step": 23472
|
| 164507 |
+
},
|
| 164508 |
+
{
|
| 164509 |
+
"epoch": 0.5523058823529412,
|
| 164510 |
+
"grad_norm": 0.12451171875,
|
| 164511 |
+
"learning_rate": 0.015307116465863453,
|
| 164512 |
+
"loss": 0.9267,
|
| 164513 |
+
"step": 23473
|
| 164514 |
+
},
|
| 164515 |
+
{
|
| 164516 |
+
"epoch": 0.5523294117647058,
|
| 164517 |
+
"grad_norm": 0.1259765625,
|
| 164518 |
+
"learning_rate": 0.015306317269076304,
|
| 164519 |
+
"loss": 0.8238,
|
| 164520 |
+
"step": 23474
|
| 164521 |
+
},
|
| 164522 |
+
{
|
| 164523 |
+
"epoch": 0.5523529411764706,
|
| 164524 |
+
"grad_norm": 0.138671875,
|
| 164525 |
+
"learning_rate": 0.015305518072289158,
|
| 164526 |
+
"loss": 0.667,
|
| 164527 |
+
"step": 23475
|
| 164528 |
+
},
|
| 164529 |
+
{
|
| 164530 |
+
"epoch": 0.5523764705882352,
|
| 164531 |
+
"grad_norm": 0.12451171875,
|
| 164532 |
+
"learning_rate": 0.015304718875502008,
|
| 164533 |
+
"loss": 1.2347,
|
| 164534 |
+
"step": 23476
|
| 164535 |
+
},
|
| 164536 |
+
{
|
| 164537 |
+
"epoch": 0.5524,
|
| 164538 |
+
"grad_norm": 0.1220703125,
|
| 164539 |
+
"learning_rate": 0.015303919678714861,
|
| 164540 |
+
"loss": 1.1447,
|
| 164541 |
+
"step": 23477
|
| 164542 |
+
},
|
| 164543 |
+
{
|
| 164544 |
+
"epoch": 0.5524235294117648,
|
| 164545 |
+
"grad_norm": 0.134765625,
|
| 164546 |
+
"learning_rate": 0.015303120481927711,
|
| 164547 |
+
"loss": 1.0975,
|
| 164548 |
+
"step": 23478
|
| 164549 |
+
},
|
| 164550 |
+
{
|
| 164551 |
+
"epoch": 0.5524470588235294,
|
| 164552 |
+
"grad_norm": 0.12890625,
|
| 164553 |
+
"learning_rate": 0.015302321285140563,
|
| 164554 |
+
"loss": 1.1043,
|
| 164555 |
+
"step": 23479
|
| 164556 |
+
},
|
| 164557 |
+
{
|
| 164558 |
+
"epoch": 0.5524705882352942,
|
| 164559 |
+
"grad_norm": 0.1357421875,
|
| 164560 |
+
"learning_rate": 0.015301522088353413,
|
| 164561 |
+
"loss": 1.0454,
|
| 164562 |
+
"step": 23480
|
| 164563 |
+
},
|
| 164564 |
+
{
|
| 164565 |
+
"epoch": 0.5524941176470588,
|
| 164566 |
+
"grad_norm": 0.1328125,
|
| 164567 |
+
"learning_rate": 0.015300722891566266,
|
| 164568 |
+
"loss": 0.9484,
|
| 164569 |
+
"step": 23481
|
| 164570 |
+
},
|
| 164571 |
+
{
|
| 164572 |
+
"epoch": 0.5525176470588236,
|
| 164573 |
+
"grad_norm": 0.1357421875,
|
| 164574 |
+
"learning_rate": 0.015299923694779116,
|
| 164575 |
+
"loss": 0.9709,
|
| 164576 |
+
"step": 23482
|
| 164577 |
+
},
|
| 164578 |
+
{
|
| 164579 |
+
"epoch": 0.5525411764705882,
|
| 164580 |
+
"grad_norm": 0.134765625,
|
| 164581 |
+
"learning_rate": 0.01529912449799197,
|
| 164582 |
+
"loss": 0.9379,
|
| 164583 |
+
"step": 23483
|
| 164584 |
+
},
|
| 164585 |
+
{
|
| 164586 |
+
"epoch": 0.552564705882353,
|
| 164587 |
+
"grad_norm": 0.1357421875,
|
| 164588 |
+
"learning_rate": 0.015298325301204821,
|
| 164589 |
+
"loss": 0.9729,
|
| 164590 |
+
"step": 23484
|
| 164591 |
+
},
|
| 164592 |
+
{
|
| 164593 |
+
"epoch": 0.5525882352941176,
|
| 164594 |
+
"grad_norm": 0.123046875,
|
| 164595 |
+
"learning_rate": 0.015297526104417671,
|
| 164596 |
+
"loss": 0.9784,
|
| 164597 |
+
"step": 23485
|
| 164598 |
+
},
|
| 164599 |
+
{
|
| 164600 |
+
"epoch": 0.5526117647058824,
|
| 164601 |
+
"grad_norm": 0.1455078125,
|
| 164602 |
+
"learning_rate": 0.015296726907630523,
|
| 164603 |
+
"loss": 0.7137,
|
| 164604 |
+
"step": 23486
|
| 164605 |
+
},
|
| 164606 |
+
{
|
| 164607 |
+
"epoch": 0.5526352941176471,
|
| 164608 |
+
"grad_norm": 0.1435546875,
|
| 164609 |
+
"learning_rate": 0.015295927710843373,
|
| 164610 |
+
"loss": 1.1623,
|
| 164611 |
+
"step": 23487
|
| 164612 |
+
},
|
| 164613 |
+
{
|
| 164614 |
+
"epoch": 0.5526588235294118,
|
| 164615 |
+
"grad_norm": 0.1416015625,
|
| 164616 |
+
"learning_rate": 0.015295128514056225,
|
| 164617 |
+
"loss": 0.8227,
|
| 164618 |
+
"step": 23488
|
| 164619 |
+
},
|
| 164620 |
+
{
|
| 164621 |
+
"epoch": 0.5526823529411765,
|
| 164622 |
+
"grad_norm": 0.1337890625,
|
| 164623 |
+
"learning_rate": 0.015294329317269076,
|
| 164624 |
+
"loss": 1.1337,
|
| 164625 |
+
"step": 23489
|
| 164626 |
+
},
|
| 164627 |
+
{
|
| 164628 |
+
"epoch": 0.5527058823529412,
|
| 164629 |
+
"grad_norm": 0.130859375,
|
| 164630 |
+
"learning_rate": 0.01529353012048193,
|
| 164631 |
+
"loss": 0.7641,
|
| 164632 |
+
"step": 23490
|
| 164633 |
+
},
|
| 164634 |
+
{
|
| 164635 |
+
"epoch": 0.5527294117647059,
|
| 164636 |
+
"grad_norm": 0.140625,
|
| 164637 |
+
"learning_rate": 0.01529273092369478,
|
| 164638 |
+
"loss": 0.9554,
|
| 164639 |
+
"step": 23491
|
| 164640 |
+
},
|
| 164641 |
+
{
|
| 164642 |
+
"epoch": 0.5527529411764706,
|
| 164643 |
+
"grad_norm": 0.12451171875,
|
| 164644 |
+
"learning_rate": 0.015291931726907631,
|
| 164645 |
+
"loss": 1.2277,
|
| 164646 |
+
"step": 23492
|
| 164647 |
+
},
|
| 164648 |
+
{
|
| 164649 |
+
"epoch": 0.5527764705882353,
|
| 164650 |
+
"grad_norm": 0.1376953125,
|
| 164651 |
+
"learning_rate": 0.015291132530120481,
|
| 164652 |
+
"loss": 0.8368,
|
| 164653 |
+
"step": 23493
|
| 164654 |
+
},
|
| 164655 |
+
{
|
| 164656 |
+
"epoch": 0.5528,
|
| 164657 |
+
"grad_norm": 0.1259765625,
|
| 164658 |
+
"learning_rate": 0.015290333333333333,
|
| 164659 |
+
"loss": 1.175,
|
| 164660 |
+
"step": 23494
|
| 164661 |
+
},
|
| 164662 |
+
{
|
| 164663 |
+
"epoch": 0.5528235294117647,
|
| 164664 |
+
"grad_norm": 0.1376953125,
|
| 164665 |
+
"learning_rate": 0.015289534136546185,
|
| 164666 |
+
"loss": 0.8574,
|
| 164667 |
+
"step": 23495
|
| 164668 |
+
},
|
| 164669 |
+
{
|
| 164670 |
+
"epoch": 0.5528470588235294,
|
| 164671 |
+
"grad_norm": 0.1328125,
|
| 164672 |
+
"learning_rate": 0.015288734939759034,
|
| 164673 |
+
"loss": 1.1897,
|
| 164674 |
+
"step": 23496
|
| 164675 |
+
},
|
| 164676 |
+
{
|
| 164677 |
+
"epoch": 0.5528705882352941,
|
| 164678 |
+
"grad_norm": 0.1318359375,
|
| 164679 |
+
"learning_rate": 0.015287935742971888,
|
| 164680 |
+
"loss": 1.0169,
|
| 164681 |
+
"step": 23497
|
| 164682 |
+
},
|
| 164683 |
+
{
|
| 164684 |
+
"epoch": 0.5528941176470589,
|
| 164685 |
+
"grad_norm": 0.142578125,
|
| 164686 |
+
"learning_rate": 0.015287136546184741,
|
| 164687 |
+
"loss": 1.1123,
|
| 164688 |
+
"step": 23498
|
| 164689 |
+
},
|
| 164690 |
+
{
|
| 164691 |
+
"epoch": 0.5529176470588235,
|
| 164692 |
+
"grad_norm": 0.142578125,
|
| 164693 |
+
"learning_rate": 0.01528633734939759,
|
| 164694 |
+
"loss": 0.5403,
|
| 164695 |
+
"step": 23499
|
| 164696 |
+
},
|
| 164697 |
+
{
|
| 164698 |
+
"epoch": 0.5529411764705883,
|
| 164699 |
+
"grad_norm": 0.140625,
|
| 164700 |
+
"learning_rate": 0.015285538152610443,
|
| 164701 |
+
"loss": 0.8267,
|
| 164702 |
+
"step": 23500
|
| 164703 |
}
|
| 164704 |
],
|
| 164705 |
"logging_steps": 1,
|
|
|
|
| 164713 |
"should_epoch_stop": false,
|
| 164714 |
"should_evaluate": false,
|
| 164715 |
"should_log": false,
|
| 164716 |
+
"should_save": true,
|
| 164717 |
"should_training_stop": false
|
| 164718 |
},
|
| 164719 |
"attributes": {}
|
| 164720 |
}
|
| 164721 |
},
|
| 164722 |
+
"total_flos": 1.425463715789262e+19,
|
| 164723 |
"train_batch_size": 8,
|
| 164724 |
"trial_name": null,
|
| 164725 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6289
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47478d0364a9e5b4a3d11e4abd89537a080c247d5b4ca266aa2094c02d65ddf1
|
| 3 |
size 6289
|