Training in progress, step 3800, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 93608
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e50b4171de13b44af59b89ae3218c4d4d280e87040d3ad5ef8ede699be14fdd1
|
| 3 |
size 93608
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 204490
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec510b9bacef6d5659215ddd3289052a6abac497a19cf722518465bd820e064e
|
| 3 |
size 204490
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bd47fa3613805a7f10d313ea783a9a8cd53653218219206ca2e0bdb414ce1c4
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd1db1d801d8252d892325c3e9003b87eb802aad5ce526801f9c8f46e4bc23f6
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 11.
|
| 3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
| 4 |
-
"epoch": 5.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -25359,6 +25359,1414 @@
|
|
| 25359 |
"eval_samples_per_second": 94.931,
|
| 25360 |
"eval_steps_per_second": 23.784,
|
| 25361 |
"step": 3600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25362 |
}
|
| 25363 |
],
|
| 25364 |
"logging_steps": 1,
|
|
@@ -25373,7 +26781,7 @@
|
|
| 25373 |
"early_stopping_threshold": 0.0
|
| 25374 |
},
|
| 25375 |
"attributes": {
|
| 25376 |
-
"early_stopping_patience_counter":
|
| 25377 |
}
|
| 25378 |
},
|
| 25379 |
"TrainerControl": {
|
|
@@ -25387,7 +26795,7 @@
|
|
| 25387 |
"attributes": {}
|
| 25388 |
}
|
| 25389 |
},
|
| 25390 |
-
"total_flos":
|
| 25391 |
"train_batch_size": 2,
|
| 25392 |
"trial_name": null,
|
| 25393 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 11.8898344039917,
|
| 3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-3800",
|
| 4 |
+
"epoch": 5.284308828320121,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 3800,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 25359 |
"eval_samples_per_second": 94.931,
|
| 25360 |
"eval_steps_per_second": 23.784,
|
| 25361 |
"step": 3600
|
| 25362 |
+
},
|
| 25363 |
+
{
|
| 25364 |
+
"epoch": 5.007709849060701,
|
| 25365 |
+
"grad_norm": 0.05842632055282593,
|
| 25366 |
+
"learning_rate": 0.0002,
|
| 25367 |
+
"loss": 11.9003,
|
| 25368 |
+
"step": 3601
|
| 25369 |
+
},
|
| 25370 |
+
{
|
| 25371 |
+
"epoch": 5.009099793680096,
|
| 25372 |
+
"grad_norm": 0.046708352863788605,
|
| 25373 |
+
"learning_rate": 0.0002,
|
| 25374 |
+
"loss": 11.8363,
|
| 25375 |
+
"step": 3602
|
| 25376 |
+
},
|
| 25377 |
+
{
|
| 25378 |
+
"epoch": 5.01048973829949,
|
| 25379 |
+
"grad_norm": 0.05128537863492966,
|
| 25380 |
+
"learning_rate": 0.0002,
|
| 25381 |
+
"loss": 12.0339,
|
| 25382 |
+
"step": 3603
|
| 25383 |
+
},
|
| 25384 |
+
{
|
| 25385 |
+
"epoch": 5.011879682918884,
|
| 25386 |
+
"grad_norm": 0.04519947990775108,
|
| 25387 |
+
"learning_rate": 0.0002,
|
| 25388 |
+
"loss": 11.8844,
|
| 25389 |
+
"step": 3604
|
| 25390 |
+
},
|
| 25391 |
+
{
|
| 25392 |
+
"epoch": 5.0132696275382775,
|
| 25393 |
+
"grad_norm": 0.04591000825166702,
|
| 25394 |
+
"learning_rate": 0.0002,
|
| 25395 |
+
"loss": 11.7796,
|
| 25396 |
+
"step": 3605
|
| 25397 |
+
},
|
| 25398 |
+
{
|
| 25399 |
+
"epoch": 5.014659572157671,
|
| 25400 |
+
"grad_norm": 0.04770844429731369,
|
| 25401 |
+
"learning_rate": 0.0002,
|
| 25402 |
+
"loss": 12.1149,
|
| 25403 |
+
"step": 3606
|
| 25404 |
+
},
|
| 25405 |
+
{
|
| 25406 |
+
"epoch": 5.016049516777066,
|
| 25407 |
+
"grad_norm": 0.04840252920985222,
|
| 25408 |
+
"learning_rate": 0.0002,
|
| 25409 |
+
"loss": 11.5734,
|
| 25410 |
+
"step": 3607
|
| 25411 |
+
},
|
| 25412 |
+
{
|
| 25413 |
+
"epoch": 5.01743946139646,
|
| 25414 |
+
"grad_norm": 0.04826009273529053,
|
| 25415 |
+
"learning_rate": 0.0002,
|
| 25416 |
+
"loss": 12.2667,
|
| 25417 |
+
"step": 3608
|
| 25418 |
+
},
|
| 25419 |
+
{
|
| 25420 |
+
"epoch": 5.018829406015854,
|
| 25421 |
+
"grad_norm": 0.041874658316373825,
|
| 25422 |
+
"learning_rate": 0.0002,
|
| 25423 |
+
"loss": 11.7042,
|
| 25424 |
+
"step": 3609
|
| 25425 |
+
},
|
| 25426 |
+
{
|
| 25427 |
+
"epoch": 5.020219350635248,
|
| 25428 |
+
"grad_norm": 0.053329598158597946,
|
| 25429 |
+
"learning_rate": 0.0002,
|
| 25430 |
+
"loss": 12.1699,
|
| 25431 |
+
"step": 3610
|
| 25432 |
+
},
|
| 25433 |
+
{
|
| 25434 |
+
"epoch": 5.021609295254642,
|
| 25435 |
+
"grad_norm": 0.0484725646674633,
|
| 25436 |
+
"learning_rate": 0.0002,
|
| 25437 |
+
"loss": 11.7989,
|
| 25438 |
+
"step": 3611
|
| 25439 |
+
},
|
| 25440 |
+
{
|
| 25441 |
+
"epoch": 5.022999239874037,
|
| 25442 |
+
"grad_norm": 0.040339384227991104,
|
| 25443 |
+
"learning_rate": 0.0002,
|
| 25444 |
+
"loss": 11.7096,
|
| 25445 |
+
"step": 3612
|
| 25446 |
+
},
|
| 25447 |
+
{
|
| 25448 |
+
"epoch": 5.0243891844934305,
|
| 25449 |
+
"grad_norm": 0.041044898331165314,
|
| 25450 |
+
"learning_rate": 0.0002,
|
| 25451 |
+
"loss": 11.7502,
|
| 25452 |
+
"step": 3613
|
| 25453 |
+
},
|
| 25454 |
+
{
|
| 25455 |
+
"epoch": 5.025779129112824,
|
| 25456 |
+
"grad_norm": 0.046834707260131836,
|
| 25457 |
+
"learning_rate": 0.0002,
|
| 25458 |
+
"loss": 12.1533,
|
| 25459 |
+
"step": 3614
|
| 25460 |
+
},
|
| 25461 |
+
{
|
| 25462 |
+
"epoch": 5.027169073732218,
|
| 25463 |
+
"grad_norm": 0.05244176834821701,
|
| 25464 |
+
"learning_rate": 0.0002,
|
| 25465 |
+
"loss": 11.7451,
|
| 25466 |
+
"step": 3615
|
| 25467 |
+
},
|
| 25468 |
+
{
|
| 25469 |
+
"epoch": 5.028559018351612,
|
| 25470 |
+
"grad_norm": 0.04213585704565048,
|
| 25471 |
+
"learning_rate": 0.0002,
|
| 25472 |
+
"loss": 11.8915,
|
| 25473 |
+
"step": 3616
|
| 25474 |
+
},
|
| 25475 |
+
{
|
| 25476 |
+
"epoch": 5.029948962971007,
|
| 25477 |
+
"grad_norm": 0.045843832194805145,
|
| 25478 |
+
"learning_rate": 0.0002,
|
| 25479 |
+
"loss": 11.9614,
|
| 25480 |
+
"step": 3617
|
| 25481 |
+
},
|
| 25482 |
+
{
|
| 25483 |
+
"epoch": 5.031338907590401,
|
| 25484 |
+
"grad_norm": 0.044140398502349854,
|
| 25485 |
+
"learning_rate": 0.0002,
|
| 25486 |
+
"loss": 12.1623,
|
| 25487 |
+
"step": 3618
|
| 25488 |
+
},
|
| 25489 |
+
{
|
| 25490 |
+
"epoch": 5.032728852209795,
|
| 25491 |
+
"grad_norm": 0.042298879474401474,
|
| 25492 |
+
"learning_rate": 0.0002,
|
| 25493 |
+
"loss": 11.6336,
|
| 25494 |
+
"step": 3619
|
| 25495 |
+
},
|
| 25496 |
+
{
|
| 25497 |
+
"epoch": 5.034118796829189,
|
| 25498 |
+
"grad_norm": 0.054477911442518234,
|
| 25499 |
+
"learning_rate": 0.0002,
|
| 25500 |
+
"loss": 11.8265,
|
| 25501 |
+
"step": 3620
|
| 25502 |
+
},
|
| 25503 |
+
{
|
| 25504 |
+
"epoch": 5.035508741448583,
|
| 25505 |
+
"grad_norm": 0.04397626593708992,
|
| 25506 |
+
"learning_rate": 0.0002,
|
| 25507 |
+
"loss": 11.9023,
|
| 25508 |
+
"step": 3621
|
| 25509 |
+
},
|
| 25510 |
+
{
|
| 25511 |
+
"epoch": 5.036898686067977,
|
| 25512 |
+
"grad_norm": 0.04996155947446823,
|
| 25513 |
+
"learning_rate": 0.0002,
|
| 25514 |
+
"loss": 11.9869,
|
| 25515 |
+
"step": 3622
|
| 25516 |
+
},
|
| 25517 |
+
{
|
| 25518 |
+
"epoch": 5.038288630687371,
|
| 25519 |
+
"grad_norm": 0.06032784283161163,
|
| 25520 |
+
"learning_rate": 0.0002,
|
| 25521 |
+
"loss": 11.7498,
|
| 25522 |
+
"step": 3623
|
| 25523 |
+
},
|
| 25524 |
+
{
|
| 25525 |
+
"epoch": 5.039678575306765,
|
| 25526 |
+
"grad_norm": 0.04672672599554062,
|
| 25527 |
+
"learning_rate": 0.0002,
|
| 25528 |
+
"loss": 11.9769,
|
| 25529 |
+
"step": 3624
|
| 25530 |
+
},
|
| 25531 |
+
{
|
| 25532 |
+
"epoch": 5.041068519926159,
|
| 25533 |
+
"grad_norm": 0.047622647136449814,
|
| 25534 |
+
"learning_rate": 0.0002,
|
| 25535 |
+
"loss": 11.8838,
|
| 25536 |
+
"step": 3625
|
| 25537 |
+
},
|
| 25538 |
+
{
|
| 25539 |
+
"epoch": 5.042458464545553,
|
| 25540 |
+
"grad_norm": 0.046631064265966415,
|
| 25541 |
+
"learning_rate": 0.0002,
|
| 25542 |
+
"loss": 12.2459,
|
| 25543 |
+
"step": 3626
|
| 25544 |
+
},
|
| 25545 |
+
{
|
| 25546 |
+
"epoch": 5.043848409164947,
|
| 25547 |
+
"grad_norm": 0.04834873974323273,
|
| 25548 |
+
"learning_rate": 0.0002,
|
| 25549 |
+
"loss": 11.4633,
|
| 25550 |
+
"step": 3627
|
| 25551 |
+
},
|
| 25552 |
+
{
|
| 25553 |
+
"epoch": 5.045238353784342,
|
| 25554 |
+
"grad_norm": 0.05008138343691826,
|
| 25555 |
+
"learning_rate": 0.0002,
|
| 25556 |
+
"loss": 12.2161,
|
| 25557 |
+
"step": 3628
|
| 25558 |
+
},
|
| 25559 |
+
{
|
| 25560 |
+
"epoch": 5.046628298403736,
|
| 25561 |
+
"grad_norm": 0.047594718635082245,
|
| 25562 |
+
"learning_rate": 0.0002,
|
| 25563 |
+
"loss": 11.4036,
|
| 25564 |
+
"step": 3629
|
| 25565 |
+
},
|
| 25566 |
+
{
|
| 25567 |
+
"epoch": 5.0480182430231295,
|
| 25568 |
+
"grad_norm": 0.05238320305943489,
|
| 25569 |
+
"learning_rate": 0.0002,
|
| 25570 |
+
"loss": 12.2592,
|
| 25571 |
+
"step": 3630
|
| 25572 |
+
},
|
| 25573 |
+
{
|
| 25574 |
+
"epoch": 5.049408187642523,
|
| 25575 |
+
"grad_norm": 0.05663394555449486,
|
| 25576 |
+
"learning_rate": 0.0002,
|
| 25577 |
+
"loss": 12.0941,
|
| 25578 |
+
"step": 3631
|
| 25579 |
+
},
|
| 25580 |
+
{
|
| 25581 |
+
"epoch": 5.050798132261917,
|
| 25582 |
+
"grad_norm": 0.046839337795972824,
|
| 25583 |
+
"learning_rate": 0.0002,
|
| 25584 |
+
"loss": 11.9538,
|
| 25585 |
+
"step": 3632
|
| 25586 |
+
},
|
| 25587 |
+
{
|
| 25588 |
+
"epoch": 5.052188076881312,
|
| 25589 |
+
"grad_norm": 0.05177483707666397,
|
| 25590 |
+
"learning_rate": 0.0002,
|
| 25591 |
+
"loss": 11.6635,
|
| 25592 |
+
"step": 3633
|
| 25593 |
+
},
|
| 25594 |
+
{
|
| 25595 |
+
"epoch": 5.053578021500706,
|
| 25596 |
+
"grad_norm": 0.048547644168138504,
|
| 25597 |
+
"learning_rate": 0.0002,
|
| 25598 |
+
"loss": 11.5452,
|
| 25599 |
+
"step": 3634
|
| 25600 |
+
},
|
| 25601 |
+
{
|
| 25602 |
+
"epoch": 5.0549679661201,
|
| 25603 |
+
"grad_norm": 0.05080310255289078,
|
| 25604 |
+
"learning_rate": 0.0002,
|
| 25605 |
+
"loss": 12.2685,
|
| 25606 |
+
"step": 3635
|
| 25607 |
+
},
|
| 25608 |
+
{
|
| 25609 |
+
"epoch": 5.056357910739494,
|
| 25610 |
+
"grad_norm": 0.0490473248064518,
|
| 25611 |
+
"learning_rate": 0.0002,
|
| 25612 |
+
"loss": 12.1856,
|
| 25613 |
+
"step": 3636
|
| 25614 |
+
},
|
| 25615 |
+
{
|
| 25616 |
+
"epoch": 5.057747855358888,
|
| 25617 |
+
"grad_norm": 0.051111724227666855,
|
| 25618 |
+
"learning_rate": 0.0002,
|
| 25619 |
+
"loss": 11.4866,
|
| 25620 |
+
"step": 3637
|
| 25621 |
+
},
|
| 25622 |
+
{
|
| 25623 |
+
"epoch": 5.0591377999782825,
|
| 25624 |
+
"grad_norm": 0.05272262170910835,
|
| 25625 |
+
"learning_rate": 0.0002,
|
| 25626 |
+
"loss": 12.1752,
|
| 25627 |
+
"step": 3638
|
| 25628 |
+
},
|
| 25629 |
+
{
|
| 25630 |
+
"epoch": 5.060527744597676,
|
| 25631 |
+
"grad_norm": 0.045457981526851654,
|
| 25632 |
+
"learning_rate": 0.0002,
|
| 25633 |
+
"loss": 11.5706,
|
| 25634 |
+
"step": 3639
|
| 25635 |
+
},
|
| 25636 |
+
{
|
| 25637 |
+
"epoch": 5.06191768921707,
|
| 25638 |
+
"grad_norm": 0.043644461780786514,
|
| 25639 |
+
"learning_rate": 0.0002,
|
| 25640 |
+
"loss": 12.0777,
|
| 25641 |
+
"step": 3640
|
| 25642 |
+
},
|
| 25643 |
+
{
|
| 25644 |
+
"epoch": 5.063307633836464,
|
| 25645 |
+
"grad_norm": 0.05072405934333801,
|
| 25646 |
+
"learning_rate": 0.0002,
|
| 25647 |
+
"loss": 12.0785,
|
| 25648 |
+
"step": 3641
|
| 25649 |
+
},
|
| 25650 |
+
{
|
| 25651 |
+
"epoch": 5.064697578455858,
|
| 25652 |
+
"grad_norm": 0.044014543294906616,
|
| 25653 |
+
"learning_rate": 0.0002,
|
| 25654 |
+
"loss": 11.6316,
|
| 25655 |
+
"step": 3642
|
| 25656 |
+
},
|
| 25657 |
+
{
|
| 25658 |
+
"epoch": 5.066087523075253,
|
| 25659 |
+
"grad_norm": 0.051093343645334244,
|
| 25660 |
+
"learning_rate": 0.0002,
|
| 25661 |
+
"loss": 11.8356,
|
| 25662 |
+
"step": 3643
|
| 25663 |
+
},
|
| 25664 |
+
{
|
| 25665 |
+
"epoch": 5.067477467694647,
|
| 25666 |
+
"grad_norm": 0.05445750057697296,
|
| 25667 |
+
"learning_rate": 0.0002,
|
| 25668 |
+
"loss": 12.044,
|
| 25669 |
+
"step": 3644
|
| 25670 |
+
},
|
| 25671 |
+
{
|
| 25672 |
+
"epoch": 5.068867412314041,
|
| 25673 |
+
"grad_norm": 0.057933688163757324,
|
| 25674 |
+
"learning_rate": 0.0002,
|
| 25675 |
+
"loss": 11.9202,
|
| 25676 |
+
"step": 3645
|
| 25677 |
+
},
|
| 25678 |
+
{
|
| 25679 |
+
"epoch": 5.070257356933435,
|
| 25680 |
+
"grad_norm": 0.05419892072677612,
|
| 25681 |
+
"learning_rate": 0.0002,
|
| 25682 |
+
"loss": 11.6963,
|
| 25683 |
+
"step": 3646
|
| 25684 |
+
},
|
| 25685 |
+
{
|
| 25686 |
+
"epoch": 5.0716473015528285,
|
| 25687 |
+
"grad_norm": 0.0459490641951561,
|
| 25688 |
+
"learning_rate": 0.0002,
|
| 25689 |
+
"loss": 11.9003,
|
| 25690 |
+
"step": 3647
|
| 25691 |
+
},
|
| 25692 |
+
{
|
| 25693 |
+
"epoch": 5.073037246172222,
|
| 25694 |
+
"grad_norm": 0.048636000603437424,
|
| 25695 |
+
"learning_rate": 0.0002,
|
| 25696 |
+
"loss": 11.8032,
|
| 25697 |
+
"step": 3648
|
| 25698 |
+
},
|
| 25699 |
+
{
|
| 25700 |
+
"epoch": 5.074427190791617,
|
| 25701 |
+
"grad_norm": 0.05490191653370857,
|
| 25702 |
+
"learning_rate": 0.0002,
|
| 25703 |
+
"loss": 12.0109,
|
| 25704 |
+
"step": 3649
|
| 25705 |
+
},
|
| 25706 |
+
{
|
| 25707 |
+
"epoch": 5.075817135411011,
|
| 25708 |
+
"grad_norm": 0.0459350161254406,
|
| 25709 |
+
"learning_rate": 0.0002,
|
| 25710 |
+
"loss": 12.0816,
|
| 25711 |
+
"step": 3650
|
| 25712 |
+
},
|
| 25713 |
+
{
|
| 25714 |
+
"epoch": 5.077207080030405,
|
| 25715 |
+
"grad_norm": 0.05242336541414261,
|
| 25716 |
+
"learning_rate": 0.0002,
|
| 25717 |
+
"loss": 11.6428,
|
| 25718 |
+
"step": 3651
|
| 25719 |
+
},
|
| 25720 |
+
{
|
| 25721 |
+
"epoch": 5.078597024649799,
|
| 25722 |
+
"grad_norm": 0.05621163547039032,
|
| 25723 |
+
"learning_rate": 0.0002,
|
| 25724 |
+
"loss": 11.9355,
|
| 25725 |
+
"step": 3652
|
| 25726 |
+
},
|
| 25727 |
+
{
|
| 25728 |
+
"epoch": 5.079986969269193,
|
| 25729 |
+
"grad_norm": 0.06240610405802727,
|
| 25730 |
+
"learning_rate": 0.0002,
|
| 25731 |
+
"loss": 12.1216,
|
| 25732 |
+
"step": 3653
|
| 25733 |
+
},
|
| 25734 |
+
{
|
| 25735 |
+
"epoch": 5.081376913888588,
|
| 25736 |
+
"grad_norm": 0.04969922825694084,
|
| 25737 |
+
"learning_rate": 0.0002,
|
| 25738 |
+
"loss": 11.757,
|
| 25739 |
+
"step": 3654
|
| 25740 |
+
},
|
| 25741 |
+
{
|
| 25742 |
+
"epoch": 5.0827668585079815,
|
| 25743 |
+
"grad_norm": 0.04940304532647133,
|
| 25744 |
+
"learning_rate": 0.0002,
|
| 25745 |
+
"loss": 11.6182,
|
| 25746 |
+
"step": 3655
|
| 25747 |
+
},
|
| 25748 |
+
{
|
| 25749 |
+
"epoch": 5.084156803127375,
|
| 25750 |
+
"grad_norm": 0.05530862137675285,
|
| 25751 |
+
"learning_rate": 0.0002,
|
| 25752 |
+
"loss": 12.182,
|
| 25753 |
+
"step": 3656
|
| 25754 |
+
},
|
| 25755 |
+
{
|
| 25756 |
+
"epoch": 5.085546747746769,
|
| 25757 |
+
"grad_norm": 0.0555587001144886,
|
| 25758 |
+
"learning_rate": 0.0002,
|
| 25759 |
+
"loss": 11.9743,
|
| 25760 |
+
"step": 3657
|
| 25761 |
+
},
|
| 25762 |
+
{
|
| 25763 |
+
"epoch": 5.086936692366163,
|
| 25764 |
+
"grad_norm": 0.05666826665401459,
|
| 25765 |
+
"learning_rate": 0.0002,
|
| 25766 |
+
"loss": 11.6668,
|
| 25767 |
+
"step": 3658
|
| 25768 |
+
},
|
| 25769 |
+
{
|
| 25770 |
+
"epoch": 5.088326636985558,
|
| 25771 |
+
"grad_norm": 0.056578971445560455,
|
| 25772 |
+
"learning_rate": 0.0002,
|
| 25773 |
+
"loss": 11.9064,
|
| 25774 |
+
"step": 3659
|
| 25775 |
+
},
|
| 25776 |
+
{
|
| 25777 |
+
"epoch": 5.089716581604952,
|
| 25778 |
+
"grad_norm": 0.048659004271030426,
|
| 25779 |
+
"learning_rate": 0.0002,
|
| 25780 |
+
"loss": 11.9647,
|
| 25781 |
+
"step": 3660
|
| 25782 |
+
},
|
| 25783 |
+
{
|
| 25784 |
+
"epoch": 5.091106526224346,
|
| 25785 |
+
"grad_norm": 0.056060101836919785,
|
| 25786 |
+
"learning_rate": 0.0002,
|
| 25787 |
+
"loss": 12.0774,
|
| 25788 |
+
"step": 3661
|
| 25789 |
+
},
|
| 25790 |
+
{
|
| 25791 |
+
"epoch": 5.09249647084374,
|
| 25792 |
+
"grad_norm": 0.04470730945467949,
|
| 25793 |
+
"learning_rate": 0.0002,
|
| 25794 |
+
"loss": 11.693,
|
| 25795 |
+
"step": 3662
|
| 25796 |
+
},
|
| 25797 |
+
{
|
| 25798 |
+
"epoch": 5.093886415463134,
|
| 25799 |
+
"grad_norm": 0.053428806364536285,
|
| 25800 |
+
"learning_rate": 0.0002,
|
| 25801 |
+
"loss": 11.9328,
|
| 25802 |
+
"step": 3663
|
| 25803 |
+
},
|
| 25804 |
+
{
|
| 25805 |
+
"epoch": 5.095276360082528,
|
| 25806 |
+
"grad_norm": 0.05473635718226433,
|
| 25807 |
+
"learning_rate": 0.0002,
|
| 25808 |
+
"loss": 11.7798,
|
| 25809 |
+
"step": 3664
|
| 25810 |
+
},
|
| 25811 |
+
{
|
| 25812 |
+
"epoch": 5.096666304701922,
|
| 25813 |
+
"grad_norm": 0.058004193007946014,
|
| 25814 |
+
"learning_rate": 0.0002,
|
| 25815 |
+
"loss": 11.9198,
|
| 25816 |
+
"step": 3665
|
| 25817 |
+
},
|
| 25818 |
+
{
|
| 25819 |
+
"epoch": 5.098056249321316,
|
| 25820 |
+
"grad_norm": 0.06698210537433624,
|
| 25821 |
+
"learning_rate": 0.0002,
|
| 25822 |
+
"loss": 11.8847,
|
| 25823 |
+
"step": 3666
|
| 25824 |
+
},
|
| 25825 |
+
{
|
| 25826 |
+
"epoch": 5.09944619394071,
|
| 25827 |
+
"grad_norm": 0.06329299509525299,
|
| 25828 |
+
"learning_rate": 0.0002,
|
| 25829 |
+
"loss": 11.8992,
|
| 25830 |
+
"step": 3667
|
| 25831 |
+
},
|
| 25832 |
+
{
|
| 25833 |
+
"epoch": 5.100836138560104,
|
| 25834 |
+
"grad_norm": 0.059009209275245667,
|
| 25835 |
+
"learning_rate": 0.0002,
|
| 25836 |
+
"loss": 11.7581,
|
| 25837 |
+
"step": 3668
|
| 25838 |
+
},
|
| 25839 |
+
{
|
| 25840 |
+
"epoch": 5.102226083179498,
|
| 25841 |
+
"grad_norm": 0.05407896265387535,
|
| 25842 |
+
"learning_rate": 0.0002,
|
| 25843 |
+
"loss": 12.048,
|
| 25844 |
+
"step": 3669
|
| 25845 |
+
},
|
| 25846 |
+
{
|
| 25847 |
+
"epoch": 5.103616027798893,
|
| 25848 |
+
"grad_norm": 0.059140268713235855,
|
| 25849 |
+
"learning_rate": 0.0002,
|
| 25850 |
+
"loss": 11.8611,
|
| 25851 |
+
"step": 3670
|
| 25852 |
+
},
|
| 25853 |
+
{
|
| 25854 |
+
"epoch": 5.105005972418287,
|
| 25855 |
+
"grad_norm": 0.05762072280049324,
|
| 25856 |
+
"learning_rate": 0.0002,
|
| 25857 |
+
"loss": 12.0508,
|
| 25858 |
+
"step": 3671
|
| 25859 |
+
},
|
| 25860 |
+
{
|
| 25861 |
+
"epoch": 5.1063959170376805,
|
| 25862 |
+
"grad_norm": 0.06736475974321365,
|
| 25863 |
+
"learning_rate": 0.0002,
|
| 25864 |
+
"loss": 11.8594,
|
| 25865 |
+
"step": 3672
|
| 25866 |
+
},
|
| 25867 |
+
{
|
| 25868 |
+
"epoch": 5.107785861657074,
|
| 25869 |
+
"grad_norm": 0.06504373252391815,
|
| 25870 |
+
"learning_rate": 0.0002,
|
| 25871 |
+
"loss": 11.8694,
|
| 25872 |
+
"step": 3673
|
| 25873 |
+
},
|
| 25874 |
+
{
|
| 25875 |
+
"epoch": 5.109175806276468,
|
| 25876 |
+
"grad_norm": 0.0682942345738411,
|
| 25877 |
+
"learning_rate": 0.0002,
|
| 25878 |
+
"loss": 11.7909,
|
| 25879 |
+
"step": 3674
|
| 25880 |
+
},
|
| 25881 |
+
{
|
| 25882 |
+
"epoch": 5.110565750895863,
|
| 25883 |
+
"grad_norm": 0.062256403267383575,
|
| 25884 |
+
"learning_rate": 0.0002,
|
| 25885 |
+
"loss": 11.7347,
|
| 25886 |
+
"step": 3675
|
| 25887 |
+
},
|
| 25888 |
+
{
|
| 25889 |
+
"epoch": 5.111955695515257,
|
| 25890 |
+
"grad_norm": 0.057039305567741394,
|
| 25891 |
+
"learning_rate": 0.0002,
|
| 25892 |
+
"loss": 11.96,
|
| 25893 |
+
"step": 3676
|
| 25894 |
+
},
|
| 25895 |
+
{
|
| 25896 |
+
"epoch": 5.113345640134651,
|
| 25897 |
+
"grad_norm": 0.06621230393648148,
|
| 25898 |
+
"learning_rate": 0.0002,
|
| 25899 |
+
"loss": 11.9943,
|
| 25900 |
+
"step": 3677
|
| 25901 |
+
},
|
| 25902 |
+
{
|
| 25903 |
+
"epoch": 5.114735584754045,
|
| 25904 |
+
"grad_norm": 0.060679852962493896,
|
| 25905 |
+
"learning_rate": 0.0002,
|
| 25906 |
+
"loss": 11.8618,
|
| 25907 |
+
"step": 3678
|
| 25908 |
+
},
|
| 25909 |
+
{
|
| 25910 |
+
"epoch": 5.116125529373439,
|
| 25911 |
+
"grad_norm": 0.05616457015275955,
|
| 25912 |
+
"learning_rate": 0.0002,
|
| 25913 |
+
"loss": 12.1701,
|
| 25914 |
+
"step": 3679
|
| 25915 |
+
},
|
| 25916 |
+
{
|
| 25917 |
+
"epoch": 5.1175154739928335,
|
| 25918 |
+
"grad_norm": 0.07456024736166,
|
| 25919 |
+
"learning_rate": 0.0002,
|
| 25920 |
+
"loss": 11.6655,
|
| 25921 |
+
"step": 3680
|
| 25922 |
+
},
|
| 25923 |
+
{
|
| 25924 |
+
"epoch": 5.118905418612227,
|
| 25925 |
+
"grad_norm": 0.06621438264846802,
|
| 25926 |
+
"learning_rate": 0.0002,
|
| 25927 |
+
"loss": 11.8532,
|
| 25928 |
+
"step": 3681
|
| 25929 |
+
},
|
| 25930 |
+
{
|
| 25931 |
+
"epoch": 5.120295363231621,
|
| 25932 |
+
"grad_norm": 0.06389164924621582,
|
| 25933 |
+
"learning_rate": 0.0002,
|
| 25934 |
+
"loss": 11.8321,
|
| 25935 |
+
"step": 3682
|
| 25936 |
+
},
|
| 25937 |
+
{
|
| 25938 |
+
"epoch": 5.121685307851015,
|
| 25939 |
+
"grad_norm": 0.05794215202331543,
|
| 25940 |
+
"learning_rate": 0.0002,
|
| 25941 |
+
"loss": 12.0268,
|
| 25942 |
+
"step": 3683
|
| 25943 |
+
},
|
| 25944 |
+
{
|
| 25945 |
+
"epoch": 5.123075252470409,
|
| 25946 |
+
"grad_norm": 0.05450157821178436,
|
| 25947 |
+
"learning_rate": 0.0002,
|
| 25948 |
+
"loss": 11.9194,
|
| 25949 |
+
"step": 3684
|
| 25950 |
+
},
|
| 25951 |
+
{
|
| 25952 |
+
"epoch": 5.124465197089804,
|
| 25953 |
+
"grad_norm": 0.0762237161397934,
|
| 25954 |
+
"learning_rate": 0.0002,
|
| 25955 |
+
"loss": 11.7739,
|
| 25956 |
+
"step": 3685
|
| 25957 |
+
},
|
| 25958 |
+
{
|
| 25959 |
+
"epoch": 5.125855141709198,
|
| 25960 |
+
"grad_norm": 0.07049744576215744,
|
| 25961 |
+
"learning_rate": 0.0002,
|
| 25962 |
+
"loss": 11.9284,
|
| 25963 |
+
"step": 3686
|
| 25964 |
+
},
|
| 25965 |
+
{
|
| 25966 |
+
"epoch": 5.127245086328592,
|
| 25967 |
+
"grad_norm": 0.06855335831642151,
|
| 25968 |
+
"learning_rate": 0.0002,
|
| 25969 |
+
"loss": 12.0113,
|
| 25970 |
+
"step": 3687
|
| 25971 |
+
},
|
| 25972 |
+
{
|
| 25973 |
+
"epoch": 5.128635030947986,
|
| 25974 |
+
"grad_norm": 0.06543952226638794,
|
| 25975 |
+
"learning_rate": 0.0002,
|
| 25976 |
+
"loss": 11.7059,
|
| 25977 |
+
"step": 3688
|
| 25978 |
+
},
|
| 25979 |
+
{
|
| 25980 |
+
"epoch": 5.1300249755673795,
|
| 25981 |
+
"grad_norm": 0.07390986382961273,
|
| 25982 |
+
"learning_rate": 0.0002,
|
| 25983 |
+
"loss": 11.7795,
|
| 25984 |
+
"step": 3689
|
| 25985 |
+
},
|
| 25986 |
+
{
|
| 25987 |
+
"epoch": 5.131414920186774,
|
| 25988 |
+
"grad_norm": 0.06839162111282349,
|
| 25989 |
+
"learning_rate": 0.0002,
|
| 25990 |
+
"loss": 12.1616,
|
| 25991 |
+
"step": 3690
|
| 25992 |
+
},
|
| 25993 |
+
{
|
| 25994 |
+
"epoch": 5.132804864806168,
|
| 25995 |
+
"grad_norm": 0.0818522721529007,
|
| 25996 |
+
"learning_rate": 0.0002,
|
| 25997 |
+
"loss": 11.7137,
|
| 25998 |
+
"step": 3691
|
| 25999 |
+
},
|
| 26000 |
+
{
|
| 26001 |
+
"epoch": 5.134194809425562,
|
| 26002 |
+
"grad_norm": 0.06569980829954147,
|
| 26003 |
+
"learning_rate": 0.0002,
|
| 26004 |
+
"loss": 11.8016,
|
| 26005 |
+
"step": 3692
|
| 26006 |
+
},
|
| 26007 |
+
{
|
| 26008 |
+
"epoch": 5.135584754044956,
|
| 26009 |
+
"grad_norm": 0.07135123014450073,
|
| 26010 |
+
"learning_rate": 0.0002,
|
| 26011 |
+
"loss": 12.0551,
|
| 26012 |
+
"step": 3693
|
| 26013 |
+
},
|
| 26014 |
+
{
|
| 26015 |
+
"epoch": 5.13697469866435,
|
| 26016 |
+
"grad_norm": 0.07680933177471161,
|
| 26017 |
+
"learning_rate": 0.0002,
|
| 26018 |
+
"loss": 11.8176,
|
| 26019 |
+
"step": 3694
|
| 26020 |
+
},
|
| 26021 |
+
{
|
| 26022 |
+
"epoch": 5.138364643283744,
|
| 26023 |
+
"grad_norm": 0.0850522369146347,
|
| 26024 |
+
"learning_rate": 0.0002,
|
| 26025 |
+
"loss": 12.0735,
|
| 26026 |
+
"step": 3695
|
| 26027 |
+
},
|
| 26028 |
+
{
|
| 26029 |
+
"epoch": 5.1397545879031385,
|
| 26030 |
+
"grad_norm": 0.06942490488290787,
|
| 26031 |
+
"learning_rate": 0.0002,
|
| 26032 |
+
"loss": 11.4675,
|
| 26033 |
+
"step": 3696
|
| 26034 |
+
},
|
| 26035 |
+
{
|
| 26036 |
+
"epoch": 5.1411445325225325,
|
| 26037 |
+
"grad_norm": 0.050059475004673004,
|
| 26038 |
+
"learning_rate": 0.0002,
|
| 26039 |
+
"loss": 12.0883,
|
| 26040 |
+
"step": 3697
|
| 26041 |
+
},
|
| 26042 |
+
{
|
| 26043 |
+
"epoch": 5.142534477141926,
|
| 26044 |
+
"grad_norm": 0.051331572234630585,
|
| 26045 |
+
"learning_rate": 0.0002,
|
| 26046 |
+
"loss": 12.0053,
|
| 26047 |
+
"step": 3698
|
| 26048 |
+
},
|
| 26049 |
+
{
|
| 26050 |
+
"epoch": 5.14392442176132,
|
| 26051 |
+
"grad_norm": 0.05305464193224907,
|
| 26052 |
+
"learning_rate": 0.0002,
|
| 26053 |
+
"loss": 12.2861,
|
| 26054 |
+
"step": 3699
|
| 26055 |
+
},
|
| 26056 |
+
{
|
| 26057 |
+
"epoch": 5.145314366380714,
|
| 26058 |
+
"grad_norm": 0.0444466657936573,
|
| 26059 |
+
"learning_rate": 0.0002,
|
| 26060 |
+
"loss": 11.3798,
|
| 26061 |
+
"step": 3700
|
| 26062 |
+
},
|
| 26063 |
+
{
|
| 26064 |
+
"epoch": 5.146704311000109,
|
| 26065 |
+
"grad_norm": 0.04427561163902283,
|
| 26066 |
+
"learning_rate": 0.0002,
|
| 26067 |
+
"loss": 11.608,
|
| 26068 |
+
"step": 3701
|
| 26069 |
+
},
|
| 26070 |
+
{
|
| 26071 |
+
"epoch": 5.148094255619503,
|
| 26072 |
+
"grad_norm": 0.05555248260498047,
|
| 26073 |
+
"learning_rate": 0.0002,
|
| 26074 |
+
"loss": 11.9291,
|
| 26075 |
+
"step": 3702
|
| 26076 |
+
},
|
| 26077 |
+
{
|
| 26078 |
+
"epoch": 5.149484200238897,
|
| 26079 |
+
"grad_norm": 0.04601334407925606,
|
| 26080 |
+
"learning_rate": 0.0002,
|
| 26081 |
+
"loss": 12.2645,
|
| 26082 |
+
"step": 3703
|
| 26083 |
+
},
|
| 26084 |
+
{
|
| 26085 |
+
"epoch": 5.150874144858291,
|
| 26086 |
+
"grad_norm": 0.0526413694024086,
|
| 26087 |
+
"learning_rate": 0.0002,
|
| 26088 |
+
"loss": 11.8358,
|
| 26089 |
+
"step": 3704
|
| 26090 |
+
},
|
| 26091 |
+
{
|
| 26092 |
+
"epoch": 5.1522640894776845,
|
| 26093 |
+
"grad_norm": 0.04684413596987724,
|
| 26094 |
+
"learning_rate": 0.0002,
|
| 26095 |
+
"loss": 12.1582,
|
| 26096 |
+
"step": 3705
|
| 26097 |
+
},
|
| 26098 |
+
{
|
| 26099 |
+
"epoch": 5.153654034097079,
|
| 26100 |
+
"grad_norm": 0.04493217170238495,
|
| 26101 |
+
"learning_rate": 0.0002,
|
| 26102 |
+
"loss": 11.3784,
|
| 26103 |
+
"step": 3706
|
| 26104 |
+
},
|
| 26105 |
+
{
|
| 26106 |
+
"epoch": 5.155043978716473,
|
| 26107 |
+
"grad_norm": 0.04843512177467346,
|
| 26108 |
+
"learning_rate": 0.0002,
|
| 26109 |
+
"loss": 12.3379,
|
| 26110 |
+
"step": 3707
|
| 26111 |
+
},
|
| 26112 |
+
{
|
| 26113 |
+
"epoch": 5.156433923335867,
|
| 26114 |
+
"grad_norm": 0.03999968245625496,
|
| 26115 |
+
"learning_rate": 0.0002,
|
| 26116 |
+
"loss": 11.6328,
|
| 26117 |
+
"step": 3708
|
| 26118 |
+
},
|
| 26119 |
+
{
|
| 26120 |
+
"epoch": 5.157823867955261,
|
| 26121 |
+
"grad_norm": 0.04911879450082779,
|
| 26122 |
+
"learning_rate": 0.0002,
|
| 26123 |
+
"loss": 11.9557,
|
| 26124 |
+
"step": 3709
|
| 26125 |
+
},
|
| 26126 |
+
{
|
| 26127 |
+
"epoch": 5.159213812574655,
|
| 26128 |
+
"grad_norm": 0.04678706079721451,
|
| 26129 |
+
"learning_rate": 0.0002,
|
| 26130 |
+
"loss": 11.7851,
|
| 26131 |
+
"step": 3710
|
| 26132 |
+
},
|
| 26133 |
+
{
|
| 26134 |
+
"epoch": 5.16060375719405,
|
| 26135 |
+
"grad_norm": 0.05315076559782028,
|
| 26136 |
+
"learning_rate": 0.0002,
|
| 26137 |
+
"loss": 11.8564,
|
| 26138 |
+
"step": 3711
|
| 26139 |
+
},
|
| 26140 |
+
{
|
| 26141 |
+
"epoch": 5.161993701813444,
|
| 26142 |
+
"grad_norm": 0.04515177384018898,
|
| 26143 |
+
"learning_rate": 0.0002,
|
| 26144 |
+
"loss": 12.2395,
|
| 26145 |
+
"step": 3712
|
| 26146 |
+
},
|
| 26147 |
+
{
|
| 26148 |
+
"epoch": 5.1633836464328375,
|
| 26149 |
+
"grad_norm": 0.050649650394916534,
|
| 26150 |
+
"learning_rate": 0.0002,
|
| 26151 |
+
"loss": 11.7413,
|
| 26152 |
+
"step": 3713
|
| 26153 |
+
},
|
| 26154 |
+
{
|
| 26155 |
+
"epoch": 5.164773591052231,
|
| 26156 |
+
"grad_norm": 0.04603847116231918,
|
| 26157 |
+
"learning_rate": 0.0002,
|
| 26158 |
+
"loss": 11.9793,
|
| 26159 |
+
"step": 3714
|
| 26160 |
+
},
|
| 26161 |
+
{
|
| 26162 |
+
"epoch": 5.166163535671625,
|
| 26163 |
+
"grad_norm": 0.04552611708641052,
|
| 26164 |
+
"learning_rate": 0.0002,
|
| 26165 |
+
"loss": 12.0548,
|
| 26166 |
+
"step": 3715
|
| 26167 |
+
},
|
| 26168 |
+
{
|
| 26169 |
+
"epoch": 5.167553480291019,
|
| 26170 |
+
"grad_norm": 0.04462776705622673,
|
| 26171 |
+
"learning_rate": 0.0002,
|
| 26172 |
+
"loss": 11.7306,
|
| 26173 |
+
"step": 3716
|
| 26174 |
+
},
|
| 26175 |
+
{
|
| 26176 |
+
"epoch": 5.168943424910414,
|
| 26177 |
+
"grad_norm": 0.04540819302201271,
|
| 26178 |
+
"learning_rate": 0.0002,
|
| 26179 |
+
"loss": 11.7643,
|
| 26180 |
+
"step": 3717
|
| 26181 |
+
},
|
| 26182 |
+
{
|
| 26183 |
+
"epoch": 5.170333369529808,
|
| 26184 |
+
"grad_norm": 0.05093303322792053,
|
| 26185 |
+
"learning_rate": 0.0002,
|
| 26186 |
+
"loss": 11.8553,
|
| 26187 |
+
"step": 3718
|
| 26188 |
+
},
|
| 26189 |
+
{
|
| 26190 |
+
"epoch": 5.171723314149202,
|
| 26191 |
+
"grad_norm": 0.05021725967526436,
|
| 26192 |
+
"learning_rate": 0.0002,
|
| 26193 |
+
"loss": 12.2092,
|
| 26194 |
+
"step": 3719
|
| 26195 |
+
},
|
| 26196 |
+
{
|
| 26197 |
+
"epoch": 5.173113258768596,
|
| 26198 |
+
"grad_norm": 0.04385516047477722,
|
| 26199 |
+
"learning_rate": 0.0002,
|
| 26200 |
+
"loss": 11.7394,
|
| 26201 |
+
"step": 3720
|
| 26202 |
+
},
|
| 26203 |
+
{
|
| 26204 |
+
"epoch": 5.17450320338799,
|
| 26205 |
+
"grad_norm": 0.04936952143907547,
|
| 26206 |
+
"learning_rate": 0.0002,
|
| 26207 |
+
"loss": 11.9082,
|
| 26208 |
+
"step": 3721
|
| 26209 |
+
},
|
| 26210 |
+
{
|
| 26211 |
+
"epoch": 5.175893148007384,
|
| 26212 |
+
"grad_norm": 0.046025052666664124,
|
| 26213 |
+
"learning_rate": 0.0002,
|
| 26214 |
+
"loss": 11.54,
|
| 26215 |
+
"step": 3722
|
| 26216 |
+
},
|
| 26217 |
+
{
|
| 26218 |
+
"epoch": 5.177283092626778,
|
| 26219 |
+
"grad_norm": 0.05317937210202217,
|
| 26220 |
+
"learning_rate": 0.0002,
|
| 26221 |
+
"loss": 12.206,
|
| 26222 |
+
"step": 3723
|
| 26223 |
+
},
|
| 26224 |
+
{
|
| 26225 |
+
"epoch": 5.178673037246172,
|
| 26226 |
+
"grad_norm": 0.047760725021362305,
|
| 26227 |
+
"learning_rate": 0.0002,
|
| 26228 |
+
"loss": 12.1129,
|
| 26229 |
+
"step": 3724
|
| 26230 |
+
},
|
| 26231 |
+
{
|
| 26232 |
+
"epoch": 5.180062981865566,
|
| 26233 |
+
"grad_norm": 0.04422136768698692,
|
| 26234 |
+
"learning_rate": 0.0002,
|
| 26235 |
+
"loss": 11.4629,
|
| 26236 |
+
"step": 3725
|
| 26237 |
+
},
|
| 26238 |
+
{
|
| 26239 |
+
"epoch": 5.18145292648496,
|
| 26240 |
+
"grad_norm": 0.04583797976374626,
|
| 26241 |
+
"learning_rate": 0.0002,
|
| 26242 |
+
"loss": 12.1181,
|
| 26243 |
+
"step": 3726
|
| 26244 |
+
},
|
| 26245 |
+
{
|
| 26246 |
+
"epoch": 5.182842871104355,
|
| 26247 |
+
"grad_norm": 0.04986279085278511,
|
| 26248 |
+
"learning_rate": 0.0002,
|
| 26249 |
+
"loss": 11.762,
|
| 26250 |
+
"step": 3727
|
| 26251 |
+
},
|
| 26252 |
+
{
|
| 26253 |
+
"epoch": 5.184232815723749,
|
| 26254 |
+
"grad_norm": 0.03881784901022911,
|
| 26255 |
+
"learning_rate": 0.0002,
|
| 26256 |
+
"loss": 11.9524,
|
| 26257 |
+
"step": 3728
|
| 26258 |
+
},
|
| 26259 |
+
{
|
| 26260 |
+
"epoch": 5.185622760343143,
|
| 26261 |
+
"grad_norm": 0.04903656616806984,
|
| 26262 |
+
"learning_rate": 0.0002,
|
| 26263 |
+
"loss": 11.9203,
|
| 26264 |
+
"step": 3729
|
| 26265 |
+
},
|
| 26266 |
+
{
|
| 26267 |
+
"epoch": 5.1870127049625365,
|
| 26268 |
+
"grad_norm": 0.051455602049827576,
|
| 26269 |
+
"learning_rate": 0.0002,
|
| 26270 |
+
"loss": 11.7791,
|
| 26271 |
+
"step": 3730
|
| 26272 |
+
},
|
| 26273 |
+
{
|
| 26274 |
+
"epoch": 5.18840264958193,
|
| 26275 |
+
"grad_norm": 0.046429604291915894,
|
| 26276 |
+
"learning_rate": 0.0002,
|
| 26277 |
+
"loss": 11.9003,
|
| 26278 |
+
"step": 3731
|
| 26279 |
+
},
|
| 26280 |
+
{
|
| 26281 |
+
"epoch": 5.189792594201325,
|
| 26282 |
+
"grad_norm": 0.04830280318856239,
|
| 26283 |
+
"learning_rate": 0.0002,
|
| 26284 |
+
"loss": 12.0652,
|
| 26285 |
+
"step": 3732
|
| 26286 |
+
},
|
| 26287 |
+
{
|
| 26288 |
+
"epoch": 5.191182538820719,
|
| 26289 |
+
"grad_norm": 0.05486230179667473,
|
| 26290 |
+
"learning_rate": 0.0002,
|
| 26291 |
+
"loss": 12.0077,
|
| 26292 |
+
"step": 3733
|
| 26293 |
+
},
|
| 26294 |
+
{
|
| 26295 |
+
"epoch": 5.192572483440113,
|
| 26296 |
+
"grad_norm": 0.05429707467556,
|
| 26297 |
+
"learning_rate": 0.0002,
|
| 26298 |
+
"loss": 11.6985,
|
| 26299 |
+
"step": 3734
|
| 26300 |
+
},
|
| 26301 |
+
{
|
| 26302 |
+
"epoch": 5.193962428059507,
|
| 26303 |
+
"grad_norm": 0.05715562403202057,
|
| 26304 |
+
"learning_rate": 0.0002,
|
| 26305 |
+
"loss": 12.0219,
|
| 26306 |
+
"step": 3735
|
| 26307 |
+
},
|
| 26308 |
+
{
|
| 26309 |
+
"epoch": 5.195352372678901,
|
| 26310 |
+
"grad_norm": 0.049032062292099,
|
| 26311 |
+
"learning_rate": 0.0002,
|
| 26312 |
+
"loss": 11.5941,
|
| 26313 |
+
"step": 3736
|
| 26314 |
+
},
|
| 26315 |
+
{
|
| 26316 |
+
"epoch": 5.196742317298295,
|
| 26317 |
+
"grad_norm": 0.05094841867685318,
|
| 26318 |
+
"learning_rate": 0.0002,
|
| 26319 |
+
"loss": 12.2352,
|
| 26320 |
+
"step": 3737
|
| 26321 |
+
},
|
| 26322 |
+
{
|
| 26323 |
+
"epoch": 5.1981322619176895,
|
| 26324 |
+
"grad_norm": 0.06227300688624382,
|
| 26325 |
+
"learning_rate": 0.0002,
|
| 26326 |
+
"loss": 11.5464,
|
| 26327 |
+
"step": 3738
|
| 26328 |
+
},
|
| 26329 |
+
{
|
| 26330 |
+
"epoch": 5.199522206537083,
|
| 26331 |
+
"grad_norm": 0.05904962867498398,
|
| 26332 |
+
"learning_rate": 0.0002,
|
| 26333 |
+
"loss": 12.2294,
|
| 26334 |
+
"step": 3739
|
| 26335 |
+
},
|
| 26336 |
+
{
|
| 26337 |
+
"epoch": 5.200912151156477,
|
| 26338 |
+
"grad_norm": 0.05094332993030548,
|
| 26339 |
+
"learning_rate": 0.0002,
|
| 26340 |
+
"loss": 11.948,
|
| 26341 |
+
"step": 3740
|
| 26342 |
+
},
|
| 26343 |
+
{
|
| 26344 |
+
"epoch": 5.202302095775871,
|
| 26345 |
+
"grad_norm": 0.050994522869586945,
|
| 26346 |
+
"learning_rate": 0.0002,
|
| 26347 |
+
"loss": 11.6468,
|
| 26348 |
+
"step": 3741
|
| 26349 |
+
},
|
| 26350 |
+
{
|
| 26351 |
+
"epoch": 5.203692040395265,
|
| 26352 |
+
"grad_norm": 0.05734934285283089,
|
| 26353 |
+
"learning_rate": 0.0002,
|
| 26354 |
+
"loss": 11.8583,
|
| 26355 |
+
"step": 3742
|
| 26356 |
+
},
|
| 26357 |
+
{
|
| 26358 |
+
"epoch": 5.20508198501466,
|
| 26359 |
+
"grad_norm": 0.046227265149354935,
|
| 26360 |
+
"learning_rate": 0.0002,
|
| 26361 |
+
"loss": 11.9803,
|
| 26362 |
+
"step": 3743
|
| 26363 |
+
},
|
| 26364 |
+
{
|
| 26365 |
+
"epoch": 5.206471929634054,
|
| 26366 |
+
"grad_norm": 0.05061519145965576,
|
| 26367 |
+
"learning_rate": 0.0002,
|
| 26368 |
+
"loss": 12.1794,
|
| 26369 |
+
"step": 3744
|
| 26370 |
+
},
|
| 26371 |
+
{
|
| 26372 |
+
"epoch": 5.207861874253448,
|
| 26373 |
+
"grad_norm": 0.048294030129909515,
|
| 26374 |
+
"learning_rate": 0.0002,
|
| 26375 |
+
"loss": 11.8556,
|
| 26376 |
+
"step": 3745
|
| 26377 |
+
},
|
| 26378 |
+
{
|
| 26379 |
+
"epoch": 5.209251818872842,
|
| 26380 |
+
"grad_norm": 0.049015454947948456,
|
| 26381 |
+
"learning_rate": 0.0002,
|
| 26382 |
+
"loss": 11.7749,
|
| 26383 |
+
"step": 3746
|
| 26384 |
+
},
|
| 26385 |
+
{
|
| 26386 |
+
"epoch": 5.2106417634922355,
|
| 26387 |
+
"grad_norm": 0.05540638417005539,
|
| 26388 |
+
"learning_rate": 0.0002,
|
| 26389 |
+
"loss": 11.8931,
|
| 26390 |
+
"step": 3747
|
| 26391 |
+
},
|
| 26392 |
+
{
|
| 26393 |
+
"epoch": 5.21203170811163,
|
| 26394 |
+
"grad_norm": 0.049739088863134384,
|
| 26395 |
+
"learning_rate": 0.0002,
|
| 26396 |
+
"loss": 11.8795,
|
| 26397 |
+
"step": 3748
|
| 26398 |
+
},
|
| 26399 |
+
{
|
| 26400 |
+
"epoch": 5.213421652731024,
|
| 26401 |
+
"grad_norm": 0.05611499771475792,
|
| 26402 |
+
"learning_rate": 0.0002,
|
| 26403 |
+
"loss": 11.8757,
|
| 26404 |
+
"step": 3749
|
| 26405 |
+
},
|
| 26406 |
+
{
|
| 26407 |
+
"epoch": 5.214811597350418,
|
| 26408 |
+
"grad_norm": 0.07287537306547165,
|
| 26409 |
+
"learning_rate": 0.0002,
|
| 26410 |
+
"loss": 12.1482,
|
| 26411 |
+
"step": 3750
|
| 26412 |
+
},
|
| 26413 |
+
{
|
| 26414 |
+
"epoch": 5.216201541969812,
|
| 26415 |
+
"grad_norm": 0.055914003401994705,
|
| 26416 |
+
"learning_rate": 0.0002,
|
| 26417 |
+
"loss": 11.9381,
|
| 26418 |
+
"step": 3751
|
| 26419 |
+
},
|
| 26420 |
+
{
|
| 26421 |
+
"epoch": 5.217591486589206,
|
| 26422 |
+
"grad_norm": 0.04964830353856087,
|
| 26423 |
+
"learning_rate": 0.0002,
|
| 26424 |
+
"loss": 11.6752,
|
| 26425 |
+
"step": 3752
|
| 26426 |
+
},
|
| 26427 |
+
{
|
| 26428 |
+
"epoch": 5.218981431208601,
|
| 26429 |
+
"grad_norm": 0.05374620854854584,
|
| 26430 |
+
"learning_rate": 0.0002,
|
| 26431 |
+
"loss": 11.7859,
|
| 26432 |
+
"step": 3753
|
| 26433 |
+
},
|
| 26434 |
+
{
|
| 26435 |
+
"epoch": 5.220371375827995,
|
| 26436 |
+
"grad_norm": 0.05140862986445427,
|
| 26437 |
+
"learning_rate": 0.0002,
|
| 26438 |
+
"loss": 11.5848,
|
| 26439 |
+
"step": 3754
|
| 26440 |
+
},
|
| 26441 |
+
{
|
| 26442 |
+
"epoch": 5.2217613204473885,
|
| 26443 |
+
"grad_norm": 0.06111348792910576,
|
| 26444 |
+
"learning_rate": 0.0002,
|
| 26445 |
+
"loss": 12.2047,
|
| 26446 |
+
"step": 3755
|
| 26447 |
+
},
|
| 26448 |
+
{
|
| 26449 |
+
"epoch": 5.223151265066782,
|
| 26450 |
+
"grad_norm": 0.05187017098069191,
|
| 26451 |
+
"learning_rate": 0.0002,
|
| 26452 |
+
"loss": 12.0435,
|
| 26453 |
+
"step": 3756
|
| 26454 |
+
},
|
| 26455 |
+
{
|
| 26456 |
+
"epoch": 5.224541209686176,
|
| 26457 |
+
"grad_norm": 0.05272003635764122,
|
| 26458 |
+
"learning_rate": 0.0002,
|
| 26459 |
+
"loss": 11.729,
|
| 26460 |
+
"step": 3757
|
| 26461 |
+
},
|
| 26462 |
+
{
|
| 26463 |
+
"epoch": 5.22593115430557,
|
| 26464 |
+
"grad_norm": 0.04720710963010788,
|
| 26465 |
+
"learning_rate": 0.0002,
|
| 26466 |
+
"loss": 11.8496,
|
| 26467 |
+
"step": 3758
|
| 26468 |
+
},
|
| 26469 |
+
{
|
| 26470 |
+
"epoch": 5.227321098924965,
|
| 26471 |
+
"grad_norm": 0.05178720876574516,
|
| 26472 |
+
"learning_rate": 0.0002,
|
| 26473 |
+
"loss": 11.9599,
|
| 26474 |
+
"step": 3759
|
| 26475 |
+
},
|
| 26476 |
+
{
|
| 26477 |
+
"epoch": 5.228711043544359,
|
| 26478 |
+
"grad_norm": 0.053378019481897354,
|
| 26479 |
+
"learning_rate": 0.0002,
|
| 26480 |
+
"loss": 12.0025,
|
| 26481 |
+
"step": 3760
|
| 26482 |
+
},
|
| 26483 |
+
{
|
| 26484 |
+
"epoch": 5.230100988163753,
|
| 26485 |
+
"grad_norm": 0.06272667646408081,
|
| 26486 |
+
"learning_rate": 0.0002,
|
| 26487 |
+
"loss": 11.7854,
|
| 26488 |
+
"step": 3761
|
| 26489 |
+
},
|
| 26490 |
+
{
|
| 26491 |
+
"epoch": 5.231490932783147,
|
| 26492 |
+
"grad_norm": 0.06924556940793991,
|
| 26493 |
+
"learning_rate": 0.0002,
|
| 26494 |
+
"loss": 11.9508,
|
| 26495 |
+
"step": 3762
|
| 26496 |
+
},
|
| 26497 |
+
{
|
| 26498 |
+
"epoch": 5.232880877402541,
|
| 26499 |
+
"grad_norm": 0.05396045371890068,
|
| 26500 |
+
"learning_rate": 0.0002,
|
| 26501 |
+
"loss": 11.8075,
|
| 26502 |
+
"step": 3763
|
| 26503 |
+
},
|
| 26504 |
+
{
|
| 26505 |
+
"epoch": 5.234270822021935,
|
| 26506 |
+
"grad_norm": 0.0530928373336792,
|
| 26507 |
+
"learning_rate": 0.0002,
|
| 26508 |
+
"loss": 11.6784,
|
| 26509 |
+
"step": 3764
|
| 26510 |
+
},
|
| 26511 |
+
{
|
| 26512 |
+
"epoch": 5.235660766641329,
|
| 26513 |
+
"grad_norm": 0.06755296885967255,
|
| 26514 |
+
"learning_rate": 0.0002,
|
| 26515 |
+
"loss": 12.0164,
|
| 26516 |
+
"step": 3765
|
| 26517 |
+
},
|
| 26518 |
+
{
|
| 26519 |
+
"epoch": 5.237050711260723,
|
| 26520 |
+
"grad_norm": 0.06049341708421707,
|
| 26521 |
+
"learning_rate": 0.0002,
|
| 26522 |
+
"loss": 11.8481,
|
| 26523 |
+
"step": 3766
|
| 26524 |
+
},
|
| 26525 |
+
{
|
| 26526 |
+
"epoch": 5.238440655880117,
|
| 26527 |
+
"grad_norm": 0.057866718620061874,
|
| 26528 |
+
"learning_rate": 0.0002,
|
| 26529 |
+
"loss": 11.8315,
|
| 26530 |
+
"step": 3767
|
| 26531 |
+
},
|
| 26532 |
+
{
|
| 26533 |
+
"epoch": 5.239830600499511,
|
| 26534 |
+
"grad_norm": 0.06039746478199959,
|
| 26535 |
+
"learning_rate": 0.0002,
|
| 26536 |
+
"loss": 11.9209,
|
| 26537 |
+
"step": 3768
|
| 26538 |
+
},
|
| 26539 |
+
{
|
| 26540 |
+
"epoch": 5.241220545118906,
|
| 26541 |
+
"grad_norm": 0.05683878809213638,
|
| 26542 |
+
"learning_rate": 0.0002,
|
| 26543 |
+
"loss": 12.0077,
|
| 26544 |
+
"step": 3769
|
| 26545 |
+
},
|
| 26546 |
+
{
|
| 26547 |
+
"epoch": 5.2426104897383,
|
| 26548 |
+
"grad_norm": 0.05440589785575867,
|
| 26549 |
+
"learning_rate": 0.0002,
|
| 26550 |
+
"loss": 11.9242,
|
| 26551 |
+
"step": 3770
|
| 26552 |
+
},
|
| 26553 |
+
{
|
| 26554 |
+
"epoch": 5.244000434357694,
|
| 26555 |
+
"grad_norm": 0.04907901585102081,
|
| 26556 |
+
"learning_rate": 0.0002,
|
| 26557 |
+
"loss": 11.9331,
|
| 26558 |
+
"step": 3771
|
| 26559 |
+
},
|
| 26560 |
+
{
|
| 26561 |
+
"epoch": 5.2453903789770875,
|
| 26562 |
+
"grad_norm": 0.06735330075025558,
|
| 26563 |
+
"learning_rate": 0.0002,
|
| 26564 |
+
"loss": 11.7374,
|
| 26565 |
+
"step": 3772
|
| 26566 |
+
},
|
| 26567 |
+
{
|
| 26568 |
+
"epoch": 5.246780323596481,
|
| 26569 |
+
"grad_norm": 0.060925912111997604,
|
| 26570 |
+
"learning_rate": 0.0002,
|
| 26571 |
+
"loss": 11.996,
|
| 26572 |
+
"step": 3773
|
| 26573 |
+
},
|
| 26574 |
+
{
|
| 26575 |
+
"epoch": 5.248170268215876,
|
| 26576 |
+
"grad_norm": 0.060880355536937714,
|
| 26577 |
+
"learning_rate": 0.0002,
|
| 26578 |
+
"loss": 11.9655,
|
| 26579 |
+
"step": 3774
|
| 26580 |
+
},
|
| 26581 |
+
{
|
| 26582 |
+
"epoch": 5.24956021283527,
|
| 26583 |
+
"grad_norm": 0.06049060449004173,
|
| 26584 |
+
"learning_rate": 0.0002,
|
| 26585 |
+
"loss": 11.7177,
|
| 26586 |
+
"step": 3775
|
| 26587 |
+
},
|
| 26588 |
+
{
|
| 26589 |
+
"epoch": 5.250950157454664,
|
| 26590 |
+
"grad_norm": 0.06330886483192444,
|
| 26591 |
+
"learning_rate": 0.0002,
|
| 26592 |
+
"loss": 11.774,
|
| 26593 |
+
"step": 3776
|
| 26594 |
+
},
|
| 26595 |
+
{
|
| 26596 |
+
"epoch": 5.252340102074058,
|
| 26597 |
+
"grad_norm": 0.05687471479177475,
|
| 26598 |
+
"learning_rate": 0.0002,
|
| 26599 |
+
"loss": 12.215,
|
| 26600 |
+
"step": 3777
|
| 26601 |
+
},
|
| 26602 |
+
{
|
| 26603 |
+
"epoch": 5.253730046693452,
|
| 26604 |
+
"grad_norm": 0.06074843183159828,
|
| 26605 |
+
"learning_rate": 0.0002,
|
| 26606 |
+
"loss": 11.7774,
|
| 26607 |
+
"step": 3778
|
| 26608 |
+
},
|
| 26609 |
+
{
|
| 26610 |
+
"epoch": 5.255119991312846,
|
| 26611 |
+
"grad_norm": 0.060445379465818405,
|
| 26612 |
+
"learning_rate": 0.0002,
|
| 26613 |
+
"loss": 11.8634,
|
| 26614 |
+
"step": 3779
|
| 26615 |
+
},
|
| 26616 |
+
{
|
| 26617 |
+
"epoch": 5.2565099359322405,
|
| 26618 |
+
"grad_norm": 0.0653340145945549,
|
| 26619 |
+
"learning_rate": 0.0002,
|
| 26620 |
+
"loss": 11.8201,
|
| 26621 |
+
"step": 3780
|
| 26622 |
+
},
|
| 26623 |
+
{
|
| 26624 |
+
"epoch": 5.257899880551634,
|
| 26625 |
+
"grad_norm": 0.06119542196393013,
|
| 26626 |
+
"learning_rate": 0.0002,
|
| 26627 |
+
"loss": 12.0148,
|
| 26628 |
+
"step": 3781
|
| 26629 |
+
},
|
| 26630 |
+
{
|
| 26631 |
+
"epoch": 5.259289825171028,
|
| 26632 |
+
"grad_norm": 0.08181419223546982,
|
| 26633 |
+
"learning_rate": 0.0002,
|
| 26634 |
+
"loss": 11.7457,
|
| 26635 |
+
"step": 3782
|
| 26636 |
+
},
|
| 26637 |
+
{
|
| 26638 |
+
"epoch": 5.260679769790422,
|
| 26639 |
+
"grad_norm": 0.0584963858127594,
|
| 26640 |
+
"learning_rate": 0.0002,
|
| 26641 |
+
"loss": 11.9313,
|
| 26642 |
+
"step": 3783
|
| 26643 |
+
},
|
| 26644 |
+
{
|
| 26645 |
+
"epoch": 5.262069714409816,
|
| 26646 |
+
"grad_norm": 0.06702666729688644,
|
| 26647 |
+
"learning_rate": 0.0002,
|
| 26648 |
+
"loss": 12.0346,
|
| 26649 |
+
"step": 3784
|
| 26650 |
+
},
|
| 26651 |
+
{
|
| 26652 |
+
"epoch": 5.263459659029211,
|
| 26653 |
+
"grad_norm": 0.062397684901952744,
|
| 26654 |
+
"learning_rate": 0.0002,
|
| 26655 |
+
"loss": 11.6889,
|
| 26656 |
+
"step": 3785
|
| 26657 |
+
},
|
| 26658 |
+
{
|
| 26659 |
+
"epoch": 5.264849603648605,
|
| 26660 |
+
"grad_norm": 0.0674617663025856,
|
| 26661 |
+
"learning_rate": 0.0002,
|
| 26662 |
+
"loss": 11.9462,
|
| 26663 |
+
"step": 3786
|
| 26664 |
+
},
|
| 26665 |
+
{
|
| 26666 |
+
"epoch": 5.266239548267999,
|
| 26667 |
+
"grad_norm": 0.06465965509414673,
|
| 26668 |
+
"learning_rate": 0.0002,
|
| 26669 |
+
"loss": 11.9214,
|
| 26670 |
+
"step": 3787
|
| 26671 |
+
},
|
| 26672 |
+
{
|
| 26673 |
+
"epoch": 5.267629492887393,
|
| 26674 |
+
"grad_norm": 0.07278496026992798,
|
| 26675 |
+
"learning_rate": 0.0002,
|
| 26676 |
+
"loss": 11.7067,
|
| 26677 |
+
"step": 3788
|
| 26678 |
+
},
|
| 26679 |
+
{
|
| 26680 |
+
"epoch": 5.2690194375067865,
|
| 26681 |
+
"grad_norm": 0.07649397850036621,
|
| 26682 |
+
"learning_rate": 0.0002,
|
| 26683 |
+
"loss": 11.9786,
|
| 26684 |
+
"step": 3789
|
| 26685 |
+
},
|
| 26686 |
+
{
|
| 26687 |
+
"epoch": 5.270409382126181,
|
| 26688 |
+
"grad_norm": 0.06857860833406448,
|
| 26689 |
+
"learning_rate": 0.0002,
|
| 26690 |
+
"loss": 11.9559,
|
| 26691 |
+
"step": 3790
|
| 26692 |
+
},
|
| 26693 |
+
{
|
| 26694 |
+
"epoch": 5.271799326745575,
|
| 26695 |
+
"grad_norm": 0.0621369294822216,
|
| 26696 |
+
"learning_rate": 0.0002,
|
| 26697 |
+
"loss": 11.7411,
|
| 26698 |
+
"step": 3791
|
| 26699 |
+
},
|
| 26700 |
+
{
|
| 26701 |
+
"epoch": 5.273189271364969,
|
| 26702 |
+
"grad_norm": 0.06749909371137619,
|
| 26703 |
+
"learning_rate": 0.0002,
|
| 26704 |
+
"loss": 12.0294,
|
| 26705 |
+
"step": 3792
|
| 26706 |
+
},
|
| 26707 |
+
{
|
| 26708 |
+
"epoch": 5.274579215984363,
|
| 26709 |
+
"grad_norm": 0.07126730680465698,
|
| 26710 |
+
"learning_rate": 0.0002,
|
| 26711 |
+
"loss": 11.882,
|
| 26712 |
+
"step": 3793
|
| 26713 |
+
},
|
| 26714 |
+
{
|
| 26715 |
+
"epoch": 5.275969160603757,
|
| 26716 |
+
"grad_norm": 0.07472597062587738,
|
| 26717 |
+
"learning_rate": 0.0002,
|
| 26718 |
+
"loss": 11.8713,
|
| 26719 |
+
"step": 3794
|
| 26720 |
+
},
|
| 26721 |
+
{
|
| 26722 |
+
"epoch": 5.277359105223152,
|
| 26723 |
+
"grad_norm": 0.07881154119968414,
|
| 26724 |
+
"learning_rate": 0.0002,
|
| 26725 |
+
"loss": 12.0237,
|
| 26726 |
+
"step": 3795
|
| 26727 |
+
},
|
| 26728 |
+
{
|
| 26729 |
+
"epoch": 5.278749049842546,
|
| 26730 |
+
"grad_norm": 0.05430987849831581,
|
| 26731 |
+
"learning_rate": 0.0002,
|
| 26732 |
+
"loss": 11.3424,
|
| 26733 |
+
"step": 3796
|
| 26734 |
+
},
|
| 26735 |
+
{
|
| 26736 |
+
"epoch": 5.2801389944619395,
|
| 26737 |
+
"grad_norm": 0.05311230942606926,
|
| 26738 |
+
"learning_rate": 0.0002,
|
| 26739 |
+
"loss": 12.2686,
|
| 26740 |
+
"step": 3797
|
| 26741 |
+
},
|
| 26742 |
+
{
|
| 26743 |
+
"epoch": 5.281528939081333,
|
| 26744 |
+
"grad_norm": 0.05020713806152344,
|
| 26745 |
+
"learning_rate": 0.0002,
|
| 26746 |
+
"loss": 11.815,
|
| 26747 |
+
"step": 3798
|
| 26748 |
+
},
|
| 26749 |
+
{
|
| 26750 |
+
"epoch": 5.282918883700727,
|
| 26751 |
+
"grad_norm": 0.04886842519044876,
|
| 26752 |
+
"learning_rate": 0.0002,
|
| 26753 |
+
"loss": 12.1525,
|
| 26754 |
+
"step": 3799
|
| 26755 |
+
},
|
| 26756 |
+
{
|
| 26757 |
+
"epoch": 5.284308828320121,
|
| 26758 |
+
"grad_norm": 0.04808910936117172,
|
| 26759 |
+
"learning_rate": 0.0002,
|
| 26760 |
+
"loss": 11.904,
|
| 26761 |
+
"step": 3800
|
| 26762 |
+
},
|
| 26763 |
+
{
|
| 26764 |
+
"epoch": 5.284308828320121,
|
| 26765 |
+
"eval_loss": 11.8898344039917,
|
| 26766 |
+
"eval_runtime": 4.875,
|
| 26767 |
+
"eval_samples_per_second": 94.975,
|
| 26768 |
+
"eval_steps_per_second": 23.795,
|
| 26769 |
+
"step": 3800
|
| 26770 |
}
|
| 26771 |
],
|
| 26772 |
"logging_steps": 1,
|
|
|
|
| 26781 |
"early_stopping_threshold": 0.0
|
| 26782 |
},
|
| 26783 |
"attributes": {
|
| 26784 |
+
"early_stopping_patience_counter": 0
|
| 26785 |
}
|
| 26786 |
},
|
| 26787 |
"TrainerControl": {
|
|
|
|
| 26795 |
"attributes": {}
|
| 26796 |
}
|
| 26797 |
},
|
| 26798 |
+
"total_flos": 26996487045120.0,
|
| 26799 |
"train_batch_size": 2,
|
| 26800 |
"trial_name": null,
|
| 26801 |
"trial_params": null
|