Training in progress, step 9200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 289452128
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3fd92c8eb120d0d4fb4481a8f5a7798dacd77b7f141c1866e73c9aae52d92228
|
| 3 |
size 289452128
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 147360212
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58876f961f3f67831ac23c460f205a190e883fd17e2698ec5629d38b6346c369
|
| 3 |
size 147360212
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:858d8ca66a71ae7ea5969da89fd712d359087bee67998a2eaeafe5a13a3efe88
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:810e7ed910a4e3c95dab0f302348147ca0b51661e68265e01c41682ddbd1e306
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 1.
|
| 3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -63375,6 +63375,1414 @@
|
|
| 63375 |
"eval_samples_per_second": 8.756,
|
| 63376 |
"eval_steps_per_second": 4.387,
|
| 63377 |
"step": 9000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63378 |
}
|
| 63379 |
],
|
| 63380 |
"logging_steps": 1,
|
|
@@ -63403,7 +64811,7 @@
|
|
| 63403 |
"attributes": {}
|
| 63404 |
}
|
| 63405 |
},
|
| 63406 |
-
"total_flos": 7.
|
| 63407 |
"train_batch_size": 2,
|
| 63408 |
"trial_name": null,
|
| 63409 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 1.2265260219573975,
|
| 3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-9200",
|
| 4 |
+
"epoch": 0.7475268642466839,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 9200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 63375 |
"eval_samples_per_second": 8.756,
|
| 63376 |
"eval_steps_per_second": 4.387,
|
| 63377 |
"step": 9000
|
| 63378 |
+
},
|
| 63379 |
+
{
|
| 63380 |
+
"epoch": 0.731357533161348,
|
| 63381 |
+
"grad_norm": 0.348073273897171,
|
| 63382 |
+
"learning_rate": 3.359731764692239e-05,
|
| 63383 |
+
"loss": 1.2414,
|
| 63384 |
+
"step": 9001
|
| 63385 |
+
},
|
| 63386 |
+
{
|
| 63387 |
+
"epoch": 0.7314387860813748,
|
| 63388 |
+
"grad_norm": 0.37491166591644287,
|
| 63389 |
+
"learning_rate": 3.357821762397748e-05,
|
| 63390 |
+
"loss": 1.3213,
|
| 63391 |
+
"step": 9002
|
| 63392 |
+
},
|
| 63393 |
+
{
|
| 63394 |
+
"epoch": 0.7315200390014016,
|
| 63395 |
+
"grad_norm": 0.308138370513916,
|
| 63396 |
+
"learning_rate": 3.3559121936260393e-05,
|
| 63397 |
+
"loss": 1.247,
|
| 63398 |
+
"step": 9003
|
| 63399 |
+
},
|
| 63400 |
+
{
|
| 63401 |
+
"epoch": 0.7316012919214284,
|
| 63402 |
+
"grad_norm": 0.33807840943336487,
|
| 63403 |
+
"learning_rate": 3.354003058501742e-05,
|
| 63404 |
+
"loss": 1.3332,
|
| 63405 |
+
"step": 9004
|
| 63406 |
+
},
|
| 63407 |
+
{
|
| 63408 |
+
"epoch": 0.7316825448414552,
|
| 63409 |
+
"grad_norm": 0.3593338131904602,
|
| 63410 |
+
"learning_rate": 3.3520943571494665e-05,
|
| 63411 |
+
"loss": 1.2574,
|
| 63412 |
+
"step": 9005
|
| 63413 |
+
},
|
| 63414 |
+
{
|
| 63415 |
+
"epoch": 0.731763797761482,
|
| 63416 |
+
"grad_norm": 0.2992275059223175,
|
| 63417 |
+
"learning_rate": 3.350186089693784e-05,
|
| 63418 |
+
"loss": 1.2264,
|
| 63419 |
+
"step": 9006
|
| 63420 |
+
},
|
| 63421 |
+
{
|
| 63422 |
+
"epoch": 0.7318450506815088,
|
| 63423 |
+
"grad_norm": 0.3149811923503876,
|
| 63424 |
+
"learning_rate": 3.348278256259254e-05,
|
| 63425 |
+
"loss": 1.2893,
|
| 63426 |
+
"step": 9007
|
| 63427 |
+
},
|
| 63428 |
+
{
|
| 63429 |
+
"epoch": 0.7319263036015357,
|
| 63430 |
+
"grad_norm": 0.34178659319877625,
|
| 63431 |
+
"learning_rate": 3.346370856970384e-05,
|
| 63432 |
+
"loss": 1.2631,
|
| 63433 |
+
"step": 9008
|
| 63434 |
+
},
|
| 63435 |
+
{
|
| 63436 |
+
"epoch": 0.7320075565215625,
|
| 63437 |
+
"grad_norm": 0.32926008105278015,
|
| 63438 |
+
"learning_rate": 3.344463891951673e-05,
|
| 63439 |
+
"loss": 1.2922,
|
| 63440 |
+
"step": 9009
|
| 63441 |
+
},
|
| 63442 |
+
{
|
| 63443 |
+
"epoch": 0.7320888094415893,
|
| 63444 |
+
"grad_norm": 0.33941230177879333,
|
| 63445 |
+
"learning_rate": 3.342557361327588e-05,
|
| 63446 |
+
"loss": 1.382,
|
| 63447 |
+
"step": 9010
|
| 63448 |
+
},
|
| 63449 |
+
{
|
| 63450 |
+
"epoch": 0.7321700623616161,
|
| 63451 |
+
"grad_norm": 0.3220233619213104,
|
| 63452 |
+
"learning_rate": 3.340651265222559e-05,
|
| 63453 |
+
"loss": 1.119,
|
| 63454 |
+
"step": 9011
|
| 63455 |
+
},
|
| 63456 |
+
{
|
| 63457 |
+
"epoch": 0.7322513152816429,
|
| 63458 |
+
"grad_norm": 0.3296765089035034,
|
| 63459 |
+
"learning_rate": 3.338745603760999e-05,
|
| 63460 |
+
"loss": 1.2108,
|
| 63461 |
+
"step": 9012
|
| 63462 |
+
},
|
| 63463 |
+
{
|
| 63464 |
+
"epoch": 0.7323325682016697,
|
| 63465 |
+
"grad_norm": 0.3547622561454773,
|
| 63466 |
+
"learning_rate": 3.3368403770672804e-05,
|
| 63467 |
+
"loss": 1.3369,
|
| 63468 |
+
"step": 9013
|
| 63469 |
+
},
|
| 63470 |
+
{
|
| 63471 |
+
"epoch": 0.7324138211216965,
|
| 63472 |
+
"grad_norm": 0.3414868712425232,
|
| 63473 |
+
"learning_rate": 3.3349355852657625e-05,
|
| 63474 |
+
"loss": 1.4216,
|
| 63475 |
+
"step": 9014
|
| 63476 |
+
},
|
| 63477 |
+
{
|
| 63478 |
+
"epoch": 0.7324950740417234,
|
| 63479 |
+
"grad_norm": 0.3140677213668823,
|
| 63480 |
+
"learning_rate": 3.333031228480762e-05,
|
| 63481 |
+
"loss": 1.3436,
|
| 63482 |
+
"step": 9015
|
| 63483 |
+
},
|
| 63484 |
+
{
|
| 63485 |
+
"epoch": 0.7325763269617502,
|
| 63486 |
+
"grad_norm": 0.3438255488872528,
|
| 63487 |
+
"learning_rate": 3.331127306836574e-05,
|
| 63488 |
+
"loss": 1.1428,
|
| 63489 |
+
"step": 9016
|
| 63490 |
+
},
|
| 63491 |
+
{
|
| 63492 |
+
"epoch": 0.732657579881777,
|
| 63493 |
+
"grad_norm": 0.33730071783065796,
|
| 63494 |
+
"learning_rate": 3.329223820457459e-05,
|
| 63495 |
+
"loss": 1.0325,
|
| 63496 |
+
"step": 9017
|
| 63497 |
+
},
|
| 63498 |
+
{
|
| 63499 |
+
"epoch": 0.7327388328018039,
|
| 63500 |
+
"grad_norm": 0.3351227641105652,
|
| 63501 |
+
"learning_rate": 3.3273207694676587e-05,
|
| 63502 |
+
"loss": 1.1308,
|
| 63503 |
+
"step": 9018
|
| 63504 |
+
},
|
| 63505 |
+
{
|
| 63506 |
+
"epoch": 0.7328200857218307,
|
| 63507 |
+
"grad_norm": 0.35805854201316833,
|
| 63508 |
+
"learning_rate": 3.3254181539913856e-05,
|
| 63509 |
+
"loss": 1.1686,
|
| 63510 |
+
"step": 9019
|
| 63511 |
+
},
|
| 63512 |
+
{
|
| 63513 |
+
"epoch": 0.7329013386418575,
|
| 63514 |
+
"grad_norm": 0.3186790645122528,
|
| 63515 |
+
"learning_rate": 3.323515974152812e-05,
|
| 63516 |
+
"loss": 1.3126,
|
| 63517 |
+
"step": 9020
|
| 63518 |
+
},
|
| 63519 |
+
{
|
| 63520 |
+
"epoch": 0.7329825915618843,
|
| 63521 |
+
"grad_norm": 0.30815765261650085,
|
| 63522 |
+
"learning_rate": 3.3216142300760966e-05,
|
| 63523 |
+
"loss": 1.2517,
|
| 63524 |
+
"step": 9021
|
| 63525 |
+
},
|
| 63526 |
+
{
|
| 63527 |
+
"epoch": 0.7330638444819111,
|
| 63528 |
+
"grad_norm": 0.30545684695243835,
|
| 63529 |
+
"learning_rate": 3.3197129218853575e-05,
|
| 63530 |
+
"loss": 1.1441,
|
| 63531 |
+
"step": 9022
|
| 63532 |
+
},
|
| 63533 |
+
{
|
| 63534 |
+
"epoch": 0.7331450974019379,
|
| 63535 |
+
"grad_norm": 0.3568319082260132,
|
| 63536 |
+
"learning_rate": 3.317812049704697e-05,
|
| 63537 |
+
"loss": 1.4065,
|
| 63538 |
+
"step": 9023
|
| 63539 |
+
},
|
| 63540 |
+
{
|
| 63541 |
+
"epoch": 0.7332263503219647,
|
| 63542 |
+
"grad_norm": 0.35160183906555176,
|
| 63543 |
+
"learning_rate": 3.3159116136581714e-05,
|
| 63544 |
+
"loss": 1.3386,
|
| 63545 |
+
"step": 9024
|
| 63546 |
+
},
|
| 63547 |
+
{
|
| 63548 |
+
"epoch": 0.7333076032419915,
|
| 63549 |
+
"grad_norm": 0.32895147800445557,
|
| 63550 |
+
"learning_rate": 3.3140116138698266e-05,
|
| 63551 |
+
"loss": 1.2688,
|
| 63552 |
+
"step": 9025
|
| 63553 |
+
},
|
| 63554 |
+
{
|
| 63555 |
+
"epoch": 0.7333888561620183,
|
| 63556 |
+
"grad_norm": 0.32300665974617004,
|
| 63557 |
+
"learning_rate": 3.312112050463665e-05,
|
| 63558 |
+
"loss": 1.308,
|
| 63559 |
+
"step": 9026
|
| 63560 |
+
},
|
| 63561 |
+
{
|
| 63562 |
+
"epoch": 0.7334701090820451,
|
| 63563 |
+
"grad_norm": 0.34199288487434387,
|
| 63564 |
+
"learning_rate": 3.3102129235636714e-05,
|
| 63565 |
+
"loss": 1.1883,
|
| 63566 |
+
"step": 9027
|
| 63567 |
+
},
|
| 63568 |
+
{
|
| 63569 |
+
"epoch": 0.7335513620020719,
|
| 63570 |
+
"grad_norm": 0.3434705138206482,
|
| 63571 |
+
"learning_rate": 3.308314233293803e-05,
|
| 63572 |
+
"loss": 1.3129,
|
| 63573 |
+
"step": 9028
|
| 63574 |
+
},
|
| 63575 |
+
{
|
| 63576 |
+
"epoch": 0.7336326149220987,
|
| 63577 |
+
"grad_norm": 0.34170761704444885,
|
| 63578 |
+
"learning_rate": 3.306415979777976e-05,
|
| 63579 |
+
"loss": 1.0662,
|
| 63580 |
+
"step": 9029
|
| 63581 |
+
},
|
| 63582 |
+
{
|
| 63583 |
+
"epoch": 0.7337138678421256,
|
| 63584 |
+
"grad_norm": 0.348827987909317,
|
| 63585 |
+
"learning_rate": 3.3045181631400925e-05,
|
| 63586 |
+
"loss": 1.3772,
|
| 63587 |
+
"step": 9030
|
| 63588 |
+
},
|
| 63589 |
+
{
|
| 63590 |
+
"epoch": 0.7337951207621524,
|
| 63591 |
+
"grad_norm": 0.36657270789146423,
|
| 63592 |
+
"learning_rate": 3.3026207835040165e-05,
|
| 63593 |
+
"loss": 1.249,
|
| 63594 |
+
"step": 9031
|
| 63595 |
+
},
|
| 63596 |
+
{
|
| 63597 |
+
"epoch": 0.7338763736821792,
|
| 63598 |
+
"grad_norm": 0.31669020652770996,
|
| 63599 |
+
"learning_rate": 3.300723840993586e-05,
|
| 63600 |
+
"loss": 1.3094,
|
| 63601 |
+
"step": 9032
|
| 63602 |
+
},
|
| 63603 |
+
{
|
| 63604 |
+
"epoch": 0.733957626602206,
|
| 63605 |
+
"grad_norm": 0.36015674471855164,
|
| 63606 |
+
"learning_rate": 3.2988273357326084e-05,
|
| 63607 |
+
"loss": 1.1854,
|
| 63608 |
+
"step": 9033
|
| 63609 |
+
},
|
| 63610 |
+
{
|
| 63611 |
+
"epoch": 0.7340388795222328,
|
| 63612 |
+
"grad_norm": 0.3001486361026764,
|
| 63613 |
+
"learning_rate": 3.2969312678448714e-05,
|
| 63614 |
+
"loss": 1.1946,
|
| 63615 |
+
"step": 9034
|
| 63616 |
+
},
|
| 63617 |
+
{
|
| 63618 |
+
"epoch": 0.7341201324422596,
|
| 63619 |
+
"grad_norm": 0.33679264783859253,
|
| 63620 |
+
"learning_rate": 3.2950356374541213e-05,
|
| 63621 |
+
"loss": 1.3195,
|
| 63622 |
+
"step": 9035
|
| 63623 |
+
},
|
| 63624 |
+
{
|
| 63625 |
+
"epoch": 0.7342013853622864,
|
| 63626 |
+
"grad_norm": 0.3070138692855835,
|
| 63627 |
+
"learning_rate": 3.2931404446840866e-05,
|
| 63628 |
+
"loss": 1.2649,
|
| 63629 |
+
"step": 9036
|
| 63630 |
+
},
|
| 63631 |
+
{
|
| 63632 |
+
"epoch": 0.7342826382823132,
|
| 63633 |
+
"grad_norm": 0.3212503492832184,
|
| 63634 |
+
"learning_rate": 3.2912456896584646e-05,
|
| 63635 |
+
"loss": 1.1644,
|
| 63636 |
+
"step": 9037
|
| 63637 |
+
},
|
| 63638 |
+
{
|
| 63639 |
+
"epoch": 0.7343638912023401,
|
| 63640 |
+
"grad_norm": 0.36129093170166016,
|
| 63641 |
+
"learning_rate": 3.289351372500916e-05,
|
| 63642 |
+
"loss": 1.2652,
|
| 63643 |
+
"step": 9038
|
| 63644 |
+
},
|
| 63645 |
+
{
|
| 63646 |
+
"epoch": 0.7344451441223669,
|
| 63647 |
+
"grad_norm": 0.32358092069625854,
|
| 63648 |
+
"learning_rate": 3.287457493335091e-05,
|
| 63649 |
+
"loss": 1.3497,
|
| 63650 |
+
"step": 9039
|
| 63651 |
+
},
|
| 63652 |
+
{
|
| 63653 |
+
"epoch": 0.7345263970423938,
|
| 63654 |
+
"grad_norm": 0.30324509739875793,
|
| 63655 |
+
"learning_rate": 3.2855640522845846e-05,
|
| 63656 |
+
"loss": 1.2742,
|
| 63657 |
+
"step": 9040
|
| 63658 |
+
},
|
| 63659 |
+
{
|
| 63660 |
+
"epoch": 0.7346076499624206,
|
| 63661 |
+
"grad_norm": 0.3230375349521637,
|
| 63662 |
+
"learning_rate": 3.283671049472989e-05,
|
| 63663 |
+
"loss": 1.271,
|
| 63664 |
+
"step": 9041
|
| 63665 |
+
},
|
| 63666 |
+
{
|
| 63667 |
+
"epoch": 0.7346889028824474,
|
| 63668 |
+
"grad_norm": 0.3371600806713104,
|
| 63669 |
+
"learning_rate": 3.281778485023851e-05,
|
| 63670 |
+
"loss": 1.1763,
|
| 63671 |
+
"step": 9042
|
| 63672 |
+
},
|
| 63673 |
+
{
|
| 63674 |
+
"epoch": 0.7347701558024742,
|
| 63675 |
+
"grad_norm": 0.3698885142803192,
|
| 63676 |
+
"learning_rate": 3.279886359060701e-05,
|
| 63677 |
+
"loss": 1.2673,
|
| 63678 |
+
"step": 9043
|
| 63679 |
+
},
|
| 63680 |
+
{
|
| 63681 |
+
"epoch": 0.734851408722501,
|
| 63682 |
+
"grad_norm": 0.37326911091804504,
|
| 63683 |
+
"learning_rate": 3.277994671707028e-05,
|
| 63684 |
+
"loss": 1.3909,
|
| 63685 |
+
"step": 9044
|
| 63686 |
+
},
|
| 63687 |
+
{
|
| 63688 |
+
"epoch": 0.7349326616425278,
|
| 63689 |
+
"grad_norm": 0.32233452796936035,
|
| 63690 |
+
"learning_rate": 3.276103423086301e-05,
|
| 63691 |
+
"loss": 1.2641,
|
| 63692 |
+
"step": 9045
|
| 63693 |
+
},
|
| 63694 |
+
{
|
| 63695 |
+
"epoch": 0.7350139145625546,
|
| 63696 |
+
"grad_norm": 0.30027875304222107,
|
| 63697 |
+
"learning_rate": 3.274212613321962e-05,
|
| 63698 |
+
"loss": 1.2189,
|
| 63699 |
+
"step": 9046
|
| 63700 |
+
},
|
| 63701 |
+
{
|
| 63702 |
+
"epoch": 0.7350951674825814,
|
| 63703 |
+
"grad_norm": 0.3577303886413574,
|
| 63704 |
+
"learning_rate": 3.272322242537416e-05,
|
| 63705 |
+
"loss": 1.0988,
|
| 63706 |
+
"step": 9047
|
| 63707 |
+
},
|
| 63708 |
+
{
|
| 63709 |
+
"epoch": 0.7351764204026082,
|
| 63710 |
+
"grad_norm": 0.38352838158607483,
|
| 63711 |
+
"learning_rate": 3.270432310856052e-05,
|
| 63712 |
+
"loss": 1.4526,
|
| 63713 |
+
"step": 9048
|
| 63714 |
+
},
|
| 63715 |
+
{
|
| 63716 |
+
"epoch": 0.735257673322635,
|
| 63717 |
+
"grad_norm": 0.37333592772483826,
|
| 63718 |
+
"learning_rate": 3.26854281840121e-05,
|
| 63719 |
+
"loss": 1.1502,
|
| 63720 |
+
"step": 9049
|
| 63721 |
+
},
|
| 63722 |
+
{
|
| 63723 |
+
"epoch": 0.7353389262426618,
|
| 63724 |
+
"grad_norm": 0.31793317198753357,
|
| 63725 |
+
"learning_rate": 3.266653765296224e-05,
|
| 63726 |
+
"loss": 1.2313,
|
| 63727 |
+
"step": 9050
|
| 63728 |
+
},
|
| 63729 |
+
{
|
| 63730 |
+
"epoch": 0.7354201791626886,
|
| 63731 |
+
"grad_norm": 0.2976967692375183,
|
| 63732 |
+
"learning_rate": 3.264765151664382e-05,
|
| 63733 |
+
"loss": 1.0955,
|
| 63734 |
+
"step": 9051
|
| 63735 |
+
},
|
| 63736 |
+
{
|
| 63737 |
+
"epoch": 0.7355014320827155,
|
| 63738 |
+
"grad_norm": 0.3185284733772278,
|
| 63739 |
+
"learning_rate": 3.262876977628956e-05,
|
| 63740 |
+
"loss": 1.2651,
|
| 63741 |
+
"step": 9052
|
| 63742 |
+
},
|
| 63743 |
+
{
|
| 63744 |
+
"epoch": 0.7355826850027423,
|
| 63745 |
+
"grad_norm": 0.3035105764865875,
|
| 63746 |
+
"learning_rate": 3.260989243313178e-05,
|
| 63747 |
+
"loss": 1.2007,
|
| 63748 |
+
"step": 9053
|
| 63749 |
+
},
|
| 63750 |
+
{
|
| 63751 |
+
"epoch": 0.7356639379227691,
|
| 63752 |
+
"grad_norm": 0.30189043283462524,
|
| 63753 |
+
"learning_rate": 3.25910194884026e-05,
|
| 63754 |
+
"loss": 1.2241,
|
| 63755 |
+
"step": 9054
|
| 63756 |
+
},
|
| 63757 |
+
{
|
| 63758 |
+
"epoch": 0.7357451908427959,
|
| 63759 |
+
"grad_norm": 0.3131310045719147,
|
| 63760 |
+
"learning_rate": 3.2572150943333854e-05,
|
| 63761 |
+
"loss": 1.2576,
|
| 63762 |
+
"step": 9055
|
| 63763 |
+
},
|
| 63764 |
+
{
|
| 63765 |
+
"epoch": 0.7358264437628227,
|
| 63766 |
+
"grad_norm": 0.34634116291999817,
|
| 63767 |
+
"learning_rate": 3.255328679915703e-05,
|
| 63768 |
+
"loss": 1.3716,
|
| 63769 |
+
"step": 9056
|
| 63770 |
+
},
|
| 63771 |
+
{
|
| 63772 |
+
"epoch": 0.7359076966828495,
|
| 63773 |
+
"grad_norm": 0.3289729356765747,
|
| 63774 |
+
"learning_rate": 3.2534427057103355e-05,
|
| 63775 |
+
"loss": 1.2091,
|
| 63776 |
+
"step": 9057
|
| 63777 |
+
},
|
| 63778 |
+
{
|
| 63779 |
+
"epoch": 0.7359889496028763,
|
| 63780 |
+
"grad_norm": 0.3263621926307678,
|
| 63781 |
+
"learning_rate": 3.2515571718403735e-05,
|
| 63782 |
+
"loss": 1.2983,
|
| 63783 |
+
"step": 9058
|
| 63784 |
+
},
|
| 63785 |
+
{
|
| 63786 |
+
"epoch": 0.7360702025229031,
|
| 63787 |
+
"grad_norm": 0.39237716794013977,
|
| 63788 |
+
"learning_rate": 3.249672078428889e-05,
|
| 63789 |
+
"loss": 1.0393,
|
| 63790 |
+
"step": 9059
|
| 63791 |
+
},
|
| 63792 |
+
{
|
| 63793 |
+
"epoch": 0.7361514554429299,
|
| 63794 |
+
"grad_norm": 0.3372330665588379,
|
| 63795 |
+
"learning_rate": 3.247787425598912e-05,
|
| 63796 |
+
"loss": 1.2616,
|
| 63797 |
+
"step": 9060
|
| 63798 |
+
},
|
| 63799 |
+
{
|
| 63800 |
+
"epoch": 0.7362327083629568,
|
| 63801 |
+
"grad_norm": 0.3186988830566406,
|
| 63802 |
+
"learning_rate": 3.2459032134734566e-05,
|
| 63803 |
+
"loss": 1.2087,
|
| 63804 |
+
"step": 9061
|
| 63805 |
+
},
|
| 63806 |
+
{
|
| 63807 |
+
"epoch": 0.7363139612829837,
|
| 63808 |
+
"grad_norm": 0.35245460271835327,
|
| 63809 |
+
"learning_rate": 3.2440194421754955e-05,
|
| 63810 |
+
"loss": 1.3665,
|
| 63811 |
+
"step": 9062
|
| 63812 |
+
},
|
| 63813 |
+
{
|
| 63814 |
+
"epoch": 0.7363952142030105,
|
| 63815 |
+
"grad_norm": 0.3209587037563324,
|
| 63816 |
+
"learning_rate": 3.242136111827981e-05,
|
| 63817 |
+
"loss": 1.2174,
|
| 63818 |
+
"step": 9063
|
| 63819 |
+
},
|
| 63820 |
+
{
|
| 63821 |
+
"epoch": 0.7364764671230373,
|
| 63822 |
+
"grad_norm": 0.34210798144340515,
|
| 63823 |
+
"learning_rate": 3.240253222553844e-05,
|
| 63824 |
+
"loss": 1.2495,
|
| 63825 |
+
"step": 9064
|
| 63826 |
+
},
|
| 63827 |
+
{
|
| 63828 |
+
"epoch": 0.7365577200430641,
|
| 63829 |
+
"grad_norm": 0.3220295310020447,
|
| 63830 |
+
"learning_rate": 3.2383707744759627e-05,
|
| 63831 |
+
"loss": 1.1942,
|
| 63832 |
+
"step": 9065
|
| 63833 |
+
},
|
| 63834 |
+
{
|
| 63835 |
+
"epoch": 0.7366389729630909,
|
| 63836 |
+
"grad_norm": 0.3506152629852295,
|
| 63837 |
+
"learning_rate": 3.2364887677172094e-05,
|
| 63838 |
+
"loss": 1.0295,
|
| 63839 |
+
"step": 9066
|
| 63840 |
+
},
|
| 63841 |
+
{
|
| 63842 |
+
"epoch": 0.7367202258831177,
|
| 63843 |
+
"grad_norm": 0.3315095901489258,
|
| 63844 |
+
"learning_rate": 3.234607202400416e-05,
|
| 63845 |
+
"loss": 1.1466,
|
| 63846 |
+
"step": 9067
|
| 63847 |
+
},
|
| 63848 |
+
{
|
| 63849 |
+
"epoch": 0.7368014788031445,
|
| 63850 |
+
"grad_norm": 0.35386794805526733,
|
| 63851 |
+
"learning_rate": 3.232726078648392e-05,
|
| 63852 |
+
"loss": 1.208,
|
| 63853 |
+
"step": 9068
|
| 63854 |
+
},
|
| 63855 |
+
{
|
| 63856 |
+
"epoch": 0.7368827317231713,
|
| 63857 |
+
"grad_norm": 0.3082221746444702,
|
| 63858 |
+
"learning_rate": 3.230845396583909e-05,
|
| 63859 |
+
"loss": 1.1706,
|
| 63860 |
+
"step": 9069
|
| 63861 |
+
},
|
| 63862 |
+
{
|
| 63863 |
+
"epoch": 0.7369639846431981,
|
| 63864 |
+
"grad_norm": 0.33266887068748474,
|
| 63865 |
+
"learning_rate": 3.228965156329724e-05,
|
| 63866 |
+
"loss": 1.3056,
|
| 63867 |
+
"step": 9070
|
| 63868 |
+
},
|
| 63869 |
+
{
|
| 63870 |
+
"epoch": 0.7370452375632249,
|
| 63871 |
+
"grad_norm": 0.3169810175895691,
|
| 63872 |
+
"learning_rate": 3.2270853580085494e-05,
|
| 63873 |
+
"loss": 1.1438,
|
| 63874 |
+
"step": 9071
|
| 63875 |
+
},
|
| 63876 |
+
{
|
| 63877 |
+
"epoch": 0.7371264904832517,
|
| 63878 |
+
"grad_norm": 0.3323366940021515,
|
| 63879 |
+
"learning_rate": 3.225206001743082e-05,
|
| 63880 |
+
"loss": 1.3633,
|
| 63881 |
+
"step": 9072
|
| 63882 |
+
},
|
| 63883 |
+
{
|
| 63884 |
+
"epoch": 0.7372077434032785,
|
| 63885 |
+
"grad_norm": 0.30098995566368103,
|
| 63886 |
+
"learning_rate": 3.2233270876559804e-05,
|
| 63887 |
+
"loss": 1.1154,
|
| 63888 |
+
"step": 9073
|
| 63889 |
+
},
|
| 63890 |
+
{
|
| 63891 |
+
"epoch": 0.7372889963233054,
|
| 63892 |
+
"grad_norm": 0.36186954379081726,
|
| 63893 |
+
"learning_rate": 3.2214486158698775e-05,
|
| 63894 |
+
"loss": 1.3407,
|
| 63895 |
+
"step": 9074
|
| 63896 |
+
},
|
| 63897 |
+
{
|
| 63898 |
+
"epoch": 0.7373702492433322,
|
| 63899 |
+
"grad_norm": 0.32620713114738464,
|
| 63900 |
+
"learning_rate": 3.21957058650738e-05,
|
| 63901 |
+
"loss": 1.361,
|
| 63902 |
+
"step": 9075
|
| 63903 |
+
},
|
| 63904 |
+
{
|
| 63905 |
+
"epoch": 0.737451502163359,
|
| 63906 |
+
"grad_norm": 0.3505888283252716,
|
| 63907 |
+
"learning_rate": 3.217692999691061e-05,
|
| 63908 |
+
"loss": 1.2883,
|
| 63909 |
+
"step": 9076
|
| 63910 |
+
},
|
| 63911 |
+
{
|
| 63912 |
+
"epoch": 0.7375327550833858,
|
| 63913 |
+
"grad_norm": 0.3355899751186371,
|
| 63914 |
+
"learning_rate": 3.2158158555434704e-05,
|
| 63915 |
+
"loss": 1.267,
|
| 63916 |
+
"step": 9077
|
| 63917 |
+
},
|
| 63918 |
+
{
|
| 63919 |
+
"epoch": 0.7376140080034126,
|
| 63920 |
+
"grad_norm": 0.3477858006954193,
|
| 63921 |
+
"learning_rate": 3.213939154187121e-05,
|
| 63922 |
+
"loss": 1.128,
|
| 63923 |
+
"step": 9078
|
| 63924 |
+
},
|
| 63925 |
+
{
|
| 63926 |
+
"epoch": 0.7376952609234394,
|
| 63927 |
+
"grad_norm": 0.37346211075782776,
|
| 63928 |
+
"learning_rate": 3.2120628957445076e-05,
|
| 63929 |
+
"loss": 1.2174,
|
| 63930 |
+
"step": 9079
|
| 63931 |
+
},
|
| 63932 |
+
{
|
| 63933 |
+
"epoch": 0.7377765138434662,
|
| 63934 |
+
"grad_norm": 0.3103655278682709,
|
| 63935 |
+
"learning_rate": 3.210187080338087e-05,
|
| 63936 |
+
"loss": 1.2658,
|
| 63937 |
+
"step": 9080
|
| 63938 |
+
},
|
| 63939 |
+
{
|
| 63940 |
+
"epoch": 0.737857766763493,
|
| 63941 |
+
"grad_norm": 0.385791152715683,
|
| 63942 |
+
"learning_rate": 3.208311708090288e-05,
|
| 63943 |
+
"loss": 1.2197,
|
| 63944 |
+
"step": 9081
|
| 63945 |
+
},
|
| 63946 |
+
{
|
| 63947 |
+
"epoch": 0.7379390196835198,
|
| 63948 |
+
"grad_norm": 0.33055251836776733,
|
| 63949 |
+
"learning_rate": 3.206436779123518e-05,
|
| 63950 |
+
"loss": 1.2981,
|
| 63951 |
+
"step": 9082
|
| 63952 |
+
},
|
| 63953 |
+
{
|
| 63954 |
+
"epoch": 0.7380202726035467,
|
| 63955 |
+
"grad_norm": 0.2954128682613373,
|
| 63956 |
+
"learning_rate": 3.204562293560144e-05,
|
| 63957 |
+
"loss": 1.2787,
|
| 63958 |
+
"step": 9083
|
| 63959 |
+
},
|
| 63960 |
+
{
|
| 63961 |
+
"epoch": 0.7381015255235736,
|
| 63962 |
+
"grad_norm": 0.326296329498291,
|
| 63963 |
+
"learning_rate": 3.202688251522518e-05,
|
| 63964 |
+
"loss": 1.17,
|
| 63965 |
+
"step": 9084
|
| 63966 |
+
},
|
| 63967 |
+
{
|
| 63968 |
+
"epoch": 0.7381827784436004,
|
| 63969 |
+
"grad_norm": 0.34508001804351807,
|
| 63970 |
+
"learning_rate": 3.200814653132945e-05,
|
| 63971 |
+
"loss": 1.2064,
|
| 63972 |
+
"step": 9085
|
| 63973 |
+
},
|
| 63974 |
+
{
|
| 63975 |
+
"epoch": 0.7382640313636272,
|
| 63976 |
+
"grad_norm": 0.33513298630714417,
|
| 63977 |
+
"learning_rate": 3.1989414985137225e-05,
|
| 63978 |
+
"loss": 1.1878,
|
| 63979 |
+
"step": 9086
|
| 63980 |
+
},
|
| 63981 |
+
{
|
| 63982 |
+
"epoch": 0.738345284283654,
|
| 63983 |
+
"grad_norm": 0.32026422023773193,
|
| 63984 |
+
"learning_rate": 3.1970687877870984e-05,
|
| 63985 |
+
"loss": 1.2029,
|
| 63986 |
+
"step": 9087
|
| 63987 |
+
},
|
| 63988 |
+
{
|
| 63989 |
+
"epoch": 0.7384265372036808,
|
| 63990 |
+
"grad_norm": 0.3384708762168884,
|
| 63991 |
+
"learning_rate": 3.1951965210753086e-05,
|
| 63992 |
+
"loss": 1.3822,
|
| 63993 |
+
"step": 9088
|
| 63994 |
+
},
|
| 63995 |
+
{
|
| 63996 |
+
"epoch": 0.7385077901237076,
|
| 63997 |
+
"grad_norm": 0.35732302069664,
|
| 63998 |
+
"learning_rate": 3.1933246985005494e-05,
|
| 63999 |
+
"loss": 1.4157,
|
| 64000 |
+
"step": 9089
|
| 64001 |
+
},
|
| 64002 |
+
{
|
| 64003 |
+
"epoch": 0.7385890430437344,
|
| 64004 |
+
"grad_norm": 0.3109513223171234,
|
| 64005 |
+
"learning_rate": 3.1914533201849873e-05,
|
| 64006 |
+
"loss": 1.2396,
|
| 64007 |
+
"step": 9090
|
| 64008 |
+
},
|
| 64009 |
+
{
|
| 64010 |
+
"epoch": 0.7386702959637612,
|
| 64011 |
+
"grad_norm": 0.33402466773986816,
|
| 64012 |
+
"learning_rate": 3.18958238625077e-05,
|
| 64013 |
+
"loss": 1.2844,
|
| 64014 |
+
"step": 9091
|
| 64015 |
+
},
|
| 64016 |
+
{
|
| 64017 |
+
"epoch": 0.738751548883788,
|
| 64018 |
+
"grad_norm": 0.33208608627319336,
|
| 64019 |
+
"learning_rate": 3.187711896820005e-05,
|
| 64020 |
+
"loss": 1.3217,
|
| 64021 |
+
"step": 9092
|
| 64022 |
+
},
|
| 64023 |
+
{
|
| 64024 |
+
"epoch": 0.7388328018038148,
|
| 64025 |
+
"grad_norm": 0.3771694600582123,
|
| 64026 |
+
"learning_rate": 3.185841852014782e-05,
|
| 64027 |
+
"loss": 1.1666,
|
| 64028 |
+
"step": 9093
|
| 64029 |
+
},
|
| 64030 |
+
{
|
| 64031 |
+
"epoch": 0.7389140547238416,
|
| 64032 |
+
"grad_norm": 0.3381398916244507,
|
| 64033 |
+
"learning_rate": 3.1839722519571466e-05,
|
| 64034 |
+
"loss": 1.2446,
|
| 64035 |
+
"step": 9094
|
| 64036 |
+
},
|
| 64037 |
+
{
|
| 64038 |
+
"epoch": 0.7389953076438684,
|
| 64039 |
+
"grad_norm": 0.3422229290008545,
|
| 64040 |
+
"learning_rate": 3.182103096769132e-05,
|
| 64041 |
+
"loss": 1.215,
|
| 64042 |
+
"step": 9095
|
| 64043 |
+
},
|
| 64044 |
+
{
|
| 64045 |
+
"epoch": 0.7390765605638953,
|
| 64046 |
+
"grad_norm": 0.3245979845523834,
|
| 64047 |
+
"learning_rate": 3.180234386572729e-05,
|
| 64048 |
+
"loss": 1.2033,
|
| 64049 |
+
"step": 9096
|
| 64050 |
+
},
|
| 64051 |
+
{
|
| 64052 |
+
"epoch": 0.7391578134839221,
|
| 64053 |
+
"grad_norm": 0.3490990996360779,
|
| 64054 |
+
"learning_rate": 3.178366121489911e-05,
|
| 64055 |
+
"loss": 1.3717,
|
| 64056 |
+
"step": 9097
|
| 64057 |
+
},
|
| 64058 |
+
{
|
| 64059 |
+
"epoch": 0.7392390664039489,
|
| 64060 |
+
"grad_norm": 0.34459319710731506,
|
| 64061 |
+
"learning_rate": 3.176498301642612e-05,
|
| 64062 |
+
"loss": 1.2152,
|
| 64063 |
+
"step": 9098
|
| 64064 |
+
},
|
| 64065 |
+
{
|
| 64066 |
+
"epoch": 0.7393203193239757,
|
| 64067 |
+
"grad_norm": 0.34330037236213684,
|
| 64068 |
+
"learning_rate": 3.1746309271527385e-05,
|
| 64069 |
+
"loss": 1.3088,
|
| 64070 |
+
"step": 9099
|
| 64071 |
+
},
|
| 64072 |
+
{
|
| 64073 |
+
"epoch": 0.7394015722440025,
|
| 64074 |
+
"grad_norm": 0.352679580450058,
|
| 64075 |
+
"learning_rate": 3.1727639981421776e-05,
|
| 64076 |
+
"loss": 1.1924,
|
| 64077 |
+
"step": 9100
|
| 64078 |
+
},
|
| 64079 |
+
{
|
| 64080 |
+
"epoch": 0.7394828251640293,
|
| 64081 |
+
"grad_norm": 0.3242005705833435,
|
| 64082 |
+
"learning_rate": 3.170897514732772e-05,
|
| 64083 |
+
"loss": 1.2136,
|
| 64084 |
+
"step": 9101
|
| 64085 |
+
},
|
| 64086 |
+
{
|
| 64087 |
+
"epoch": 0.7395640780840561,
|
| 64088 |
+
"grad_norm": 0.30310970544815063,
|
| 64089 |
+
"learning_rate": 3.169031477046352e-05,
|
| 64090 |
+
"loss": 1.2164,
|
| 64091 |
+
"step": 9102
|
| 64092 |
+
},
|
| 64093 |
+
{
|
| 64094 |
+
"epoch": 0.7396453310040829,
|
| 64095 |
+
"grad_norm": 0.312107652425766,
|
| 64096 |
+
"learning_rate": 3.1671658852047046e-05,
|
| 64097 |
+
"loss": 1.3525,
|
| 64098 |
+
"step": 9103
|
| 64099 |
+
},
|
| 64100 |
+
{
|
| 64101 |
+
"epoch": 0.7397265839241097,
|
| 64102 |
+
"grad_norm": 0.3404179811477661,
|
| 64103 |
+
"learning_rate": 3.165300739329599e-05,
|
| 64104 |
+
"loss": 1.3124,
|
| 64105 |
+
"step": 9104
|
| 64106 |
+
},
|
| 64107 |
+
{
|
| 64108 |
+
"epoch": 0.7398078368441365,
|
| 64109 |
+
"grad_norm": 0.30616334080696106,
|
| 64110 |
+
"learning_rate": 3.1634360395427655e-05,
|
| 64111 |
+
"loss": 1.3691,
|
| 64112 |
+
"step": 9105
|
| 64113 |
+
},
|
| 64114 |
+
{
|
| 64115 |
+
"epoch": 0.7398890897641635,
|
| 64116 |
+
"grad_norm": 0.3797966539859772,
|
| 64117 |
+
"learning_rate": 3.161571785965907e-05,
|
| 64118 |
+
"loss": 1.2576,
|
| 64119 |
+
"step": 9106
|
| 64120 |
+
},
|
| 64121 |
+
{
|
| 64122 |
+
"epoch": 0.7399703426841903,
|
| 64123 |
+
"grad_norm": 0.32728254795074463,
|
| 64124 |
+
"learning_rate": 3.1597079787207077e-05,
|
| 64125 |
+
"loss": 1.0564,
|
| 64126 |
+
"step": 9107
|
| 64127 |
+
},
|
| 64128 |
+
{
|
| 64129 |
+
"epoch": 0.7400515956042171,
|
| 64130 |
+
"grad_norm": 0.33260443806648254,
|
| 64131 |
+
"learning_rate": 3.157844617928807e-05,
|
| 64132 |
+
"loss": 1.3926,
|
| 64133 |
+
"step": 9108
|
| 64134 |
+
},
|
| 64135 |
+
{
|
| 64136 |
+
"epoch": 0.7401328485242439,
|
| 64137 |
+
"grad_norm": 0.3523138165473938,
|
| 64138 |
+
"learning_rate": 3.15598170371183e-05,
|
| 64139 |
+
"loss": 1.1644,
|
| 64140 |
+
"step": 9109
|
| 64141 |
+
},
|
| 64142 |
+
{
|
| 64143 |
+
"epoch": 0.7402141014442707,
|
| 64144 |
+
"grad_norm": 0.30208835005760193,
|
| 64145 |
+
"learning_rate": 3.1541192361913584e-05,
|
| 64146 |
+
"loss": 1.1864,
|
| 64147 |
+
"step": 9110
|
| 64148 |
+
},
|
| 64149 |
+
{
|
| 64150 |
+
"epoch": 0.7402953543642975,
|
| 64151 |
+
"grad_norm": 0.3135199248790741,
|
| 64152 |
+
"learning_rate": 3.152257215488961e-05,
|
| 64153 |
+
"loss": 1.2826,
|
| 64154 |
+
"step": 9111
|
| 64155 |
+
},
|
| 64156 |
+
{
|
| 64157 |
+
"epoch": 0.7403766072843243,
|
| 64158 |
+
"grad_norm": 0.31153541803359985,
|
| 64159 |
+
"learning_rate": 3.150395641726159e-05,
|
| 64160 |
+
"loss": 1.2264,
|
| 64161 |
+
"step": 9112
|
| 64162 |
+
},
|
| 64163 |
+
{
|
| 64164 |
+
"epoch": 0.7404578602043511,
|
| 64165 |
+
"grad_norm": 0.3170436918735504,
|
| 64166 |
+
"learning_rate": 3.148534515024463e-05,
|
| 64167 |
+
"loss": 1.2778,
|
| 64168 |
+
"step": 9113
|
| 64169 |
+
},
|
| 64170 |
+
{
|
| 64171 |
+
"epoch": 0.7405391131243779,
|
| 64172 |
+
"grad_norm": 0.33815568685531616,
|
| 64173 |
+
"learning_rate": 3.1466738355053395e-05,
|
| 64174 |
+
"loss": 1.1795,
|
| 64175 |
+
"step": 9114
|
| 64176 |
+
},
|
| 64177 |
+
{
|
| 64178 |
+
"epoch": 0.7406203660444047,
|
| 64179 |
+
"grad_norm": 0.354052871465683,
|
| 64180 |
+
"learning_rate": 3.14481360329023e-05,
|
| 64181 |
+
"loss": 1.2374,
|
| 64182 |
+
"step": 9115
|
| 64183 |
+
},
|
| 64184 |
+
{
|
| 64185 |
+
"epoch": 0.7407016189644315,
|
| 64186 |
+
"grad_norm": 0.3765040636062622,
|
| 64187 |
+
"learning_rate": 3.142953818500555e-05,
|
| 64188 |
+
"loss": 1.3717,
|
| 64189 |
+
"step": 9116
|
| 64190 |
+
},
|
| 64191 |
+
{
|
| 64192 |
+
"epoch": 0.7407828718844583,
|
| 64193 |
+
"grad_norm": 0.3269070088863373,
|
| 64194 |
+
"learning_rate": 3.1410944812576916e-05,
|
| 64195 |
+
"loss": 1.2,
|
| 64196 |
+
"step": 9117
|
| 64197 |
+
},
|
| 64198 |
+
{
|
| 64199 |
+
"epoch": 0.7408641248044852,
|
| 64200 |
+
"grad_norm": 0.3338371217250824,
|
| 64201 |
+
"learning_rate": 3.139235591683003e-05,
|
| 64202 |
+
"loss": 1.2424,
|
| 64203 |
+
"step": 9118
|
| 64204 |
+
},
|
| 64205 |
+
{
|
| 64206 |
+
"epoch": 0.740945377724512,
|
| 64207 |
+
"grad_norm": 0.347904235124588,
|
| 64208 |
+
"learning_rate": 3.137377149897809e-05,
|
| 64209 |
+
"loss": 1.364,
|
| 64210 |
+
"step": 9119
|
| 64211 |
+
},
|
| 64212 |
+
{
|
| 64213 |
+
"epoch": 0.7410266306445388,
|
| 64214 |
+
"grad_norm": 0.34670281410217285,
|
| 64215 |
+
"learning_rate": 3.135519156023412e-05,
|
| 64216 |
+
"loss": 1.2374,
|
| 64217 |
+
"step": 9120
|
| 64218 |
+
},
|
| 64219 |
+
{
|
| 64220 |
+
"epoch": 0.7411078835645656,
|
| 64221 |
+
"grad_norm": 0.35943886637687683,
|
| 64222 |
+
"learning_rate": 3.1336616101810776e-05,
|
| 64223 |
+
"loss": 1.2821,
|
| 64224 |
+
"step": 9121
|
| 64225 |
+
},
|
| 64226 |
+
{
|
| 64227 |
+
"epoch": 0.7411891364845924,
|
| 64228 |
+
"grad_norm": 0.31779685616493225,
|
| 64229 |
+
"learning_rate": 3.1318045124920416e-05,
|
| 64230 |
+
"loss": 1.1725,
|
| 64231 |
+
"step": 9122
|
| 64232 |
+
},
|
| 64233 |
+
{
|
| 64234 |
+
"epoch": 0.7412703894046192,
|
| 64235 |
+
"grad_norm": 0.3223528563976288,
|
| 64236 |
+
"learning_rate": 3.1299478630775184e-05,
|
| 64237 |
+
"loss": 1.3209,
|
| 64238 |
+
"step": 9123
|
| 64239 |
+
},
|
| 64240 |
+
{
|
| 64241 |
+
"epoch": 0.741351642324646,
|
| 64242 |
+
"grad_norm": 0.32965174317359924,
|
| 64243 |
+
"learning_rate": 3.128091662058682e-05,
|
| 64244 |
+
"loss": 1.2314,
|
| 64245 |
+
"step": 9124
|
| 64246 |
+
},
|
| 64247 |
+
{
|
| 64248 |
+
"epoch": 0.7414328952446728,
|
| 64249 |
+
"grad_norm": 0.3232046961784363,
|
| 64250 |
+
"learning_rate": 3.126235909556691e-05,
|
| 64251 |
+
"loss": 1.1719,
|
| 64252 |
+
"step": 9125
|
| 64253 |
+
},
|
| 64254 |
+
{
|
| 64255 |
+
"epoch": 0.7415141481646996,
|
| 64256 |
+
"grad_norm": 0.33232125639915466,
|
| 64257 |
+
"learning_rate": 3.1243806056926594e-05,
|
| 64258 |
+
"loss": 1.3169,
|
| 64259 |
+
"step": 9126
|
| 64260 |
+
},
|
| 64261 |
+
{
|
| 64262 |
+
"epoch": 0.7415954010847264,
|
| 64263 |
+
"grad_norm": 0.33275437355041504,
|
| 64264 |
+
"learning_rate": 3.122525750587686e-05,
|
| 64265 |
+
"loss": 1.1767,
|
| 64266 |
+
"step": 9127
|
| 64267 |
+
},
|
| 64268 |
+
{
|
| 64269 |
+
"epoch": 0.7416766540047532,
|
| 64270 |
+
"grad_norm": 0.34603801369667053,
|
| 64271 |
+
"learning_rate": 3.120671344362828e-05,
|
| 64272 |
+
"loss": 1.1955,
|
| 64273 |
+
"step": 9128
|
| 64274 |
+
},
|
| 64275 |
+
{
|
| 64276 |
+
"epoch": 0.7417579069247802,
|
| 64277 |
+
"grad_norm": 0.31008902192115784,
|
| 64278 |
+
"learning_rate": 3.1188173871391236e-05,
|
| 64279 |
+
"loss": 1.0595,
|
| 64280 |
+
"step": 9129
|
| 64281 |
+
},
|
| 64282 |
+
{
|
| 64283 |
+
"epoch": 0.741839159844807,
|
| 64284 |
+
"grad_norm": 0.3435424566268921,
|
| 64285 |
+
"learning_rate": 3.1169638790375765e-05,
|
| 64286 |
+
"loss": 1.2666,
|
| 64287 |
+
"step": 9130
|
| 64288 |
+
},
|
| 64289 |
+
{
|
| 64290 |
+
"epoch": 0.7419204127648338,
|
| 64291 |
+
"grad_norm": 0.32308465242385864,
|
| 64292 |
+
"learning_rate": 3.115110820179157e-05,
|
| 64293 |
+
"loss": 1.2424,
|
| 64294 |
+
"step": 9131
|
| 64295 |
+
},
|
| 64296 |
+
{
|
| 64297 |
+
"epoch": 0.7420016656848606,
|
| 64298 |
+
"grad_norm": 0.35629531741142273,
|
| 64299 |
+
"learning_rate": 3.1132582106848176e-05,
|
| 64300 |
+
"loss": 1.2084,
|
| 64301 |
+
"step": 9132
|
| 64302 |
+
},
|
| 64303 |
+
{
|
| 64304 |
+
"epoch": 0.7420829186048874,
|
| 64305 |
+
"grad_norm": 0.32756277918815613,
|
| 64306 |
+
"learning_rate": 3.1114060506754684e-05,
|
| 64307 |
+
"loss": 1.1962,
|
| 64308 |
+
"step": 9133
|
| 64309 |
+
},
|
| 64310 |
+
{
|
| 64311 |
+
"epoch": 0.7421641715249142,
|
| 64312 |
+
"grad_norm": 0.3760039806365967,
|
| 64313 |
+
"learning_rate": 3.109554340272003e-05,
|
| 64314 |
+
"loss": 1.3643,
|
| 64315 |
+
"step": 9134
|
| 64316 |
+
},
|
| 64317 |
+
{
|
| 64318 |
+
"epoch": 0.742245424444941,
|
| 64319 |
+
"grad_norm": 0.3522399663925171,
|
| 64320 |
+
"learning_rate": 3.107703079595272e-05,
|
| 64321 |
+
"loss": 1.2972,
|
| 64322 |
+
"step": 9135
|
| 64323 |
+
},
|
| 64324 |
+
{
|
| 64325 |
+
"epoch": 0.7423266773649678,
|
| 64326 |
+
"grad_norm": 0.3352539837360382,
|
| 64327 |
+
"learning_rate": 3.105852268766112e-05,
|
| 64328 |
+
"loss": 1.2868,
|
| 64329 |
+
"step": 9136
|
| 64330 |
+
},
|
| 64331 |
+
{
|
| 64332 |
+
"epoch": 0.7424079302849946,
|
| 64333 |
+
"grad_norm": 0.3391285836696625,
|
| 64334 |
+
"learning_rate": 3.104001907905317e-05,
|
| 64335 |
+
"loss": 1.1879,
|
| 64336 |
+
"step": 9137
|
| 64337 |
+
},
|
| 64338 |
+
{
|
| 64339 |
+
"epoch": 0.7424891832050214,
|
| 64340 |
+
"grad_norm": 0.3408769965171814,
|
| 64341 |
+
"learning_rate": 3.102151997133654e-05,
|
| 64342 |
+
"loss": 1.2441,
|
| 64343 |
+
"step": 9138
|
| 64344 |
+
},
|
| 64345 |
+
{
|
| 64346 |
+
"epoch": 0.7425704361250482,
|
| 64347 |
+
"grad_norm": 0.3147300183773041,
|
| 64348 |
+
"learning_rate": 3.1003025365718695e-05,
|
| 64349 |
+
"loss": 1.2876,
|
| 64350 |
+
"step": 9139
|
| 64351 |
+
},
|
| 64352 |
+
{
|
| 64353 |
+
"epoch": 0.7426516890450751,
|
| 64354 |
+
"grad_norm": 0.3416130542755127,
|
| 64355 |
+
"learning_rate": 3.098453526340669e-05,
|
| 64356 |
+
"loss": 1.2842,
|
| 64357 |
+
"step": 9140
|
| 64358 |
+
},
|
| 64359 |
+
{
|
| 64360 |
+
"epoch": 0.7427329419651019,
|
| 64361 |
+
"grad_norm": 0.33703699707984924,
|
| 64362 |
+
"learning_rate": 3.09660496656074e-05,
|
| 64363 |
+
"loss": 1.3103,
|
| 64364 |
+
"step": 9141
|
| 64365 |
+
},
|
| 64366 |
+
{
|
| 64367 |
+
"epoch": 0.7428141948851287,
|
| 64368 |
+
"grad_norm": 0.29872438311576843,
|
| 64369 |
+
"learning_rate": 3.094756857352726e-05,
|
| 64370 |
+
"loss": 1.224,
|
| 64371 |
+
"step": 9142
|
| 64372 |
+
},
|
| 64373 |
+
{
|
| 64374 |
+
"epoch": 0.7428954478051555,
|
| 64375 |
+
"grad_norm": 0.42391490936279297,
|
| 64376 |
+
"learning_rate": 3.0929091988372596e-05,
|
| 64377 |
+
"loss": 1.1894,
|
| 64378 |
+
"step": 9143
|
| 64379 |
+
},
|
| 64380 |
+
{
|
| 64381 |
+
"epoch": 0.7429767007251823,
|
| 64382 |
+
"grad_norm": 0.3333875238895416,
|
| 64383 |
+
"learning_rate": 3.091061991134926e-05,
|
| 64384 |
+
"loss": 1.2733,
|
| 64385 |
+
"step": 9144
|
| 64386 |
+
},
|
| 64387 |
+
{
|
| 64388 |
+
"epoch": 0.7430579536452091,
|
| 64389 |
+
"grad_norm": 0.34605643153190613,
|
| 64390 |
+
"learning_rate": 3.089215234366295e-05,
|
| 64391 |
+
"loss": 1.258,
|
| 64392 |
+
"step": 9145
|
| 64393 |
+
},
|
| 64394 |
+
{
|
| 64395 |
+
"epoch": 0.7431392065652359,
|
| 64396 |
+
"grad_norm": 0.33759409189224243,
|
| 64397 |
+
"learning_rate": 3.087368928651898e-05,
|
| 64398 |
+
"loss": 1.3026,
|
| 64399 |
+
"step": 9146
|
| 64400 |
+
},
|
| 64401 |
+
{
|
| 64402 |
+
"epoch": 0.7432204594852627,
|
| 64403 |
+
"grad_norm": 0.34033578634262085,
|
| 64404 |
+
"learning_rate": 3.085523074112238e-05,
|
| 64405 |
+
"loss": 1.2499,
|
| 64406 |
+
"step": 9147
|
| 64407 |
+
},
|
| 64408 |
+
{
|
| 64409 |
+
"epoch": 0.7433017124052895,
|
| 64410 |
+
"grad_norm": 0.3603235185146332,
|
| 64411 |
+
"learning_rate": 3.083677670867795e-05,
|
| 64412 |
+
"loss": 1.2516,
|
| 64413 |
+
"step": 9148
|
| 64414 |
+
},
|
| 64415 |
+
{
|
| 64416 |
+
"epoch": 0.7433829653253163,
|
| 64417 |
+
"grad_norm": 0.34968891739845276,
|
| 64418 |
+
"learning_rate": 3.0818327190390106e-05,
|
| 64419 |
+
"loss": 1.3573,
|
| 64420 |
+
"step": 9149
|
| 64421 |
+
},
|
| 64422 |
+
{
|
| 64423 |
+
"epoch": 0.7434642182453431,
|
| 64424 |
+
"grad_norm": 0.3203131854534149,
|
| 64425 |
+
"learning_rate": 3.079988218746307e-05,
|
| 64426 |
+
"loss": 1.3328,
|
| 64427 |
+
"step": 9150
|
| 64428 |
+
},
|
| 64429 |
+
{
|
| 64430 |
+
"epoch": 0.74354547116537,
|
| 64431 |
+
"grad_norm": 0.32282334566116333,
|
| 64432 |
+
"learning_rate": 3.0781441701100645e-05,
|
| 64433 |
+
"loss": 1.2059,
|
| 64434 |
+
"step": 9151
|
| 64435 |
+
},
|
| 64436 |
+
{
|
| 64437 |
+
"epoch": 0.7436267240853969,
|
| 64438 |
+
"grad_norm": 0.36723271012306213,
|
| 64439 |
+
"learning_rate": 3.0763005732506467e-05,
|
| 64440 |
+
"loss": 1.1064,
|
| 64441 |
+
"step": 9152
|
| 64442 |
+
},
|
| 64443 |
+
{
|
| 64444 |
+
"epoch": 0.7437079770054237,
|
| 64445 |
+
"grad_norm": 0.3379858434200287,
|
| 64446 |
+
"learning_rate": 3.0744574282883776e-05,
|
| 64447 |
+
"loss": 1.3848,
|
| 64448 |
+
"step": 9153
|
| 64449 |
+
},
|
| 64450 |
+
{
|
| 64451 |
+
"epoch": 0.7437892299254505,
|
| 64452 |
+
"grad_norm": 0.3310505449771881,
|
| 64453 |
+
"learning_rate": 3.072614735343563e-05,
|
| 64454 |
+
"loss": 1.1985,
|
| 64455 |
+
"step": 9154
|
| 64456 |
+
},
|
| 64457 |
+
{
|
| 64458 |
+
"epoch": 0.7438704828454773,
|
| 64459 |
+
"grad_norm": 0.3467575013637543,
|
| 64460 |
+
"learning_rate": 3.070772494536461e-05,
|
| 64461 |
+
"loss": 1.3874,
|
| 64462 |
+
"step": 9155
|
| 64463 |
+
},
|
| 64464 |
+
{
|
| 64465 |
+
"epoch": 0.7439517357655041,
|
| 64466 |
+
"grad_norm": 0.3295603096485138,
|
| 64467 |
+
"learning_rate": 3.0689307059873205e-05,
|
| 64468 |
+
"loss": 1.2214,
|
| 64469 |
+
"step": 9156
|
| 64470 |
+
},
|
| 64471 |
+
{
|
| 64472 |
+
"epoch": 0.7440329886855309,
|
| 64473 |
+
"grad_norm": 0.31485670804977417,
|
| 64474 |
+
"learning_rate": 3.067089369816345e-05,
|
| 64475 |
+
"loss": 1.3145,
|
| 64476 |
+
"step": 9157
|
| 64477 |
+
},
|
| 64478 |
+
{
|
| 64479 |
+
"epoch": 0.7441142416055577,
|
| 64480 |
+
"grad_norm": 0.320719838142395,
|
| 64481 |
+
"learning_rate": 3.065248486143718e-05,
|
| 64482 |
+
"loss": 1.2323,
|
| 64483 |
+
"step": 9158
|
| 64484 |
+
},
|
| 64485 |
+
{
|
| 64486 |
+
"epoch": 0.7441954945255845,
|
| 64487 |
+
"grad_norm": 0.33504194021224976,
|
| 64488 |
+
"learning_rate": 3.063408055089595e-05,
|
| 64489 |
+
"loss": 1.1288,
|
| 64490 |
+
"step": 9159
|
| 64491 |
+
},
|
| 64492 |
+
{
|
| 64493 |
+
"epoch": 0.7442767474456113,
|
| 64494 |
+
"grad_norm": 0.3372930586338043,
|
| 64495 |
+
"learning_rate": 3.0615680767740896e-05,
|
| 64496 |
+
"loss": 1.212,
|
| 64497 |
+
"step": 9160
|
| 64498 |
+
},
|
| 64499 |
+
{
|
| 64500 |
+
"epoch": 0.7443580003656382,
|
| 64501 |
+
"grad_norm": 0.30762991309165955,
|
| 64502 |
+
"learning_rate": 3.059728551317301e-05,
|
| 64503 |
+
"loss": 1.2057,
|
| 64504 |
+
"step": 9161
|
| 64505 |
+
},
|
| 64506 |
+
{
|
| 64507 |
+
"epoch": 0.744439253285665,
|
| 64508 |
+
"grad_norm": 0.3550109565258026,
|
| 64509 |
+
"learning_rate": 3.057889478839289e-05,
|
| 64510 |
+
"loss": 1.2898,
|
| 64511 |
+
"step": 9162
|
| 64512 |
+
},
|
| 64513 |
+
{
|
| 64514 |
+
"epoch": 0.7445205062056918,
|
| 64515 |
+
"grad_norm": 0.32850003242492676,
|
| 64516 |
+
"learning_rate": 3.056050859460085e-05,
|
| 64517 |
+
"loss": 1.2147,
|
| 64518 |
+
"step": 9163
|
| 64519 |
+
},
|
| 64520 |
+
{
|
| 64521 |
+
"epoch": 0.7446017591257186,
|
| 64522 |
+
"grad_norm": 0.31743425130844116,
|
| 64523 |
+
"learning_rate": 3.0542126932996896e-05,
|
| 64524 |
+
"loss": 1.2501,
|
| 64525 |
+
"step": 9164
|
| 64526 |
+
},
|
| 64527 |
+
{
|
| 64528 |
+
"epoch": 0.7446830120457454,
|
| 64529 |
+
"grad_norm": 0.3677429258823395,
|
| 64530 |
+
"learning_rate": 3.0523749804780844e-05,
|
| 64531 |
+
"loss": 1.1736,
|
| 64532 |
+
"step": 9165
|
| 64533 |
+
},
|
| 64534 |
+
{
|
| 64535 |
+
"epoch": 0.7447642649657722,
|
| 64536 |
+
"grad_norm": 0.35321274399757385,
|
| 64537 |
+
"learning_rate": 3.0505377211152054e-05,
|
| 64538 |
+
"loss": 1.3599,
|
| 64539 |
+
"step": 9166
|
| 64540 |
+
},
|
| 64541 |
+
{
|
| 64542 |
+
"epoch": 0.744845517885799,
|
| 64543 |
+
"grad_norm": 0.35959261655807495,
|
| 64544 |
+
"learning_rate": 3.048700915330972e-05,
|
| 64545 |
+
"loss": 1.3706,
|
| 64546 |
+
"step": 9167
|
| 64547 |
+
},
|
| 64548 |
+
{
|
| 64549 |
+
"epoch": 0.7449267708058258,
|
| 64550 |
+
"grad_norm": 0.33937638998031616,
|
| 64551 |
+
"learning_rate": 3.0468645632452696e-05,
|
| 64552 |
+
"loss": 1.2069,
|
| 64553 |
+
"step": 9168
|
| 64554 |
+
},
|
| 64555 |
+
{
|
| 64556 |
+
"epoch": 0.7450080237258526,
|
| 64557 |
+
"grad_norm": 0.3306202292442322,
|
| 64558 |
+
"learning_rate": 3.0450286649779503e-05,
|
| 64559 |
+
"loss": 1.4208,
|
| 64560 |
+
"step": 9169
|
| 64561 |
+
},
|
| 64562 |
+
{
|
| 64563 |
+
"epoch": 0.7450892766458794,
|
| 64564 |
+
"grad_norm": 0.3024480640888214,
|
| 64565 |
+
"learning_rate": 3.043193220648848e-05,
|
| 64566 |
+
"loss": 1.2818,
|
| 64567 |
+
"step": 9170
|
| 64568 |
+
},
|
| 64569 |
+
{
|
| 64570 |
+
"epoch": 0.7451705295659062,
|
| 64571 |
+
"grad_norm": 0.35168448090553284,
|
| 64572 |
+
"learning_rate": 3.0413582303777454e-05,
|
| 64573 |
+
"loss": 1.1555,
|
| 64574 |
+
"step": 9171
|
| 64575 |
+
},
|
| 64576 |
+
{
|
| 64577 |
+
"epoch": 0.745251782485933,
|
| 64578 |
+
"grad_norm": 0.3499079942703247,
|
| 64579 |
+
"learning_rate": 3.0395236942844186e-05,
|
| 64580 |
+
"loss": 1.3723,
|
| 64581 |
+
"step": 9172
|
| 64582 |
+
},
|
| 64583 |
+
{
|
| 64584 |
+
"epoch": 0.7453330354059599,
|
| 64585 |
+
"grad_norm": 0.3550935387611389,
|
| 64586 |
+
"learning_rate": 3.0376896124886002e-05,
|
| 64587 |
+
"loss": 1.1615,
|
| 64588 |
+
"step": 9173
|
| 64589 |
+
},
|
| 64590 |
+
{
|
| 64591 |
+
"epoch": 0.7454142883259867,
|
| 64592 |
+
"grad_norm": 0.2961089611053467,
|
| 64593 |
+
"learning_rate": 3.03585598511e-05,
|
| 64594 |
+
"loss": 1.2438,
|
| 64595 |
+
"step": 9174
|
| 64596 |
+
},
|
| 64597 |
+
{
|
| 64598 |
+
"epoch": 0.7454955412460136,
|
| 64599 |
+
"grad_norm": 0.34811124205589294,
|
| 64600 |
+
"learning_rate": 3.0340228122682922e-05,
|
| 64601 |
+
"loss": 1.2854,
|
| 64602 |
+
"step": 9175
|
| 64603 |
+
},
|
| 64604 |
+
{
|
| 64605 |
+
"epoch": 0.7455767941660404,
|
| 64606 |
+
"grad_norm": 0.3188280165195465,
|
| 64607 |
+
"learning_rate": 3.032190094083127e-05,
|
| 64608 |
+
"loss": 1.3945,
|
| 64609 |
+
"step": 9176
|
| 64610 |
+
},
|
| 64611 |
+
{
|
| 64612 |
+
"epoch": 0.7456580470860672,
|
| 64613 |
+
"grad_norm": 0.3317304849624634,
|
| 64614 |
+
"learning_rate": 3.0303578306741254e-05,
|
| 64615 |
+
"loss": 1.105,
|
| 64616 |
+
"step": 9177
|
| 64617 |
+
},
|
| 64618 |
+
{
|
| 64619 |
+
"epoch": 0.745739300006094,
|
| 64620 |
+
"grad_norm": 0.32441195845603943,
|
| 64621 |
+
"learning_rate": 3.0285260221608725e-05,
|
| 64622 |
+
"loss": 1.2027,
|
| 64623 |
+
"step": 9178
|
| 64624 |
+
},
|
| 64625 |
+
{
|
| 64626 |
+
"epoch": 0.7458205529261208,
|
| 64627 |
+
"grad_norm": 0.357093870639801,
|
| 64628 |
+
"learning_rate": 3.026694668662927e-05,
|
| 64629 |
+
"loss": 1.1876,
|
| 64630 |
+
"step": 9179
|
| 64631 |
+
},
|
| 64632 |
+
{
|
| 64633 |
+
"epoch": 0.7459018058461476,
|
| 64634 |
+
"grad_norm": 0.36744824051856995,
|
| 64635 |
+
"learning_rate": 3.024863770299816e-05,
|
| 64636 |
+
"loss": 1.2186,
|
| 64637 |
+
"step": 9180
|
| 64638 |
+
},
|
| 64639 |
+
{
|
| 64640 |
+
"epoch": 0.7459830587661744,
|
| 64641 |
+
"grad_norm": 0.3444533944129944,
|
| 64642 |
+
"learning_rate": 3.023033327191045e-05,
|
| 64643 |
+
"loss": 1.2738,
|
| 64644 |
+
"step": 9181
|
| 64645 |
+
},
|
| 64646 |
+
{
|
| 64647 |
+
"epoch": 0.7460643116862012,
|
| 64648 |
+
"grad_norm": 0.3519356846809387,
|
| 64649 |
+
"learning_rate": 3.0212033394560756e-05,
|
| 64650 |
+
"loss": 1.2089,
|
| 64651 |
+
"step": 9182
|
| 64652 |
+
},
|
| 64653 |
+
{
|
| 64654 |
+
"epoch": 0.746145564606228,
|
| 64655 |
+
"grad_norm": 0.37970083951950073,
|
| 64656 |
+
"learning_rate": 3.019373807214355e-05,
|
| 64657 |
+
"loss": 1.2327,
|
| 64658 |
+
"step": 9183
|
| 64659 |
+
},
|
| 64660 |
+
{
|
| 64661 |
+
"epoch": 0.7462268175262549,
|
| 64662 |
+
"grad_norm": 0.33998361229896545,
|
| 64663 |
+
"learning_rate": 3.0175447305852878e-05,
|
| 64664 |
+
"loss": 1.2462,
|
| 64665 |
+
"step": 9184
|
| 64666 |
+
},
|
| 64667 |
+
{
|
| 64668 |
+
"epoch": 0.7463080704462817,
|
| 64669 |
+
"grad_norm": 0.34882432222366333,
|
| 64670 |
+
"learning_rate": 3.0157161096882568e-05,
|
| 64671 |
+
"loss": 1.3215,
|
| 64672 |
+
"step": 9185
|
| 64673 |
+
},
|
| 64674 |
+
{
|
| 64675 |
+
"epoch": 0.7463893233663085,
|
| 64676 |
+
"grad_norm": 0.3606092035770416,
|
| 64677 |
+
"learning_rate": 3.0138879446426204e-05,
|
| 64678 |
+
"loss": 1.1736,
|
| 64679 |
+
"step": 9186
|
| 64680 |
+
},
|
| 64681 |
+
{
|
| 64682 |
+
"epoch": 0.7464705762863353,
|
| 64683 |
+
"grad_norm": 0.3216451406478882,
|
| 64684 |
+
"learning_rate": 3.0120602355676852e-05,
|
| 64685 |
+
"loss": 1.1357,
|
| 64686 |
+
"step": 9187
|
| 64687 |
+
},
|
| 64688 |
+
{
|
| 64689 |
+
"epoch": 0.7465518292063621,
|
| 64690 |
+
"grad_norm": 0.33774533867836,
|
| 64691 |
+
"learning_rate": 3.0102329825827526e-05,
|
| 64692 |
+
"loss": 1.1731,
|
| 64693 |
+
"step": 9188
|
| 64694 |
+
},
|
| 64695 |
+
{
|
| 64696 |
+
"epoch": 0.7466330821263889,
|
| 64697 |
+
"grad_norm": 0.3276657164096832,
|
| 64698 |
+
"learning_rate": 3.0084061858070778e-05,
|
| 64699 |
+
"loss": 1.2376,
|
| 64700 |
+
"step": 9189
|
| 64701 |
+
},
|
| 64702 |
+
{
|
| 64703 |
+
"epoch": 0.7467143350464157,
|
| 64704 |
+
"grad_norm": 0.336821585893631,
|
| 64705 |
+
"learning_rate": 3.006579845359898e-05,
|
| 64706 |
+
"loss": 1.3018,
|
| 64707 |
+
"step": 9190
|
| 64708 |
+
},
|
| 64709 |
+
{
|
| 64710 |
+
"epoch": 0.7467955879664425,
|
| 64711 |
+
"grad_norm": 0.332073450088501,
|
| 64712 |
+
"learning_rate": 3.0047539613604104e-05,
|
| 64713 |
+
"loss": 1.2073,
|
| 64714 |
+
"step": 9191
|
| 64715 |
+
},
|
| 64716 |
+
{
|
| 64717 |
+
"epoch": 0.7468768408864693,
|
| 64718 |
+
"grad_norm": 0.38143953680992126,
|
| 64719 |
+
"learning_rate": 3.0029285339277925e-05,
|
| 64720 |
+
"loss": 1.435,
|
| 64721 |
+
"step": 9192
|
| 64722 |
+
},
|
| 64723 |
+
{
|
| 64724 |
+
"epoch": 0.7469580938064961,
|
| 64725 |
+
"grad_norm": 0.3325652778148651,
|
| 64726 |
+
"learning_rate": 3.0011035631811803e-05,
|
| 64727 |
+
"loss": 1.2437,
|
| 64728 |
+
"step": 9193
|
| 64729 |
+
},
|
| 64730 |
+
{
|
| 64731 |
+
"epoch": 0.747039346726523,
|
| 64732 |
+
"grad_norm": 0.3207436203956604,
|
| 64733 |
+
"learning_rate": 2.9992790492396916e-05,
|
| 64734 |
+
"loss": 1.2947,
|
| 64735 |
+
"step": 9194
|
| 64736 |
+
},
|
| 64737 |
+
{
|
| 64738 |
+
"epoch": 0.7471205996465498,
|
| 64739 |
+
"grad_norm": 0.3105325400829315,
|
| 64740 |
+
"learning_rate": 2.9974549922224072e-05,
|
| 64741 |
+
"loss": 1.3212,
|
| 64742 |
+
"step": 9195
|
| 64743 |
+
},
|
| 64744 |
+
{
|
| 64745 |
+
"epoch": 0.7472018525665766,
|
| 64746 |
+
"grad_norm": 0.3505750298500061,
|
| 64747 |
+
"learning_rate": 2.995631392248377e-05,
|
| 64748 |
+
"loss": 1.2632,
|
| 64749 |
+
"step": 9196
|
| 64750 |
+
},
|
| 64751 |
+
{
|
| 64752 |
+
"epoch": 0.7472831054866034,
|
| 64753 |
+
"grad_norm": 0.3020137548446655,
|
| 64754 |
+
"learning_rate": 2.9938082494366305e-05,
|
| 64755 |
+
"loss": 1.1906,
|
| 64756 |
+
"step": 9197
|
| 64757 |
+
},
|
| 64758 |
+
{
|
| 64759 |
+
"epoch": 0.7473643584066303,
|
| 64760 |
+
"grad_norm": 0.34674298763275146,
|
| 64761 |
+
"learning_rate": 2.9919855639061522e-05,
|
| 64762 |
+
"loss": 1.2134,
|
| 64763 |
+
"step": 9198
|
| 64764 |
+
},
|
| 64765 |
+
{
|
| 64766 |
+
"epoch": 0.7474456113266571,
|
| 64767 |
+
"grad_norm": 0.31727543473243713,
|
| 64768 |
+
"learning_rate": 2.990163335775915e-05,
|
| 64769 |
+
"loss": 1.4379,
|
| 64770 |
+
"step": 9199
|
| 64771 |
+
},
|
| 64772 |
+
{
|
| 64773 |
+
"epoch": 0.7475268642466839,
|
| 64774 |
+
"grad_norm": 0.35940784215927124,
|
| 64775 |
+
"learning_rate": 2.988341565164844e-05,
|
| 64776 |
+
"loss": 1.3599,
|
| 64777 |
+
"step": 9200
|
| 64778 |
+
},
|
| 64779 |
+
{
|
| 64780 |
+
"epoch": 0.7475268642466839,
|
| 64781 |
+
"eval_loss": 1.2265260219573975,
|
| 64782 |
+
"eval_runtime": 56.5338,
|
| 64783 |
+
"eval_samples_per_second": 8.756,
|
| 64784 |
+
"eval_steps_per_second": 4.387,
|
| 64785 |
+
"step": 9200
|
| 64786 |
}
|
| 64787 |
],
|
| 64788 |
"logging_steps": 1,
|
|
|
|
| 64811 |
"attributes": {}
|
| 64812 |
}
|
| 64813 |
},
|
| 64814 |
+
"total_flos": 7.610792241266688e+17,
|
| 64815 |
"train_batch_size": 2,
|
| 64816 |
"trial_name": null,
|
| 64817 |
"trial_params": null
|