Training in progress, step 1570, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 50365768
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4708548b45c8348628ae58470ab2c8de30081e4857f0ea0f0012c207b4b19a1
|
| 3 |
size 50365768
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 25859412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3eafb75aabef5d499a7b4b505c732d9aae83f1e9b33ae5c2a8b87bfb0e3d8ea7
|
| 3 |
size 25859412
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9120e5e0f2890bc6d75221f2df80f8c47456ada42a7b7b6bb80bbadd64eed705
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1f9acbfb9eb31ecbbbaf93a19358576018d3e7f8c62edbce6b77a1fdef5699a
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 393,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -8292,6 +8292,2743 @@
|
|
| 8292 |
"eval_samples_per_second": 18.037,
|
| 8293 |
"eval_steps_per_second": 9.028,
|
| 8294 |
"step": 1179
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8295 |
}
|
| 8296 |
],
|
| 8297 |
"logging_steps": 1,
|
|
@@ -8306,12 +11043,12 @@
|
|
| 8306 |
"should_evaluate": false,
|
| 8307 |
"should_log": false,
|
| 8308 |
"should_save": true,
|
| 8309 |
-
"should_training_stop":
|
| 8310 |
},
|
| 8311 |
"attributes": {}
|
| 8312 |
}
|
| 8313 |
},
|
| 8314 |
-
"total_flos": 2.
|
| 8315 |
"train_batch_size": 2,
|
| 8316 |
"trial_name": null,
|
| 8317 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.7105680018103643,
|
| 5 |
"eval_steps": 393,
|
| 6 |
+
"global_step": 1570,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 8292 |
"eval_samples_per_second": 18.037,
|
| 8293 |
"eval_steps_per_second": 9.028,
|
| 8294 |
"step": 1179
|
| 8295 |
+
},
|
| 8296 |
+
{
|
| 8297 |
+
"epoch": 0.5340574790676623,
|
| 8298 |
+
"grad_norm": 0.48865947127342224,
|
| 8299 |
+
"learning_rate": 2.9289321881345254e-05,
|
| 8300 |
+
"loss": 0.762,
|
| 8301 |
+
"step": 1180
|
| 8302 |
+
},
|
| 8303 |
+
{
|
| 8304 |
+
"epoch": 0.534510070151618,
|
| 8305 |
+
"grad_norm": 0.5784907341003418,
|
| 8306 |
+
"learning_rate": 2.9147065268982666e-05,
|
| 8307 |
+
"loss": 1.0548,
|
| 8308 |
+
"step": 1181
|
| 8309 |
+
},
|
| 8310 |
+
{
|
| 8311 |
+
"epoch": 0.5349626612355737,
|
| 8312 |
+
"grad_norm": 0.5535488128662109,
|
| 8313 |
+
"learning_rate": 2.9005096004664177e-05,
|
| 8314 |
+
"loss": 0.8442,
|
| 8315 |
+
"step": 1182
|
| 8316 |
+
},
|
| 8317 |
+
{
|
| 8318 |
+
"epoch": 0.5354152523195294,
|
| 8319 |
+
"grad_norm": 0.541508674621582,
|
| 8320 |
+
"learning_rate": 2.886341466415412e-05,
|
| 8321 |
+
"loss": 0.6527,
|
| 8322 |
+
"step": 1183
|
| 8323 |
+
},
|
| 8324 |
+
{
|
| 8325 |
+
"epoch": 0.5358678434034849,
|
| 8326 |
+
"grad_norm": 0.5890561938285828,
|
| 8327 |
+
"learning_rate": 2.8722021822049027e-05,
|
| 8328 |
+
"loss": 0.8764,
|
| 8329 |
+
"step": 1184
|
| 8330 |
+
},
|
| 8331 |
+
{
|
| 8332 |
+
"epoch": 0.5363204344874406,
|
| 8333 |
+
"grad_norm": 0.4952785074710846,
|
| 8334 |
+
"learning_rate": 2.858091805177554e-05,
|
| 8335 |
+
"loss": 0.8619,
|
| 8336 |
+
"step": 1185
|
| 8337 |
+
},
|
| 8338 |
+
{
|
| 8339 |
+
"epoch": 0.5367730255713963,
|
| 8340 |
+
"grad_norm": 0.4867090582847595,
|
| 8341 |
+
"learning_rate": 2.84401039255879e-05,
|
| 8342 |
+
"loss": 0.7619,
|
| 8343 |
+
"step": 1186
|
| 8344 |
+
},
|
| 8345 |
+
{
|
| 8346 |
+
"epoch": 0.5372256166553518,
|
| 8347 |
+
"grad_norm": 0.5630788207054138,
|
| 8348 |
+
"learning_rate": 2.8299580014565664e-05,
|
| 8349 |
+
"loss": 0.9779,
|
| 8350 |
+
"step": 1187
|
| 8351 |
+
},
|
| 8352 |
+
{
|
| 8353 |
+
"epoch": 0.5376782077393075,
|
| 8354 |
+
"grad_norm": 0.5053603053092957,
|
| 8355 |
+
"learning_rate": 2.815934688861146e-05,
|
| 8356 |
+
"loss": 0.8156,
|
| 8357 |
+
"step": 1188
|
| 8358 |
+
},
|
| 8359 |
+
{
|
| 8360 |
+
"epoch": 0.5381307988232632,
|
| 8361 |
+
"grad_norm": 0.463606595993042,
|
| 8362 |
+
"learning_rate": 2.8019405116448516e-05,
|
| 8363 |
+
"loss": 0.6967,
|
| 8364 |
+
"step": 1189
|
| 8365 |
+
},
|
| 8366 |
+
{
|
| 8367 |
+
"epoch": 0.5385833899072189,
|
| 8368 |
+
"grad_norm": 0.4266325533390045,
|
| 8369 |
+
"learning_rate": 2.7879755265618555e-05,
|
| 8370 |
+
"loss": 0.6799,
|
| 8371 |
+
"step": 1190
|
| 8372 |
+
},
|
| 8373 |
+
{
|
| 8374 |
+
"epoch": 0.5390359809911744,
|
| 8375 |
+
"grad_norm": 0.4817747175693512,
|
| 8376 |
+
"learning_rate": 2.7740397902479387e-05,
|
| 8377 |
+
"loss": 0.765,
|
| 8378 |
+
"step": 1191
|
| 8379 |
+
},
|
| 8380 |
+
{
|
| 8381 |
+
"epoch": 0.5394885720751301,
|
| 8382 |
+
"grad_norm": 0.5528002381324768,
|
| 8383 |
+
"learning_rate": 2.7601333592202583e-05,
|
| 8384 |
+
"loss": 0.8884,
|
| 8385 |
+
"step": 1192
|
| 8386 |
+
},
|
| 8387 |
+
{
|
| 8388 |
+
"epoch": 0.5399411631590858,
|
| 8389 |
+
"grad_norm": 0.6013336777687073,
|
| 8390 |
+
"learning_rate": 2.746256289877126e-05,
|
| 8391 |
+
"loss": 1.1148,
|
| 8392 |
+
"step": 1193
|
| 8393 |
+
},
|
| 8394 |
+
{
|
| 8395 |
+
"epoch": 0.5403937542430414,
|
| 8396 |
+
"grad_norm": 0.45258426666259766,
|
| 8397 |
+
"learning_rate": 2.7324086384977698e-05,
|
| 8398 |
+
"loss": 0.6737,
|
| 8399 |
+
"step": 1194
|
| 8400 |
+
},
|
| 8401 |
+
{
|
| 8402 |
+
"epoch": 0.540846345326997,
|
| 8403 |
+
"grad_norm": 0.4618009030818939,
|
| 8404 |
+
"learning_rate": 2.7185904612421176e-05,
|
| 8405 |
+
"loss": 0.6267,
|
| 8406 |
+
"step": 1195
|
| 8407 |
+
},
|
| 8408 |
+
{
|
| 8409 |
+
"epoch": 0.5412989364109527,
|
| 8410 |
+
"grad_norm": 0.5477524399757385,
|
| 8411 |
+
"learning_rate": 2.7048018141505604e-05,
|
| 8412 |
+
"loss": 0.6358,
|
| 8413 |
+
"step": 1196
|
| 8414 |
+
},
|
| 8415 |
+
{
|
| 8416 |
+
"epoch": 0.5417515274949084,
|
| 8417 |
+
"grad_norm": 0.5210055112838745,
|
| 8418 |
+
"learning_rate": 2.6910427531437287e-05,
|
| 8419 |
+
"loss": 0.8353,
|
| 8420 |
+
"step": 1197
|
| 8421 |
+
},
|
| 8422 |
+
{
|
| 8423 |
+
"epoch": 0.542204118578864,
|
| 8424 |
+
"grad_norm": 0.489916056394577,
|
| 8425 |
+
"learning_rate": 2.677313334022268e-05,
|
| 8426 |
+
"loss": 0.7661,
|
| 8427 |
+
"step": 1198
|
| 8428 |
+
},
|
| 8429 |
+
{
|
| 8430 |
+
"epoch": 0.5426567096628196,
|
| 8431 |
+
"grad_norm": 0.5317739844322205,
|
| 8432 |
+
"learning_rate": 2.6636136124666e-05,
|
| 8433 |
+
"loss": 0.8718,
|
| 8434 |
+
"step": 1199
|
| 8435 |
+
},
|
| 8436 |
+
{
|
| 8437 |
+
"epoch": 0.5431093007467753,
|
| 8438 |
+
"grad_norm": 0.563149094581604,
|
| 8439 |
+
"learning_rate": 2.6499436440367165e-05,
|
| 8440 |
+
"loss": 1.0695,
|
| 8441 |
+
"step": 1200
|
| 8442 |
+
},
|
| 8443 |
+
{
|
| 8444 |
+
"epoch": 0.5435618918307309,
|
| 8445 |
+
"grad_norm": 0.6181795597076416,
|
| 8446 |
+
"learning_rate": 2.6363034841719392e-05,
|
| 8447 |
+
"loss": 0.6715,
|
| 8448 |
+
"step": 1201
|
| 8449 |
+
},
|
| 8450 |
+
{
|
| 8451 |
+
"epoch": 0.5440144829146866,
|
| 8452 |
+
"grad_norm": 0.5255316495895386,
|
| 8453 |
+
"learning_rate": 2.622693188190699e-05,
|
| 8454 |
+
"loss": 0.7982,
|
| 8455 |
+
"step": 1202
|
| 8456 |
+
},
|
| 8457 |
+
{
|
| 8458 |
+
"epoch": 0.5444670739986422,
|
| 8459 |
+
"grad_norm": 0.5731631517410278,
|
| 8460 |
+
"learning_rate": 2.609112811290315e-05,
|
| 8461 |
+
"loss": 0.9418,
|
| 8462 |
+
"step": 1203
|
| 8463 |
+
},
|
| 8464 |
+
{
|
| 8465 |
+
"epoch": 0.5449196650825979,
|
| 8466 |
+
"grad_norm": 0.44508278369903564,
|
| 8467 |
+
"learning_rate": 2.59556240854677e-05,
|
| 8468 |
+
"loss": 0.7452,
|
| 8469 |
+
"step": 1204
|
| 8470 |
+
},
|
| 8471 |
+
{
|
| 8472 |
+
"epoch": 0.5453722561665535,
|
| 8473 |
+
"grad_norm": 0.5581162571907043,
|
| 8474 |
+
"learning_rate": 2.5820420349144693e-05,
|
| 8475 |
+
"loss": 0.9905,
|
| 8476 |
+
"step": 1205
|
| 8477 |
+
},
|
| 8478 |
+
{
|
| 8479 |
+
"epoch": 0.5458248472505092,
|
| 8480 |
+
"grad_norm": 0.4913443922996521,
|
| 8481 |
+
"learning_rate": 2.5685517452260567e-05,
|
| 8482 |
+
"loss": 0.8525,
|
| 8483 |
+
"step": 1206
|
| 8484 |
+
},
|
| 8485 |
+
{
|
| 8486 |
+
"epoch": 0.5462774383344648,
|
| 8487 |
+
"grad_norm": 0.6032697558403015,
|
| 8488 |
+
"learning_rate": 2.5550915941921526e-05,
|
| 8489 |
+
"loss": 0.8903,
|
| 8490 |
+
"step": 1207
|
| 8491 |
+
},
|
| 8492 |
+
{
|
| 8493 |
+
"epoch": 0.5467300294184204,
|
| 8494 |
+
"grad_norm": 0.5988163352012634,
|
| 8495 |
+
"learning_rate": 2.541661636401157e-05,
|
| 8496 |
+
"loss": 0.8709,
|
| 8497 |
+
"step": 1208
|
| 8498 |
+
},
|
| 8499 |
+
{
|
| 8500 |
+
"epoch": 0.5471826205023761,
|
| 8501 |
+
"grad_norm": 0.4508473873138428,
|
| 8502 |
+
"learning_rate": 2.52826192631901e-05,
|
| 8503 |
+
"loss": 0.6725,
|
| 8504 |
+
"step": 1209
|
| 8505 |
+
},
|
| 8506 |
+
{
|
| 8507 |
+
"epoch": 0.5476352115863318,
|
| 8508 |
+
"grad_norm": 0.5168390274047852,
|
| 8509 |
+
"learning_rate": 2.514892518288988e-05,
|
| 8510 |
+
"loss": 0.6997,
|
| 8511 |
+
"step": 1210
|
| 8512 |
+
},
|
| 8513 |
+
{
|
| 8514 |
+
"epoch": 0.5480878026702874,
|
| 8515 |
+
"grad_norm": 0.48190394043922424,
|
| 8516 |
+
"learning_rate": 2.5015534665314755e-05,
|
| 8517 |
+
"loss": 0.8461,
|
| 8518 |
+
"step": 1211
|
| 8519 |
+
},
|
| 8520 |
+
{
|
| 8521 |
+
"epoch": 0.548540393754243,
|
| 8522 |
+
"grad_norm": 0.5142277479171753,
|
| 8523 |
+
"learning_rate": 2.488244825143743e-05,
|
| 8524 |
+
"loss": 0.9458,
|
| 8525 |
+
"step": 1212
|
| 8526 |
+
},
|
| 8527 |
+
{
|
| 8528 |
+
"epoch": 0.5489929848381987,
|
| 8529 |
+
"grad_norm": 0.5585710406303406,
|
| 8530 |
+
"learning_rate": 2.4749666480997337e-05,
|
| 8531 |
+
"loss": 0.8265,
|
| 8532 |
+
"step": 1213
|
| 8533 |
+
},
|
| 8534 |
+
{
|
| 8535 |
+
"epoch": 0.5494455759221544,
|
| 8536 |
+
"grad_norm": 0.5171539783477783,
|
| 8537 |
+
"learning_rate": 2.4617189892498327e-05,
|
| 8538 |
+
"loss": 0.8442,
|
| 8539 |
+
"step": 1214
|
| 8540 |
+
},
|
| 8541 |
+
{
|
| 8542 |
+
"epoch": 0.5498981670061099,
|
| 8543 |
+
"grad_norm": 0.5711890459060669,
|
| 8544 |
+
"learning_rate": 2.4485019023206635e-05,
|
| 8545 |
+
"loss": 0.9659,
|
| 8546 |
+
"step": 1215
|
| 8547 |
+
},
|
| 8548 |
+
{
|
| 8549 |
+
"epoch": 0.5503507580900656,
|
| 8550 |
+
"grad_norm": 0.5380867719650269,
|
| 8551 |
+
"learning_rate": 2.4353154409148637e-05,
|
| 8552 |
+
"loss": 0.8394,
|
| 8553 |
+
"step": 1216
|
| 8554 |
+
},
|
| 8555 |
+
{
|
| 8556 |
+
"epoch": 0.5508033491740213,
|
| 8557 |
+
"grad_norm": 0.4734165370464325,
|
| 8558 |
+
"learning_rate": 2.4221596585108663e-05,
|
| 8559 |
+
"loss": 0.5909,
|
| 8560 |
+
"step": 1217
|
| 8561 |
+
},
|
| 8562 |
+
{
|
| 8563 |
+
"epoch": 0.551255940257977,
|
| 8564 |
+
"grad_norm": 0.5847920775413513,
|
| 8565 |
+
"learning_rate": 2.409034608462686e-05,
|
| 8566 |
+
"loss": 0.9881,
|
| 8567 |
+
"step": 1218
|
| 8568 |
+
},
|
| 8569 |
+
{
|
| 8570 |
+
"epoch": 0.5517085313419325,
|
| 8571 |
+
"grad_norm": 0.5413870811462402,
|
| 8572 |
+
"learning_rate": 2.3959403439996907e-05,
|
| 8573 |
+
"loss": 0.9771,
|
| 8574 |
+
"step": 1219
|
| 8575 |
+
},
|
| 8576 |
+
{
|
| 8577 |
+
"epoch": 0.5521611224258882,
|
| 8578 |
+
"grad_norm": 0.5978078842163086,
|
| 8579 |
+
"learning_rate": 2.382876918226409e-05,
|
| 8580 |
+
"loss": 0.7637,
|
| 8581 |
+
"step": 1220
|
| 8582 |
+
},
|
| 8583 |
+
{
|
| 8584 |
+
"epoch": 0.5526137135098439,
|
| 8585 |
+
"grad_norm": 0.5320140719413757,
|
| 8586 |
+
"learning_rate": 2.369844384122293e-05,
|
| 8587 |
+
"loss": 0.8887,
|
| 8588 |
+
"step": 1221
|
| 8589 |
+
},
|
| 8590 |
+
{
|
| 8591 |
+
"epoch": 0.5530663045937995,
|
| 8592 |
+
"grad_norm": 0.38298875093460083,
|
| 8593 |
+
"learning_rate": 2.356842794541516e-05,
|
| 8594 |
+
"loss": 0.4509,
|
| 8595 |
+
"step": 1222
|
| 8596 |
+
},
|
| 8597 |
+
{
|
| 8598 |
+
"epoch": 0.5535188956777551,
|
| 8599 |
+
"grad_norm": 0.4932439625263214,
|
| 8600 |
+
"learning_rate": 2.3438722022127546e-05,
|
| 8601 |
+
"loss": 0.7125,
|
| 8602 |
+
"step": 1223
|
| 8603 |
+
},
|
| 8604 |
+
{
|
| 8605 |
+
"epoch": 0.5539714867617108,
|
| 8606 |
+
"grad_norm": 0.4653611481189728,
|
| 8607 |
+
"learning_rate": 2.330932659738967e-05,
|
| 8608 |
+
"loss": 0.7279,
|
| 8609 |
+
"step": 1224
|
| 8610 |
+
},
|
| 8611 |
+
{
|
| 8612 |
+
"epoch": 0.5544240778456664,
|
| 8613 |
+
"grad_norm": 0.6703446507453918,
|
| 8614 |
+
"learning_rate": 2.318024219597196e-05,
|
| 8615 |
+
"loss": 1.2327,
|
| 8616 |
+
"step": 1225
|
| 8617 |
+
},
|
| 8618 |
+
{
|
| 8619 |
+
"epoch": 0.5548766689296221,
|
| 8620 |
+
"grad_norm": 0.5315276980400085,
|
| 8621 |
+
"learning_rate": 2.3051469341383402e-05,
|
| 8622 |
+
"loss": 0.9666,
|
| 8623 |
+
"step": 1226
|
| 8624 |
+
},
|
| 8625 |
+
{
|
| 8626 |
+
"epoch": 0.5553292600135777,
|
| 8627 |
+
"grad_norm": 0.4821360111236572,
|
| 8628 |
+
"learning_rate": 2.2923008555869552e-05,
|
| 8629 |
+
"loss": 0.7568,
|
| 8630 |
+
"step": 1227
|
| 8631 |
+
},
|
| 8632 |
+
{
|
| 8633 |
+
"epoch": 0.5557818510975334,
|
| 8634 |
+
"grad_norm": 0.7255955338478088,
|
| 8635 |
+
"learning_rate": 2.2794860360410342e-05,
|
| 8636 |
+
"loss": 1.0656,
|
| 8637 |
+
"step": 1228
|
| 8638 |
+
},
|
| 8639 |
+
{
|
| 8640 |
+
"epoch": 0.556234442181489,
|
| 8641 |
+
"grad_norm": 0.5304329991340637,
|
| 8642 |
+
"learning_rate": 2.266702527471788e-05,
|
| 8643 |
+
"loss": 0.8769,
|
| 8644 |
+
"step": 1229
|
| 8645 |
+
},
|
| 8646 |
+
{
|
| 8647 |
+
"epoch": 0.5566870332654447,
|
| 8648 |
+
"grad_norm": 0.5752689242362976,
|
| 8649 |
+
"learning_rate": 2.2539503817234553e-05,
|
| 8650 |
+
"loss": 1.0325,
|
| 8651 |
+
"step": 1230
|
| 8652 |
+
},
|
| 8653 |
+
{
|
| 8654 |
+
"epoch": 0.5571396243494003,
|
| 8655 |
+
"grad_norm": 0.5128302574157715,
|
| 8656 |
+
"learning_rate": 2.241229650513077e-05,
|
| 8657 |
+
"loss": 0.8263,
|
| 8658 |
+
"step": 1231
|
| 8659 |
+
},
|
| 8660 |
+
{
|
| 8661 |
+
"epoch": 0.5575922154333559,
|
| 8662 |
+
"grad_norm": 0.4604417383670807,
|
| 8663 |
+
"learning_rate": 2.2285403854302912e-05,
|
| 8664 |
+
"loss": 0.6971,
|
| 8665 |
+
"step": 1232
|
| 8666 |
+
},
|
| 8667 |
+
{
|
| 8668 |
+
"epoch": 0.5580448065173116,
|
| 8669 |
+
"grad_norm": 0.5629538893699646,
|
| 8670 |
+
"learning_rate": 2.2158826379371258e-05,
|
| 8671 |
+
"loss": 0.7893,
|
| 8672 |
+
"step": 1233
|
| 8673 |
+
},
|
| 8674 |
+
{
|
| 8675 |
+
"epoch": 0.5584973976012673,
|
| 8676 |
+
"grad_norm": 0.5437077879905701,
|
| 8677 |
+
"learning_rate": 2.2032564593677774e-05,
|
| 8678 |
+
"loss": 0.6816,
|
| 8679 |
+
"step": 1234
|
| 8680 |
+
},
|
| 8681 |
+
{
|
| 8682 |
+
"epoch": 0.5589499886852229,
|
| 8683 |
+
"grad_norm": 0.550563633441925,
|
| 8684 |
+
"learning_rate": 2.1906619009284257e-05,
|
| 8685 |
+
"loss": 0.8575,
|
| 8686 |
+
"step": 1235
|
| 8687 |
+
},
|
| 8688 |
+
{
|
| 8689 |
+
"epoch": 0.5594025797691785,
|
| 8690 |
+
"grad_norm": 0.5454031825065613,
|
| 8691 |
+
"learning_rate": 2.178099013697005e-05,
|
| 8692 |
+
"loss": 0.7705,
|
| 8693 |
+
"step": 1236
|
| 8694 |
+
},
|
| 8695 |
+
{
|
| 8696 |
+
"epoch": 0.5598551708531342,
|
| 8697 |
+
"grad_norm": 0.5278245210647583,
|
| 8698 |
+
"learning_rate": 2.165567848623009e-05,
|
| 8699 |
+
"loss": 0.9149,
|
| 8700 |
+
"step": 1237
|
| 8701 |
+
},
|
| 8702 |
+
{
|
| 8703 |
+
"epoch": 0.5603077619370899,
|
| 8704 |
+
"grad_norm": 0.4524843394756317,
|
| 8705 |
+
"learning_rate": 2.153068456527283e-05,
|
| 8706 |
+
"loss": 0.6749,
|
| 8707 |
+
"step": 1238
|
| 8708 |
+
},
|
| 8709 |
+
{
|
| 8710 |
+
"epoch": 0.5607603530210454,
|
| 8711 |
+
"grad_norm": 0.5835239887237549,
|
| 8712 |
+
"learning_rate": 2.1406008881018047e-05,
|
| 8713 |
+
"loss": 0.8808,
|
| 8714 |
+
"step": 1239
|
| 8715 |
+
},
|
| 8716 |
+
{
|
| 8717 |
+
"epoch": 0.5612129441050011,
|
| 8718 |
+
"grad_norm": 0.5145358443260193,
|
| 8719 |
+
"learning_rate": 2.1281651939094992e-05,
|
| 8720 |
+
"loss": 0.9971,
|
| 8721 |
+
"step": 1240
|
| 8722 |
+
},
|
| 8723 |
+
{
|
| 8724 |
+
"epoch": 0.5616655351889568,
|
| 8725 |
+
"grad_norm": 0.5458048582077026,
|
| 8726 |
+
"learning_rate": 2.1157614243840206e-05,
|
| 8727 |
+
"loss": 0.9558,
|
| 8728 |
+
"step": 1241
|
| 8729 |
+
},
|
| 8730 |
+
{
|
| 8731 |
+
"epoch": 0.5621181262729125,
|
| 8732 |
+
"grad_norm": 0.502225935459137,
|
| 8733 |
+
"learning_rate": 2.1033896298295508e-05,
|
| 8734 |
+
"loss": 0.7903,
|
| 8735 |
+
"step": 1242
|
| 8736 |
+
},
|
| 8737 |
+
{
|
| 8738 |
+
"epoch": 0.562570717356868,
|
| 8739 |
+
"grad_norm": 0.5830442905426025,
|
| 8740 |
+
"learning_rate": 2.0910498604205986e-05,
|
| 8741 |
+
"loss": 1.1186,
|
| 8742 |
+
"step": 1243
|
| 8743 |
+
},
|
| 8744 |
+
{
|
| 8745 |
+
"epoch": 0.5630233084408237,
|
| 8746 |
+
"grad_norm": 0.47947055101394653,
|
| 8747 |
+
"learning_rate": 2.0787421662017825e-05,
|
| 8748 |
+
"loss": 0.7421,
|
| 8749 |
+
"step": 1244
|
| 8750 |
+
},
|
| 8751 |
+
{
|
| 8752 |
+
"epoch": 0.5634758995247794,
|
| 8753 |
+
"grad_norm": 0.5443291068077087,
|
| 8754 |
+
"learning_rate": 2.0664665970876496e-05,
|
| 8755 |
+
"loss": 1.0004,
|
| 8756 |
+
"step": 1245
|
| 8757 |
+
},
|
| 8758 |
+
{
|
| 8759 |
+
"epoch": 0.563928490608735,
|
| 8760 |
+
"grad_norm": 0.6094164848327637,
|
| 8761 |
+
"learning_rate": 2.0542232028624586e-05,
|
| 8762 |
+
"loss": 0.9367,
|
| 8763 |
+
"step": 1246
|
| 8764 |
+
},
|
| 8765 |
+
{
|
| 8766 |
+
"epoch": 0.5643810816926906,
|
| 8767 |
+
"grad_norm": 0.5203437805175781,
|
| 8768 |
+
"learning_rate": 2.0420120331799786e-05,
|
| 8769 |
+
"loss": 1.035,
|
| 8770 |
+
"step": 1247
|
| 8771 |
+
},
|
| 8772 |
+
{
|
| 8773 |
+
"epoch": 0.5648336727766463,
|
| 8774 |
+
"grad_norm": 0.5212861895561218,
|
| 8775 |
+
"learning_rate": 2.0298331375632962e-05,
|
| 8776 |
+
"loss": 1.0298,
|
| 8777 |
+
"step": 1248
|
| 8778 |
+
},
|
| 8779 |
+
{
|
| 8780 |
+
"epoch": 0.565286263860602,
|
| 8781 |
+
"grad_norm": 0.5760666728019714,
|
| 8782 |
+
"learning_rate": 2.0176865654045974e-05,
|
| 8783 |
+
"loss": 1.0492,
|
| 8784 |
+
"step": 1249
|
| 8785 |
+
},
|
| 8786 |
+
{
|
| 8787 |
+
"epoch": 0.5657388549445576,
|
| 8788 |
+
"grad_norm": 0.5951511859893799,
|
| 8789 |
+
"learning_rate": 2.0055723659649904e-05,
|
| 8790 |
+
"loss": 1.0702,
|
| 8791 |
+
"step": 1250
|
| 8792 |
+
},
|
| 8793 |
+
{
|
| 8794 |
+
"epoch": 0.5661914460285132,
|
| 8795 |
+
"grad_norm": 0.4950634837150574,
|
| 8796 |
+
"learning_rate": 1.9934905883742882e-05,
|
| 8797 |
+
"loss": 0.6607,
|
| 8798 |
+
"step": 1251
|
| 8799 |
+
},
|
| 8800 |
+
{
|
| 8801 |
+
"epoch": 0.5666440371124689,
|
| 8802 |
+
"grad_norm": 0.42447158694267273,
|
| 8803 |
+
"learning_rate": 1.981441281630816e-05,
|
| 8804 |
+
"loss": 0.6971,
|
| 8805 |
+
"step": 1252
|
| 8806 |
+
},
|
| 8807 |
+
{
|
| 8808 |
+
"epoch": 0.5670966281964245,
|
| 8809 |
+
"grad_norm": 0.5706565976142883,
|
| 8810 |
+
"learning_rate": 1.969424494601213e-05,
|
| 8811 |
+
"loss": 1.0281,
|
| 8812 |
+
"step": 1253
|
| 8813 |
+
},
|
| 8814 |
+
{
|
| 8815 |
+
"epoch": 0.5675492192803802,
|
| 8816 |
+
"grad_norm": 0.5865153074264526,
|
| 8817 |
+
"learning_rate": 1.9574402760202315e-05,
|
| 8818 |
+
"loss": 1.1408,
|
| 8819 |
+
"step": 1254
|
| 8820 |
+
},
|
| 8821 |
+
{
|
| 8822 |
+
"epoch": 0.5680018103643358,
|
| 8823 |
+
"grad_norm": 0.5091719627380371,
|
| 8824 |
+
"learning_rate": 1.94548867449054e-05,
|
| 8825 |
+
"loss": 0.7702,
|
| 8826 |
+
"step": 1255
|
| 8827 |
+
},
|
| 8828 |
+
{
|
| 8829 |
+
"epoch": 0.5684544014482915,
|
| 8830 |
+
"grad_norm": 0.5343332290649414,
|
| 8831 |
+
"learning_rate": 1.933569738482529e-05,
|
| 8832 |
+
"loss": 0.9963,
|
| 8833 |
+
"step": 1256
|
| 8834 |
+
},
|
| 8835 |
+
{
|
| 8836 |
+
"epoch": 0.5689069925322471,
|
| 8837 |
+
"grad_norm": 0.5233331918716431,
|
| 8838 |
+
"learning_rate": 1.9216835163341106e-05,
|
| 8839 |
+
"loss": 0.9176,
|
| 8840 |
+
"step": 1257
|
| 8841 |
+
},
|
| 8842 |
+
{
|
| 8843 |
+
"epoch": 0.5693595836162028,
|
| 8844 |
+
"grad_norm": 0.45247891545295715,
|
| 8845 |
+
"learning_rate": 1.9098300562505266e-05,
|
| 8846 |
+
"loss": 0.6073,
|
| 8847 |
+
"step": 1258
|
| 8848 |
+
},
|
| 8849 |
+
{
|
| 8850 |
+
"epoch": 0.5698121747001584,
|
| 8851 |
+
"grad_norm": 0.4159603416919708,
|
| 8852 |
+
"learning_rate": 1.8980094063041432e-05,
|
| 8853 |
+
"loss": 0.6153,
|
| 8854 |
+
"step": 1259
|
| 8855 |
+
},
|
| 8856 |
+
{
|
| 8857 |
+
"epoch": 0.570264765784114,
|
| 8858 |
+
"grad_norm": 0.45843058824539185,
|
| 8859 |
+
"learning_rate": 1.8862216144342692e-05,
|
| 8860 |
+
"loss": 0.5988,
|
| 8861 |
+
"step": 1260
|
| 8862 |
+
},
|
| 8863 |
+
{
|
| 8864 |
+
"epoch": 0.5707173568680697,
|
| 8865 |
+
"grad_norm": 0.5220310091972351,
|
| 8866 |
+
"learning_rate": 1.8744667284469575e-05,
|
| 8867 |
+
"loss": 0.8899,
|
| 8868 |
+
"step": 1261
|
| 8869 |
+
},
|
| 8870 |
+
{
|
| 8871 |
+
"epoch": 0.5711699479520254,
|
| 8872 |
+
"grad_norm": 0.529350221157074,
|
| 8873 |
+
"learning_rate": 1.8627447960148037e-05,
|
| 8874 |
+
"loss": 0.818,
|
| 8875 |
+
"step": 1262
|
| 8876 |
+
},
|
| 8877 |
+
{
|
| 8878 |
+
"epoch": 0.571622539035981,
|
| 8879 |
+
"grad_norm": 0.48126599192619324,
|
| 8880 |
+
"learning_rate": 1.851055864676765e-05,
|
| 8881 |
+
"loss": 0.7028,
|
| 8882 |
+
"step": 1263
|
| 8883 |
+
},
|
| 8884 |
+
{
|
| 8885 |
+
"epoch": 0.5720751301199366,
|
| 8886 |
+
"grad_norm": 0.4716106653213501,
|
| 8887 |
+
"learning_rate": 1.8393999818379525e-05,
|
| 8888 |
+
"loss": 0.7965,
|
| 8889 |
+
"step": 1264
|
| 8890 |
+
},
|
| 8891 |
+
{
|
| 8892 |
+
"epoch": 0.5725277212038923,
|
| 8893 |
+
"grad_norm": 0.550206184387207,
|
| 8894 |
+
"learning_rate": 1.8277771947694523e-05,
|
| 8895 |
+
"loss": 1.1316,
|
| 8896 |
+
"step": 1265
|
| 8897 |
+
},
|
| 8898 |
+
{
|
| 8899 |
+
"epoch": 0.572980312287848,
|
| 8900 |
+
"grad_norm": 0.5137092471122742,
|
| 8901 |
+
"learning_rate": 1.8161875506081293e-05,
|
| 8902 |
+
"loss": 0.8549,
|
| 8903 |
+
"step": 1266
|
| 8904 |
+
},
|
| 8905 |
+
{
|
| 8906 |
+
"epoch": 0.5734329033718035,
|
| 8907 |
+
"grad_norm": 0.6171010136604309,
|
| 8908 |
+
"learning_rate": 1.804631096356435e-05,
|
| 8909 |
+
"loss": 0.9547,
|
| 8910 |
+
"step": 1267
|
| 8911 |
+
},
|
| 8912 |
+
{
|
| 8913 |
+
"epoch": 0.5738854944557592,
|
| 8914 |
+
"grad_norm": 0.5315536260604858,
|
| 8915 |
+
"learning_rate": 1.7931078788822175e-05,
|
| 8916 |
+
"loss": 0.896,
|
| 8917 |
+
"step": 1268
|
| 8918 |
+
},
|
| 8919 |
+
{
|
| 8920 |
+
"epoch": 0.5743380855397149,
|
| 8921 |
+
"grad_norm": 0.5210160613059998,
|
| 8922 |
+
"learning_rate": 1.781617944918528e-05,
|
| 8923 |
+
"loss": 0.8643,
|
| 8924 |
+
"step": 1269
|
| 8925 |
+
},
|
| 8926 |
+
{
|
| 8927 |
+
"epoch": 0.5747906766236706,
|
| 8928 |
+
"grad_norm": 0.4929242432117462,
|
| 8929 |
+
"learning_rate": 1.7701613410634365e-05,
|
| 8930 |
+
"loss": 0.7251,
|
| 8931 |
+
"step": 1270
|
| 8932 |
+
},
|
| 8933 |
+
{
|
| 8934 |
+
"epoch": 0.5752432677076261,
|
| 8935 |
+
"grad_norm": 0.5473180413246155,
|
| 8936 |
+
"learning_rate": 1.7587381137798432e-05,
|
| 8937 |
+
"loss": 0.9597,
|
| 8938 |
+
"step": 1271
|
| 8939 |
+
},
|
| 8940 |
+
{
|
| 8941 |
+
"epoch": 0.5756958587915818,
|
| 8942 |
+
"grad_norm": 0.5931307077407837,
|
| 8943 |
+
"learning_rate": 1.747348309395286e-05,
|
| 8944 |
+
"loss": 0.9572,
|
| 8945 |
+
"step": 1272
|
| 8946 |
+
},
|
| 8947 |
+
{
|
| 8948 |
+
"epoch": 0.5761484498755375,
|
| 8949 |
+
"grad_norm": 0.6130375862121582,
|
| 8950 |
+
"learning_rate": 1.735991974101756e-05,
|
| 8951 |
+
"loss": 1.0984,
|
| 8952 |
+
"step": 1273
|
| 8953 |
+
},
|
| 8954 |
+
{
|
| 8955 |
+
"epoch": 0.576601040959493,
|
| 8956 |
+
"grad_norm": 0.5877550840377808,
|
| 8957 |
+
"learning_rate": 1.7246691539555028e-05,
|
| 8958 |
+
"loss": 1.08,
|
| 8959 |
+
"step": 1274
|
| 8960 |
+
},
|
| 8961 |
+
{
|
| 8962 |
+
"epoch": 0.5770536320434487,
|
| 8963 |
+
"grad_norm": 0.498898446559906,
|
| 8964 |
+
"learning_rate": 1.7133798948768597e-05,
|
| 8965 |
+
"loss": 0.8566,
|
| 8966 |
+
"step": 1275
|
| 8967 |
+
},
|
| 8968 |
+
{
|
| 8969 |
+
"epoch": 0.5775062231274044,
|
| 8970 |
+
"grad_norm": 0.4698163568973541,
|
| 8971 |
+
"learning_rate": 1.7021242426500493e-05,
|
| 8972 |
+
"loss": 0.7786,
|
| 8973 |
+
"step": 1276
|
| 8974 |
+
},
|
| 8975 |
+
{
|
| 8976 |
+
"epoch": 0.5779588142113601,
|
| 8977 |
+
"grad_norm": 0.4652780592441559,
|
| 8978 |
+
"learning_rate": 1.6909022429229982e-05,
|
| 8979 |
+
"loss": 0.8628,
|
| 8980 |
+
"step": 1277
|
| 8981 |
+
},
|
| 8982 |
+
{
|
| 8983 |
+
"epoch": 0.5784114052953157,
|
| 8984 |
+
"grad_norm": 0.4151431620121002,
|
| 8985 |
+
"learning_rate": 1.6797139412071584e-05,
|
| 8986 |
+
"loss": 0.6402,
|
| 8987 |
+
"step": 1278
|
| 8988 |
+
},
|
| 8989 |
+
{
|
| 8990 |
+
"epoch": 0.5788639963792713,
|
| 8991 |
+
"grad_norm": 0.47546225786209106,
|
| 8992 |
+
"learning_rate": 1.6685593828773095e-05,
|
| 8993 |
+
"loss": 0.7404,
|
| 8994 |
+
"step": 1279
|
| 8995 |
+
},
|
| 8996 |
+
{
|
| 8997 |
+
"epoch": 0.579316587463227,
|
| 8998 |
+
"grad_norm": 0.47583967447280884,
|
| 8999 |
+
"learning_rate": 1.657438613171387e-05,
|
| 9000 |
+
"loss": 0.7162,
|
| 9001 |
+
"step": 1280
|
| 9002 |
+
},
|
| 9003 |
+
{
|
| 9004 |
+
"epoch": 0.5797691785471826,
|
| 9005 |
+
"grad_norm": 0.5199639797210693,
|
| 9006 |
+
"learning_rate": 1.6463516771902988e-05,
|
| 9007 |
+
"loss": 0.8366,
|
| 9008 |
+
"step": 1281
|
| 9009 |
+
},
|
| 9010 |
+
{
|
| 9011 |
+
"epoch": 0.5802217696311383,
|
| 9012 |
+
"grad_norm": 0.5876262784004211,
|
| 9013 |
+
"learning_rate": 1.6352986198977325e-05,
|
| 9014 |
+
"loss": 1.0567,
|
| 9015 |
+
"step": 1282
|
| 9016 |
+
},
|
| 9017 |
+
{
|
| 9018 |
+
"epoch": 0.5806743607150939,
|
| 9019 |
+
"grad_norm": 0.5289322137832642,
|
| 9020 |
+
"learning_rate": 1.624279486119984e-05,
|
| 9021 |
+
"loss": 0.9105,
|
| 9022 |
+
"step": 1283
|
| 9023 |
+
},
|
| 9024 |
+
{
|
| 9025 |
+
"epoch": 0.5811269517990496,
|
| 9026 |
+
"grad_norm": 0.5706743597984314,
|
| 9027 |
+
"learning_rate": 1.6132943205457606e-05,
|
| 9028 |
+
"loss": 1.1233,
|
| 9029 |
+
"step": 1284
|
| 9030 |
+
},
|
| 9031 |
+
{
|
| 9032 |
+
"epoch": 0.5815795428830052,
|
| 9033 |
+
"grad_norm": 0.5402660369873047,
|
| 9034 |
+
"learning_rate": 1.6023431677260214e-05,
|
| 9035 |
+
"loss": 0.7923,
|
| 9036 |
+
"step": 1285
|
| 9037 |
+
},
|
| 9038 |
+
{
|
| 9039 |
+
"epoch": 0.5820321339669609,
|
| 9040 |
+
"grad_norm": 0.4756923317909241,
|
| 9041 |
+
"learning_rate": 1.5914260720737795e-05,
|
| 9042 |
+
"loss": 0.7638,
|
| 9043 |
+
"step": 1286
|
| 9044 |
+
},
|
| 9045 |
+
{
|
| 9046 |
+
"epoch": 0.5824847250509165,
|
| 9047 |
+
"grad_norm": 0.5244125127792358,
|
| 9048 |
+
"learning_rate": 1.5805430778639263e-05,
|
| 9049 |
+
"loss": 0.7667,
|
| 9050 |
+
"step": 1287
|
| 9051 |
+
},
|
| 9052 |
+
{
|
| 9053 |
+
"epoch": 0.5829373161348721,
|
| 9054 |
+
"grad_norm": 0.5348141193389893,
|
| 9055 |
+
"learning_rate": 1.5696942292330576e-05,
|
| 9056 |
+
"loss": 0.8909,
|
| 9057 |
+
"step": 1288
|
| 9058 |
+
},
|
| 9059 |
+
{
|
| 9060 |
+
"epoch": 0.5833899072188278,
|
| 9061 |
+
"grad_norm": 0.5148683786392212,
|
| 9062 |
+
"learning_rate": 1.5588795701792803e-05,
|
| 9063 |
+
"loss": 0.9972,
|
| 9064 |
+
"step": 1289
|
| 9065 |
+
},
|
| 9066 |
+
{
|
| 9067 |
+
"epoch": 0.5838424983027835,
|
| 9068 |
+
"grad_norm": 0.5113561749458313,
|
| 9069 |
+
"learning_rate": 1.5480991445620542e-05,
|
| 9070 |
+
"loss": 0.8348,
|
| 9071 |
+
"step": 1290
|
| 9072 |
+
},
|
| 9073 |
+
{
|
| 9074 |
+
"epoch": 0.5842950893867391,
|
| 9075 |
+
"grad_norm": 0.4689948260784149,
|
| 9076 |
+
"learning_rate": 1.5373529961019974e-05,
|
| 9077 |
+
"loss": 0.7019,
|
| 9078 |
+
"step": 1291
|
| 9079 |
+
},
|
| 9080 |
+
{
|
| 9081 |
+
"epoch": 0.5847476804706947,
|
| 9082 |
+
"grad_norm": 0.45713579654693604,
|
| 9083 |
+
"learning_rate": 1.5266411683807168e-05,
|
| 9084 |
+
"loss": 0.6865,
|
| 9085 |
+
"step": 1292
|
| 9086 |
+
},
|
| 9087 |
+
{
|
| 9088 |
+
"epoch": 0.5852002715546504,
|
| 9089 |
+
"grad_norm": 0.46141064167022705,
|
| 9090 |
+
"learning_rate": 1.5159637048406328e-05,
|
| 9091 |
+
"loss": 0.6813,
|
| 9092 |
+
"step": 1293
|
| 9093 |
+
},
|
| 9094 |
+
{
|
| 9095 |
+
"epoch": 0.585652862638606,
|
| 9096 |
+
"grad_norm": 0.6206589341163635,
|
| 9097 |
+
"learning_rate": 1.5053206487847914e-05,
|
| 9098 |
+
"loss": 0.9386,
|
| 9099 |
+
"step": 1294
|
| 9100 |
+
},
|
| 9101 |
+
{
|
| 9102 |
+
"epoch": 0.5861054537225616,
|
| 9103 |
+
"grad_norm": 0.4815228283405304,
|
| 9104 |
+
"learning_rate": 1.4947120433767047e-05,
|
| 9105 |
+
"loss": 0.7759,
|
| 9106 |
+
"step": 1295
|
| 9107 |
+
},
|
| 9108 |
+
{
|
| 9109 |
+
"epoch": 0.5865580448065173,
|
| 9110 |
+
"grad_norm": 0.6688939929008484,
|
| 9111 |
+
"learning_rate": 1.484137931640167e-05,
|
| 9112 |
+
"loss": 0.9813,
|
| 9113 |
+
"step": 1296
|
| 9114 |
+
},
|
| 9115 |
+
{
|
| 9116 |
+
"epoch": 0.587010635890473,
|
| 9117 |
+
"grad_norm": 0.4881376624107361,
|
| 9118 |
+
"learning_rate": 1.4735983564590783e-05,
|
| 9119 |
+
"loss": 0.8419,
|
| 9120 |
+
"step": 1297
|
| 9121 |
+
},
|
| 9122 |
+
{
|
| 9123 |
+
"epoch": 0.5874632269744287,
|
| 9124 |
+
"grad_norm": 0.49161502718925476,
|
| 9125 |
+
"learning_rate": 1.4630933605772801e-05,
|
| 9126 |
+
"loss": 0.8166,
|
| 9127 |
+
"step": 1298
|
| 9128 |
+
},
|
| 9129 |
+
{
|
| 9130 |
+
"epoch": 0.5879158180583842,
|
| 9131 |
+
"grad_norm": 0.4156933128833771,
|
| 9132 |
+
"learning_rate": 1.4526229865983665e-05,
|
| 9133 |
+
"loss": 0.4593,
|
| 9134 |
+
"step": 1299
|
| 9135 |
+
},
|
| 9136 |
+
{
|
| 9137 |
+
"epoch": 0.5883684091423399,
|
| 9138 |
+
"grad_norm": 0.5126404166221619,
|
| 9139 |
+
"learning_rate": 1.442187276985526e-05,
|
| 9140 |
+
"loss": 0.952,
|
| 9141 |
+
"step": 1300
|
| 9142 |
+
},
|
| 9143 |
+
{
|
| 9144 |
+
"epoch": 0.5888210002262956,
|
| 9145 |
+
"grad_norm": 0.5474767088890076,
|
| 9146 |
+
"learning_rate": 1.4317862740613664e-05,
|
| 9147 |
+
"loss": 0.8347,
|
| 9148 |
+
"step": 1301
|
| 9149 |
+
},
|
| 9150 |
+
{
|
| 9151 |
+
"epoch": 0.5892735913102511,
|
| 9152 |
+
"grad_norm": 0.5409084558486938,
|
| 9153 |
+
"learning_rate": 1.4214200200077343e-05,
|
| 9154 |
+
"loss": 0.9025,
|
| 9155 |
+
"step": 1302
|
| 9156 |
+
},
|
| 9157 |
+
{
|
| 9158 |
+
"epoch": 0.5897261823942068,
|
| 9159 |
+
"grad_norm": 0.48775801062583923,
|
| 9160 |
+
"learning_rate": 1.4110885568655564e-05,
|
| 9161 |
+
"loss": 0.8533,
|
| 9162 |
+
"step": 1303
|
| 9163 |
+
},
|
| 9164 |
+
{
|
| 9165 |
+
"epoch": 0.5901787734781625,
|
| 9166 |
+
"grad_norm": 0.5462921857833862,
|
| 9167 |
+
"learning_rate": 1.400791926534657e-05,
|
| 9168 |
+
"loss": 0.9145,
|
| 9169 |
+
"step": 1304
|
| 9170 |
+
},
|
| 9171 |
+
{
|
| 9172 |
+
"epoch": 0.5906313645621182,
|
| 9173 |
+
"grad_norm": 0.5149967074394226,
|
| 9174 |
+
"learning_rate": 1.3905301707735985e-05,
|
| 9175 |
+
"loss": 0.8007,
|
| 9176 |
+
"step": 1305
|
| 9177 |
+
},
|
| 9178 |
+
{
|
| 9179 |
+
"epoch": 0.5910839556460737,
|
| 9180 |
+
"grad_norm": 0.4886469841003418,
|
| 9181 |
+
"learning_rate": 1.3803033311995072e-05,
|
| 9182 |
+
"loss": 0.6551,
|
| 9183 |
+
"step": 1306
|
| 9184 |
+
},
|
| 9185 |
+
{
|
| 9186 |
+
"epoch": 0.5915365467300294,
|
| 9187 |
+
"grad_norm": 0.42015543580055237,
|
| 9188 |
+
"learning_rate": 1.3701114492879007e-05,
|
| 9189 |
+
"loss": 0.5759,
|
| 9190 |
+
"step": 1307
|
| 9191 |
+
},
|
| 9192 |
+
{
|
| 9193 |
+
"epoch": 0.5919891378139851,
|
| 9194 |
+
"grad_norm": 0.43609780073165894,
|
| 9195 |
+
"learning_rate": 1.3599545663725321e-05,
|
| 9196 |
+
"loss": 0.639,
|
| 9197 |
+
"step": 1308
|
| 9198 |
+
},
|
| 9199 |
+
{
|
| 9200 |
+
"epoch": 0.5924417288979407,
|
| 9201 |
+
"grad_norm": 0.4982304871082306,
|
| 9202 |
+
"learning_rate": 1.3498327236452013e-05,
|
| 9203 |
+
"loss": 0.8886,
|
| 9204 |
+
"step": 1309
|
| 9205 |
+
},
|
| 9206 |
+
{
|
| 9207 |
+
"epoch": 0.5928943199818963,
|
| 9208 |
+
"grad_norm": 0.7850152850151062,
|
| 9209 |
+
"learning_rate": 1.339745962155613e-05,
|
| 9210 |
+
"loss": 0.7553,
|
| 9211 |
+
"step": 1310
|
| 9212 |
+
},
|
| 9213 |
+
{
|
| 9214 |
+
"epoch": 0.593346911065852,
|
| 9215 |
+
"grad_norm": 0.6349214911460876,
|
| 9216 |
+
"learning_rate": 1.3296943228111925e-05,
|
| 9217 |
+
"loss": 0.825,
|
| 9218 |
+
"step": 1311
|
| 9219 |
+
},
|
| 9220 |
+
{
|
| 9221 |
+
"epoch": 0.5937995021498077,
|
| 9222 |
+
"grad_norm": 0.509701669216156,
|
| 9223 |
+
"learning_rate": 1.3196778463769255e-05,
|
| 9224 |
+
"loss": 0.7534,
|
| 9225 |
+
"step": 1312
|
| 9226 |
+
},
|
| 9227 |
+
{
|
| 9228 |
+
"epoch": 0.5942520932337633,
|
| 9229 |
+
"grad_norm": 0.4454813301563263,
|
| 9230 |
+
"learning_rate": 1.3096965734751943e-05,
|
| 9231 |
+
"loss": 0.69,
|
| 9232 |
+
"step": 1313
|
| 9233 |
+
},
|
| 9234 |
+
{
|
| 9235 |
+
"epoch": 0.594704684317719,
|
| 9236 |
+
"grad_norm": 0.5413982272148132,
|
| 9237 |
+
"learning_rate": 1.2997505445856084e-05,
|
| 9238 |
+
"loss": 0.8602,
|
| 9239 |
+
"step": 1314
|
| 9240 |
+
},
|
| 9241 |
+
{
|
| 9242 |
+
"epoch": 0.5951572754016746,
|
| 9243 |
+
"grad_norm": 0.460597425699234,
|
| 9244 |
+
"learning_rate": 1.2898398000448443e-05,
|
| 9245 |
+
"loss": 0.6843,
|
| 9246 |
+
"step": 1315
|
| 9247 |
+
},
|
| 9248 |
+
{
|
| 9249 |
+
"epoch": 0.5956098664856302,
|
| 9250 |
+
"grad_norm": 0.49289670586586,
|
| 9251 |
+
"learning_rate": 1.2799643800464834e-05,
|
| 9252 |
+
"loss": 0.6259,
|
| 9253 |
+
"step": 1316
|
| 9254 |
+
},
|
| 9255 |
+
{
|
| 9256 |
+
"epoch": 0.5960624575695859,
|
| 9257 |
+
"grad_norm": 0.5211944580078125,
|
| 9258 |
+
"learning_rate": 1.2701243246408422e-05,
|
| 9259 |
+
"loss": 0.8669,
|
| 9260 |
+
"step": 1317
|
| 9261 |
+
},
|
| 9262 |
+
{
|
| 9263 |
+
"epoch": 0.5965150486535415,
|
| 9264 |
+
"grad_norm": 0.42615196108818054,
|
| 9265 |
+
"learning_rate": 1.260319673734821e-05,
|
| 9266 |
+
"loss": 0.6003,
|
| 9267 |
+
"step": 1318
|
| 9268 |
+
},
|
| 9269 |
+
{
|
| 9270 |
+
"epoch": 0.5969676397374972,
|
| 9271 |
+
"grad_norm": 0.6688699722290039,
|
| 9272 |
+
"learning_rate": 1.2505504670917256e-05,
|
| 9273 |
+
"loss": 1.1166,
|
| 9274 |
+
"step": 1319
|
| 9275 |
+
},
|
| 9276 |
+
{
|
| 9277 |
+
"epoch": 0.5974202308214528,
|
| 9278 |
+
"grad_norm": 0.4943235218524933,
|
| 9279 |
+
"learning_rate": 1.2408167443311214e-05,
|
| 9280 |
+
"loss": 0.726,
|
| 9281 |
+
"step": 1320
|
| 9282 |
+
},
|
| 9283 |
+
{
|
| 9284 |
+
"epoch": 0.5978728219054085,
|
| 9285 |
+
"grad_norm": 0.5515215992927551,
|
| 9286 |
+
"learning_rate": 1.2311185449286677e-05,
|
| 9287 |
+
"loss": 1.0609,
|
| 9288 |
+
"step": 1321
|
| 9289 |
+
},
|
| 9290 |
+
{
|
| 9291 |
+
"epoch": 0.5983254129893641,
|
| 9292 |
+
"grad_norm": 0.5061673521995544,
|
| 9293 |
+
"learning_rate": 1.2214559082159537e-05,
|
| 9294 |
+
"loss": 1.0484,
|
| 9295 |
+
"step": 1322
|
| 9296 |
+
},
|
| 9297 |
+
{
|
| 9298 |
+
"epoch": 0.5987780040733197,
|
| 9299 |
+
"grad_norm": 0.5210549831390381,
|
| 9300 |
+
"learning_rate": 1.2118288733803473e-05,
|
| 9301 |
+
"loss": 0.81,
|
| 9302 |
+
"step": 1323
|
| 9303 |
+
},
|
| 9304 |
+
{
|
| 9305 |
+
"epoch": 0.5992305951572754,
|
| 9306 |
+
"grad_norm": 1.2081853151321411,
|
| 9307 |
+
"learning_rate": 1.2022374794648228e-05,
|
| 9308 |
+
"loss": 0.7087,
|
| 9309 |
+
"step": 1324
|
| 9310 |
+
},
|
| 9311 |
+
{
|
| 9312 |
+
"epoch": 0.5996831862412311,
|
| 9313 |
+
"grad_norm": 0.5520872473716736,
|
| 9314 |
+
"learning_rate": 1.1926817653678157e-05,
|
| 9315 |
+
"loss": 0.6642,
|
| 9316 |
+
"step": 1325
|
| 9317 |
+
},
|
| 9318 |
+
{
|
| 9319 |
+
"epoch": 0.6001357773251867,
|
| 9320 |
+
"grad_norm": 0.5837110280990601,
|
| 9321 |
+
"learning_rate": 1.1831617698430609e-05,
|
| 9322 |
+
"loss": 0.8913,
|
| 9323 |
+
"step": 1326
|
| 9324 |
+
},
|
| 9325 |
+
{
|
| 9326 |
+
"epoch": 0.6005883684091423,
|
| 9327 |
+
"grad_norm": 0.47259557247161865,
|
| 9328 |
+
"learning_rate": 1.1736775314994342e-05,
|
| 9329 |
+
"loss": 0.6563,
|
| 9330 |
+
"step": 1327
|
| 9331 |
+
},
|
| 9332 |
+
{
|
| 9333 |
+
"epoch": 0.601040959493098,
|
| 9334 |
+
"grad_norm": 0.4800024628639221,
|
| 9335 |
+
"learning_rate": 1.1642290888007956e-05,
|
| 9336 |
+
"loss": 0.7851,
|
| 9337 |
+
"step": 1328
|
| 9338 |
+
},
|
| 9339 |
+
{
|
| 9340 |
+
"epoch": 0.6014935505770537,
|
| 9341 |
+
"grad_norm": 0.566657304763794,
|
| 9342 |
+
"learning_rate": 1.15481648006583e-05,
|
| 9343 |
+
"loss": 1.0709,
|
| 9344 |
+
"step": 1329
|
| 9345 |
+
},
|
| 9346 |
+
{
|
| 9347 |
+
"epoch": 0.6019461416610092,
|
| 9348 |
+
"grad_norm": 0.40888628363609314,
|
| 9349 |
+
"learning_rate": 1.1454397434679021e-05,
|
| 9350 |
+
"loss": 0.3452,
|
| 9351 |
+
"step": 1330
|
| 9352 |
+
},
|
| 9353 |
+
{
|
| 9354 |
+
"epoch": 0.6023987327449649,
|
| 9355 |
+
"grad_norm": 0.4961101710796356,
|
| 9356 |
+
"learning_rate": 1.1360989170348902e-05,
|
| 9357 |
+
"loss": 0.7467,
|
| 9358 |
+
"step": 1331
|
| 9359 |
+
},
|
| 9360 |
+
{
|
| 9361 |
+
"epoch": 0.6028513238289206,
|
| 9362 |
+
"grad_norm": 0.4577196538448334,
|
| 9363 |
+
"learning_rate": 1.1267940386490416e-05,
|
| 9364 |
+
"loss": 0.6897,
|
| 9365 |
+
"step": 1332
|
| 9366 |
+
},
|
| 9367 |
+
{
|
| 9368 |
+
"epoch": 0.6033039149128763,
|
| 9369 |
+
"grad_norm": 0.4734448194503784,
|
| 9370 |
+
"learning_rate": 1.1175251460468117e-05,
|
| 9371 |
+
"loss": 0.8086,
|
| 9372 |
+
"step": 1333
|
| 9373 |
+
},
|
| 9374 |
+
{
|
| 9375 |
+
"epoch": 0.6037565059968318,
|
| 9376 |
+
"grad_norm": 0.48683977127075195,
|
| 9377 |
+
"learning_rate": 1.10829227681871e-05,
|
| 9378 |
+
"loss": 0.8685,
|
| 9379 |
+
"step": 1334
|
| 9380 |
+
},
|
| 9381 |
+
{
|
| 9382 |
+
"epoch": 0.6042090970807875,
|
| 9383 |
+
"grad_norm": 0.46204137802124023,
|
| 9384 |
+
"learning_rate": 1.0990954684091558e-05,
|
| 9385 |
+
"loss": 0.5823,
|
| 9386 |
+
"step": 1335
|
| 9387 |
+
},
|
| 9388 |
+
{
|
| 9389 |
+
"epoch": 0.6046616881647432,
|
| 9390 |
+
"grad_norm": 0.47728225588798523,
|
| 9391 |
+
"learning_rate": 1.0899347581163221e-05,
|
| 9392 |
+
"loss": 0.8974,
|
| 9393 |
+
"step": 1336
|
| 9394 |
+
},
|
| 9395 |
+
{
|
| 9396 |
+
"epoch": 0.6051142792486988,
|
| 9397 |
+
"grad_norm": 0.4536563754081726,
|
| 9398 |
+
"learning_rate": 1.0808101830919814e-05,
|
| 9399 |
+
"loss": 0.7476,
|
| 9400 |
+
"step": 1337
|
| 9401 |
+
},
|
| 9402 |
+
{
|
| 9403 |
+
"epoch": 0.6055668703326544,
|
| 9404 |
+
"grad_norm": 0.519241452217102,
|
| 9405 |
+
"learning_rate": 1.0717217803413604e-05,
|
| 9406 |
+
"loss": 0.8848,
|
| 9407 |
+
"step": 1338
|
| 9408 |
+
},
|
| 9409 |
+
{
|
| 9410 |
+
"epoch": 0.6060194614166101,
|
| 9411 |
+
"grad_norm": 0.5880079865455627,
|
| 9412 |
+
"learning_rate": 1.062669586722983e-05,
|
| 9413 |
+
"loss": 1.1778,
|
| 9414 |
+
"step": 1339
|
| 9415 |
+
},
|
| 9416 |
+
{
|
| 9417 |
+
"epoch": 0.6064720525005657,
|
| 9418 |
+
"grad_norm": 0.5012646317481995,
|
| 9419 |
+
"learning_rate": 1.0536536389485275e-05,
|
| 9420 |
+
"loss": 0.9761,
|
| 9421 |
+
"step": 1340
|
| 9422 |
+
},
|
| 9423 |
+
{
|
| 9424 |
+
"epoch": 0.6069246435845214,
|
| 9425 |
+
"grad_norm": 0.5676561594009399,
|
| 9426 |
+
"learning_rate": 1.044673973582675e-05,
|
| 9427 |
+
"loss": 0.9059,
|
| 9428 |
+
"step": 1341
|
| 9429 |
+
},
|
| 9430 |
+
{
|
| 9431 |
+
"epoch": 0.607377234668477,
|
| 9432 |
+
"grad_norm": 0.5131206512451172,
|
| 9433 |
+
"learning_rate": 1.0357306270429624e-05,
|
| 9434 |
+
"loss": 0.9466,
|
| 9435 |
+
"step": 1342
|
| 9436 |
+
},
|
| 9437 |
+
{
|
| 9438 |
+
"epoch": 0.6078298257524327,
|
| 9439 |
+
"grad_norm": 0.5565215945243835,
|
| 9440 |
+
"learning_rate": 1.0268236355996341e-05,
|
| 9441 |
+
"loss": 1.1076,
|
| 9442 |
+
"step": 1343
|
| 9443 |
+
},
|
| 9444 |
+
{
|
| 9445 |
+
"epoch": 0.6082824168363883,
|
| 9446 |
+
"grad_norm": 0.5122553110122681,
|
| 9447 |
+
"learning_rate": 1.0179530353754874e-05,
|
| 9448 |
+
"loss": 0.8366,
|
| 9449 |
+
"step": 1344
|
| 9450 |
+
},
|
| 9451 |
+
{
|
| 9452 |
+
"epoch": 0.608735007920344,
|
| 9453 |
+
"grad_norm": 0.4143733084201813,
|
| 9454 |
+
"learning_rate": 1.0091188623457415e-05,
|
| 9455 |
+
"loss": 0.5818,
|
| 9456 |
+
"step": 1345
|
| 9457 |
+
},
|
| 9458 |
+
{
|
| 9459 |
+
"epoch": 0.6091875990042996,
|
| 9460 |
+
"grad_norm": 0.4871106445789337,
|
| 9461 |
+
"learning_rate": 1.0003211523378796e-05,
|
| 9462 |
+
"loss": 0.8138,
|
| 9463 |
+
"step": 1346
|
| 9464 |
+
},
|
| 9465 |
+
{
|
| 9466 |
+
"epoch": 0.6096401900882552,
|
| 9467 |
+
"grad_norm": 0.5023435354232788,
|
| 9468 |
+
"learning_rate": 9.915599410315068e-06,
|
| 9469 |
+
"loss": 0.8095,
|
| 9470 |
+
"step": 1347
|
| 9471 |
+
},
|
| 9472 |
+
{
|
| 9473 |
+
"epoch": 0.6100927811722109,
|
| 9474 |
+
"grad_norm": 0.5308644771575928,
|
| 9475 |
+
"learning_rate": 9.828352639582072e-06,
|
| 9476 |
+
"loss": 0.955,
|
| 9477 |
+
"step": 1348
|
| 9478 |
+
},
|
| 9479 |
+
{
|
| 9480 |
+
"epoch": 0.6105453722561666,
|
| 9481 |
+
"grad_norm": 0.46719393134117126,
|
| 9482 |
+
"learning_rate": 9.74147156501396e-06,
|
| 9483 |
+
"loss": 0.7615,
|
| 9484 |
+
"step": 1349
|
| 9485 |
+
},
|
| 9486 |
+
{
|
| 9487 |
+
"epoch": 0.6109979633401222,
|
| 9488 |
+
"grad_norm": 0.4629960358142853,
|
| 9489 |
+
"learning_rate": 9.65495653896179e-06,
|
| 9490 |
+
"loss": 0.6945,
|
| 9491 |
+
"step": 1350
|
| 9492 |
+
},
|
| 9493 |
+
{
|
| 9494 |
+
"epoch": 0.6114505544240778,
|
| 9495 |
+
"grad_norm": 0.4505084455013275,
|
| 9496 |
+
"learning_rate": 9.568807912292077e-06,
|
| 9497 |
+
"loss": 0.7088,
|
| 9498 |
+
"step": 1351
|
| 9499 |
+
},
|
| 9500 |
+
{
|
| 9501 |
+
"epoch": 0.6119031455080335,
|
| 9502 |
+
"grad_norm": 0.4612928032875061,
|
| 9503 |
+
"learning_rate": 9.483026034385467e-06,
|
| 9504 |
+
"loss": 0.5966,
|
| 9505 |
+
"step": 1352
|
| 9506 |
+
},
|
| 9507 |
+
{
|
| 9508 |
+
"epoch": 0.6123557365919892,
|
| 9509 |
+
"grad_norm": 0.81995689868927,
|
| 9510 |
+
"learning_rate": 9.397611253135118e-06,
|
| 9511 |
+
"loss": 0.751,
|
| 9512 |
+
"step": 1353
|
| 9513 |
+
},
|
| 9514 |
+
{
|
| 9515 |
+
"epoch": 0.6128083276759447,
|
| 9516 |
+
"grad_norm": 0.49387305974960327,
|
| 9517 |
+
"learning_rate": 9.31256391494546e-06,
|
| 9518 |
+
"loss": 0.8838,
|
| 9519 |
+
"step": 1354
|
| 9520 |
+
},
|
| 9521 |
+
{
|
| 9522 |
+
"epoch": 0.6132609187599004,
|
| 9523 |
+
"grad_norm": 0.5457798838615417,
|
| 9524 |
+
"learning_rate": 9.227884364730744e-06,
|
| 9525 |
+
"loss": 0.851,
|
| 9526 |
+
"step": 1355
|
| 9527 |
+
},
|
| 9528 |
+
{
|
| 9529 |
+
"epoch": 0.6137135098438561,
|
| 9530 |
+
"grad_norm": 0.4536944031715393,
|
| 9531 |
+
"learning_rate": 9.143572945913614e-06,
|
| 9532 |
+
"loss": 0.8056,
|
| 9533 |
+
"step": 1356
|
| 9534 |
+
},
|
| 9535 |
+
{
|
| 9536 |
+
"epoch": 0.6141661009278118,
|
| 9537 |
+
"grad_norm": 0.5066845417022705,
|
| 9538 |
+
"learning_rate": 9.05963000042378e-06,
|
| 9539 |
+
"loss": 0.8617,
|
| 9540 |
+
"step": 1357
|
| 9541 |
+
},
|
| 9542 |
+
{
|
| 9543 |
+
"epoch": 0.6146186920117673,
|
| 9544 |
+
"grad_norm": 0.5071107745170593,
|
| 9545 |
+
"learning_rate": 8.976055868696542e-06,
|
| 9546 |
+
"loss": 0.8629,
|
| 9547 |
+
"step": 1358
|
| 9548 |
+
},
|
| 9549 |
+
{
|
| 9550 |
+
"epoch": 0.615071283095723,
|
| 9551 |
+
"grad_norm": 0.5773394107818604,
|
| 9552 |
+
"learning_rate": 8.892850889671455e-06,
|
| 9553 |
+
"loss": 1.1922,
|
| 9554 |
+
"step": 1359
|
| 9555 |
+
},
|
| 9556 |
+
{
|
| 9557 |
+
"epoch": 0.6155238741796787,
|
| 9558 |
+
"grad_norm": 0.48692917823791504,
|
| 9559 |
+
"learning_rate": 8.810015400790994e-06,
|
| 9560 |
+
"loss": 0.7168,
|
| 9561 |
+
"step": 1360
|
| 9562 |
+
},
|
| 9563 |
+
{
|
| 9564 |
+
"epoch": 0.6159764652636343,
|
| 9565 |
+
"grad_norm": 0.4717068374156952,
|
| 9566 |
+
"learning_rate": 8.727549737999097e-06,
|
| 9567 |
+
"loss": 0.8643,
|
| 9568 |
+
"step": 1361
|
| 9569 |
+
},
|
| 9570 |
+
{
|
| 9571 |
+
"epoch": 0.6164290563475899,
|
| 9572 |
+
"grad_norm": 0.5000625252723694,
|
| 9573 |
+
"learning_rate": 8.645454235739903e-06,
|
| 9574 |
+
"loss": 0.7273,
|
| 9575 |
+
"step": 1362
|
| 9576 |
+
},
|
| 9577 |
+
{
|
| 9578 |
+
"epoch": 0.6168816474315456,
|
| 9579 |
+
"grad_norm": 0.43107035756111145,
|
| 9580 |
+
"learning_rate": 8.563729226956319e-06,
|
| 9581 |
+
"loss": 0.6671,
|
| 9582 |
+
"step": 1363
|
| 9583 |
+
},
|
| 9584 |
+
{
|
| 9585 |
+
"epoch": 0.6173342385155013,
|
| 9586 |
+
"grad_norm": 0.6068969368934631,
|
| 9587 |
+
"learning_rate": 8.482375043088665e-06,
|
| 9588 |
+
"loss": 1.2765,
|
| 9589 |
+
"step": 1364
|
| 9590 |
+
},
|
| 9591 |
+
{
|
| 9592 |
+
"epoch": 0.6177868295994569,
|
| 9593 |
+
"grad_norm": 0.5019914507865906,
|
| 9594 |
+
"learning_rate": 8.401392014073405e-06,
|
| 9595 |
+
"loss": 0.9452,
|
| 9596 |
+
"step": 1365
|
| 9597 |
+
},
|
| 9598 |
+
{
|
| 9599 |
+
"epoch": 0.6182394206834125,
|
| 9600 |
+
"grad_norm": 0.5206013917922974,
|
| 9601 |
+
"learning_rate": 8.32078046834176e-06,
|
| 9602 |
+
"loss": 0.9439,
|
| 9603 |
+
"step": 1366
|
| 9604 |
+
},
|
| 9605 |
+
{
|
| 9606 |
+
"epoch": 0.6186920117673682,
|
| 9607 |
+
"grad_norm": 0.4639444649219513,
|
| 9608 |
+
"learning_rate": 8.240540732818347e-06,
|
| 9609 |
+
"loss": 0.802,
|
| 9610 |
+
"step": 1367
|
| 9611 |
+
},
|
| 9612 |
+
{
|
| 9613 |
+
"epoch": 0.6191446028513238,
|
| 9614 |
+
"grad_norm": 0.5678917765617371,
|
| 9615 |
+
"learning_rate": 8.160673132919938e-06,
|
| 9616 |
+
"loss": 1.0333,
|
| 9617 |
+
"step": 1368
|
| 9618 |
+
},
|
| 9619 |
+
{
|
| 9620 |
+
"epoch": 0.6195971939352795,
|
| 9621 |
+
"grad_norm": 0.5168341994285583,
|
| 9622 |
+
"learning_rate": 8.081177992554013e-06,
|
| 9623 |
+
"loss": 0.8182,
|
| 9624 |
+
"step": 1369
|
| 9625 |
+
},
|
| 9626 |
+
{
|
| 9627 |
+
"epoch": 0.6200497850192351,
|
| 9628 |
+
"grad_norm": 0.611613929271698,
|
| 9629 |
+
"learning_rate": 8.002055634117578e-06,
|
| 9630 |
+
"loss": 1.0066,
|
| 9631 |
+
"step": 1370
|
| 9632 |
+
},
|
| 9633 |
+
{
|
| 9634 |
+
"epoch": 0.6205023761031908,
|
| 9635 |
+
"grad_norm": 0.5084848999977112,
|
| 9636 |
+
"learning_rate": 7.923306378495809e-06,
|
| 9637 |
+
"loss": 1.0312,
|
| 9638 |
+
"step": 1371
|
| 9639 |
+
},
|
| 9640 |
+
{
|
| 9641 |
+
"epoch": 0.6209549671871464,
|
| 9642 |
+
"grad_norm": 0.3988780081272125,
|
| 9643 |
+
"learning_rate": 7.844930545060703e-06,
|
| 9644 |
+
"loss": 0.4544,
|
| 9645 |
+
"step": 1372
|
| 9646 |
+
},
|
| 9647 |
+
{
|
| 9648 |
+
"epoch": 0.6214075582711021,
|
| 9649 |
+
"grad_norm": 0.6262059211730957,
|
| 9650 |
+
"learning_rate": 7.766928451669863e-06,
|
| 9651 |
+
"loss": 1.004,
|
| 9652 |
+
"step": 1373
|
| 9653 |
+
},
|
| 9654 |
+
{
|
| 9655 |
+
"epoch": 0.6218601493550577,
|
| 9656 |
+
"grad_norm": 0.5216922760009766,
|
| 9657 |
+
"learning_rate": 7.689300414665124e-06,
|
| 9658 |
+
"loss": 0.7166,
|
| 9659 |
+
"step": 1374
|
| 9660 |
+
},
|
| 9661 |
+
{
|
| 9662 |
+
"epoch": 0.6223127404390133,
|
| 9663 |
+
"grad_norm": 0.5540860891342163,
|
| 9664 |
+
"learning_rate": 7.612046748871327e-06,
|
| 9665 |
+
"loss": 0.8727,
|
| 9666 |
+
"step": 1375
|
| 9667 |
+
},
|
| 9668 |
+
{
|
| 9669 |
+
"epoch": 0.622765331522969,
|
| 9670 |
+
"grad_norm": 0.44992414116859436,
|
| 9671 |
+
"learning_rate": 7.5351677675950635e-06,
|
| 9672 |
+
"loss": 0.6504,
|
| 9673 |
+
"step": 1376
|
| 9674 |
+
},
|
| 9675 |
+
{
|
| 9676 |
+
"epoch": 0.6232179226069247,
|
| 9677 |
+
"grad_norm": 0.4714016616344452,
|
| 9678 |
+
"learning_rate": 7.458663782623343e-06,
|
| 9679 |
+
"loss": 0.8419,
|
| 9680 |
+
"step": 1377
|
| 9681 |
+
},
|
| 9682 |
+
{
|
| 9683 |
+
"epoch": 0.6236705136908803,
|
| 9684 |
+
"grad_norm": 0.49986764788627625,
|
| 9685 |
+
"learning_rate": 7.382535104222366e-06,
|
| 9686 |
+
"loss": 0.7894,
|
| 9687 |
+
"step": 1378
|
| 9688 |
+
},
|
| 9689 |
+
{
|
| 9690 |
+
"epoch": 0.6241231047748359,
|
| 9691 |
+
"grad_norm": 0.5532403588294983,
|
| 9692 |
+
"learning_rate": 7.306782041136218e-06,
|
| 9693 |
+
"loss": 0.901,
|
| 9694 |
+
"step": 1379
|
| 9695 |
+
},
|
| 9696 |
+
{
|
| 9697 |
+
"epoch": 0.6245756958587916,
|
| 9698 |
+
"grad_norm": 0.5384380221366882,
|
| 9699 |
+
"learning_rate": 7.231404900585714e-06,
|
| 9700 |
+
"loss": 0.9753,
|
| 9701 |
+
"step": 1380
|
| 9702 |
+
},
|
| 9703 |
+
{
|
| 9704 |
+
"epoch": 0.6250282869427473,
|
| 9705 |
+
"grad_norm": 0.5002140402793884,
|
| 9706 |
+
"learning_rate": 7.156403988267069e-06,
|
| 9707 |
+
"loss": 0.8285,
|
| 9708 |
+
"step": 1381
|
| 9709 |
+
},
|
| 9710 |
+
{
|
| 9711 |
+
"epoch": 0.6254808780267028,
|
| 9712 |
+
"grad_norm": 0.5726694464683533,
|
| 9713 |
+
"learning_rate": 7.08177960835068e-06,
|
| 9714 |
+
"loss": 0.8634,
|
| 9715 |
+
"step": 1382
|
| 9716 |
+
},
|
| 9717 |
+
{
|
| 9718 |
+
"epoch": 0.6259334691106585,
|
| 9719 |
+
"grad_norm": 0.5283421874046326,
|
| 9720 |
+
"learning_rate": 7.0075320634799045e-06,
|
| 9721 |
+
"loss": 0.7802,
|
| 9722 |
+
"step": 1383
|
| 9723 |
+
},
|
| 9724 |
+
{
|
| 9725 |
+
"epoch": 0.6263860601946142,
|
| 9726 |
+
"grad_norm": 0.4411744475364685,
|
| 9727 |
+
"learning_rate": 6.9336616547697965e-06,
|
| 9728 |
+
"loss": 0.5788,
|
| 9729 |
+
"step": 1384
|
| 9730 |
+
},
|
| 9731 |
+
{
|
| 9732 |
+
"epoch": 0.6268386512785699,
|
| 9733 |
+
"grad_norm": 0.5233549475669861,
|
| 9734 |
+
"learning_rate": 6.860168681805945e-06,
|
| 9735 |
+
"loss": 0.8098,
|
| 9736 |
+
"step": 1385
|
| 9737 |
+
},
|
| 9738 |
+
{
|
| 9739 |
+
"epoch": 0.6272912423625254,
|
| 9740 |
+
"grad_norm": 0.5534676313400269,
|
| 9741 |
+
"learning_rate": 6.787053442643232e-06,
|
| 9742 |
+
"loss": 1.0433,
|
| 9743 |
+
"step": 1386
|
| 9744 |
+
},
|
| 9745 |
+
{
|
| 9746 |
+
"epoch": 0.6277438334464811,
|
| 9747 |
+
"grad_norm": 0.5603635907173157,
|
| 9748 |
+
"learning_rate": 6.714316233804574e-06,
|
| 9749 |
+
"loss": 0.8382,
|
| 9750 |
+
"step": 1387
|
| 9751 |
+
},
|
| 9752 |
+
{
|
| 9753 |
+
"epoch": 0.6281964245304368,
|
| 9754 |
+
"grad_norm": 0.48828354477882385,
|
| 9755 |
+
"learning_rate": 6.6419573502798374e-06,
|
| 9756 |
+
"loss": 0.7261,
|
| 9757 |
+
"step": 1388
|
| 9758 |
+
},
|
| 9759 |
+
{
|
| 9760 |
+
"epoch": 0.6286490156143923,
|
| 9761 |
+
"grad_norm": 0.46339505910873413,
|
| 9762 |
+
"learning_rate": 6.5699770855244815e-06,
|
| 9763 |
+
"loss": 0.6944,
|
| 9764 |
+
"step": 1389
|
| 9765 |
+
},
|
| 9766 |
+
{
|
| 9767 |
+
"epoch": 0.629101606698348,
|
| 9768 |
+
"grad_norm": 0.5434744954109192,
|
| 9769 |
+
"learning_rate": 6.498375731458528e-06,
|
| 9770 |
+
"loss": 0.9542,
|
| 9771 |
+
"step": 1390
|
| 9772 |
+
},
|
| 9773 |
+
{
|
| 9774 |
+
"epoch": 0.6295541977823037,
|
| 9775 |
+
"grad_norm": 0.49759843945503235,
|
| 9776 |
+
"learning_rate": 6.427153578465262e-06,
|
| 9777 |
+
"loss": 0.7949,
|
| 9778 |
+
"step": 1391
|
| 9779 |
+
},
|
| 9780 |
+
{
|
| 9781 |
+
"epoch": 0.6300067888662594,
|
| 9782 |
+
"grad_norm": 0.5009887218475342,
|
| 9783 |
+
"learning_rate": 6.356310915390118e-06,
|
| 9784 |
+
"loss": 0.8088,
|
| 9785 |
+
"step": 1392
|
| 9786 |
+
},
|
| 9787 |
+
{
|
| 9788 |
+
"epoch": 0.630459379950215,
|
| 9789 |
+
"grad_norm": 0.5697285532951355,
|
| 9790 |
+
"learning_rate": 6.28584802953951e-06,
|
| 9791 |
+
"loss": 0.9026,
|
| 9792 |
+
"step": 1393
|
| 9793 |
+
},
|
| 9794 |
+
{
|
| 9795 |
+
"epoch": 0.6309119710341706,
|
| 9796 |
+
"grad_norm": 0.48114413022994995,
|
| 9797 |
+
"learning_rate": 6.215765206679569e-06,
|
| 9798 |
+
"loss": 0.733,
|
| 9799 |
+
"step": 1394
|
| 9800 |
+
},
|
| 9801 |
+
{
|
| 9802 |
+
"epoch": 0.6313645621181263,
|
| 9803 |
+
"grad_norm": 0.5739166140556335,
|
| 9804 |
+
"learning_rate": 6.146062731035129e-06,
|
| 9805 |
+
"loss": 1.1062,
|
| 9806 |
+
"step": 1395
|
| 9807 |
+
},
|
| 9808 |
+
{
|
| 9809 |
+
"epoch": 0.6318171532020819,
|
| 9810 |
+
"grad_norm": 0.475238561630249,
|
| 9811 |
+
"learning_rate": 6.076740885288479e-06,
|
| 9812 |
+
"loss": 0.7195,
|
| 9813 |
+
"step": 1396
|
| 9814 |
+
},
|
| 9815 |
+
{
|
| 9816 |
+
"epoch": 0.6322697442860375,
|
| 9817 |
+
"grad_norm": 0.5954925417900085,
|
| 9818 |
+
"learning_rate": 6.007799950578264e-06,
|
| 9819 |
+
"loss": 1.0236,
|
| 9820 |
+
"step": 1397
|
| 9821 |
+
},
|
| 9822 |
+
{
|
| 9823 |
+
"epoch": 0.6327223353699932,
|
| 9824 |
+
"grad_norm": 0.5766705870628357,
|
| 9825 |
+
"learning_rate": 5.939240206498287e-06,
|
| 9826 |
+
"loss": 0.9263,
|
| 9827 |
+
"step": 1398
|
| 9828 |
+
},
|
| 9829 |
+
{
|
| 9830 |
+
"epoch": 0.6331749264539489,
|
| 9831 |
+
"grad_norm": 0.4999594986438751,
|
| 9832 |
+
"learning_rate": 5.8710619310964445e-06,
|
| 9833 |
+
"loss": 0.696,
|
| 9834 |
+
"step": 1399
|
| 9835 |
+
},
|
| 9836 |
+
{
|
| 9837 |
+
"epoch": 0.6336275175379045,
|
| 9838 |
+
"grad_norm": 0.4891878068447113,
|
| 9839 |
+
"learning_rate": 5.803265400873514e-06,
|
| 9840 |
+
"loss": 0.9738,
|
| 9841 |
+
"step": 1400
|
| 9842 |
+
},
|
| 9843 |
+
{
|
| 9844 |
+
"epoch": 0.6340801086218602,
|
| 9845 |
+
"grad_norm": 0.5583204627037048,
|
| 9846 |
+
"learning_rate": 5.735850890782157e-06,
|
| 9847 |
+
"loss": 1.0242,
|
| 9848 |
+
"step": 1401
|
| 9849 |
+
},
|
| 9850 |
+
{
|
| 9851 |
+
"epoch": 0.6345326997058158,
|
| 9852 |
+
"grad_norm": 0.49677279591560364,
|
| 9853 |
+
"learning_rate": 5.668818674225685e-06,
|
| 9854 |
+
"loss": 0.757,
|
| 9855 |
+
"step": 1402
|
| 9856 |
+
},
|
| 9857 |
+
{
|
| 9858 |
+
"epoch": 0.6349852907897714,
|
| 9859 |
+
"grad_norm": 0.4881908595561981,
|
| 9860 |
+
"learning_rate": 5.602169023057013e-06,
|
| 9861 |
+
"loss": 0.7328,
|
| 9862 |
+
"step": 1403
|
| 9863 |
+
},
|
| 9864 |
+
{
|
| 9865 |
+
"epoch": 0.6354378818737271,
|
| 9866 |
+
"grad_norm": 0.5154109597206116,
|
| 9867 |
+
"learning_rate": 5.5359022075775146e-06,
|
| 9868 |
+
"loss": 0.8986,
|
| 9869 |
+
"step": 1404
|
| 9870 |
+
},
|
| 9871 |
+
{
|
| 9872 |
+
"epoch": 0.6358904729576828,
|
| 9873 |
+
"grad_norm": 0.585472047328949,
|
| 9874 |
+
"learning_rate": 5.470018496535967e-06,
|
| 9875 |
+
"loss": 0.7595,
|
| 9876 |
+
"step": 1405
|
| 9877 |
+
},
|
| 9878 |
+
{
|
| 9879 |
+
"epoch": 0.6363430640416384,
|
| 9880 |
+
"grad_norm": 0.5411213636398315,
|
| 9881 |
+
"learning_rate": 5.40451815712748e-06,
|
| 9882 |
+
"loss": 0.9672,
|
| 9883 |
+
"step": 1406
|
| 9884 |
+
},
|
| 9885 |
+
{
|
| 9886 |
+
"epoch": 0.636795655125594,
|
| 9887 |
+
"grad_norm": 0.4666892886161804,
|
| 9888 |
+
"learning_rate": 5.33940145499231e-06,
|
| 9889 |
+
"loss": 0.6498,
|
| 9890 |
+
"step": 1407
|
| 9891 |
+
},
|
| 9892 |
+
{
|
| 9893 |
+
"epoch": 0.6372482462095497,
|
| 9894 |
+
"grad_norm": 0.4871276319026947,
|
| 9895 |
+
"learning_rate": 5.274668654214932e-06,
|
| 9896 |
+
"loss": 0.6612,
|
| 9897 |
+
"step": 1408
|
| 9898 |
+
},
|
| 9899 |
+
{
|
| 9900 |
+
"epoch": 0.6377008372935054,
|
| 9901 |
+
"grad_norm": 0.6037775874137878,
|
| 9902 |
+
"learning_rate": 5.210320017322812e-06,
|
| 9903 |
+
"loss": 1.0683,
|
| 9904 |
+
"step": 1409
|
| 9905 |
+
},
|
| 9906 |
+
{
|
| 9907 |
+
"epoch": 0.6381534283774609,
|
| 9908 |
+
"grad_norm": 0.5718627572059631,
|
| 9909 |
+
"learning_rate": 5.146355805285452e-06,
|
| 9910 |
+
"loss": 1.0986,
|
| 9911 |
+
"step": 1410
|
| 9912 |
+
},
|
| 9913 |
+
{
|
| 9914 |
+
"epoch": 0.6386060194614166,
|
| 9915 |
+
"grad_norm": 0.5335869789123535,
|
| 9916 |
+
"learning_rate": 5.08277627751329e-06,
|
| 9917 |
+
"loss": 0.6686,
|
| 9918 |
+
"step": 1411
|
| 9919 |
+
},
|
| 9920 |
+
{
|
| 9921 |
+
"epoch": 0.6390586105453723,
|
| 9922 |
+
"grad_norm": 0.44057121872901917,
|
| 9923 |
+
"learning_rate": 5.01958169185669e-06,
|
| 9924 |
+
"loss": 0.587,
|
| 9925 |
+
"step": 1412
|
| 9926 |
+
},
|
| 9927 |
+
{
|
| 9928 |
+
"epoch": 0.639511201629328,
|
| 9929 |
+
"grad_norm": 0.4762479066848755,
|
| 9930 |
+
"learning_rate": 4.956772304604818e-06,
|
| 9931 |
+
"loss": 0.6572,
|
| 9932 |
+
"step": 1413
|
| 9933 |
+
},
|
| 9934 |
+
{
|
| 9935 |
+
"epoch": 0.6399637927132835,
|
| 9936 |
+
"grad_norm": 0.5564635396003723,
|
| 9937 |
+
"learning_rate": 4.8943483704846475e-06,
|
| 9938 |
+
"loss": 1.0787,
|
| 9939 |
+
"step": 1414
|
| 9940 |
+
},
|
| 9941 |
+
{
|
| 9942 |
+
"epoch": 0.6404163837972392,
|
| 9943 |
+
"grad_norm": 0.461028128862381,
|
| 9944 |
+
"learning_rate": 4.832310142659946e-06,
|
| 9945 |
+
"loss": 0.6813,
|
| 9946 |
+
"step": 1415
|
| 9947 |
+
},
|
| 9948 |
+
{
|
| 9949 |
+
"epoch": 0.6408689748811949,
|
| 9950 |
+
"grad_norm": 0.5097917318344116,
|
| 9951 |
+
"learning_rate": 4.7706578727302224e-06,
|
| 9952 |
+
"loss": 0.892,
|
| 9953 |
+
"step": 1416
|
| 9954 |
+
},
|
| 9955 |
+
{
|
| 9956 |
+
"epoch": 0.6413215659651504,
|
| 9957 |
+
"grad_norm": 0.5178496241569519,
|
| 9958 |
+
"learning_rate": 4.709391810729713e-06,
|
| 9959 |
+
"loss": 0.9012,
|
| 9960 |
+
"step": 1417
|
| 9961 |
+
},
|
| 9962 |
+
{
|
| 9963 |
+
"epoch": 0.6417741570491061,
|
| 9964 |
+
"grad_norm": 0.49436789751052856,
|
| 9965 |
+
"learning_rate": 4.648512205126376e-06,
|
| 9966 |
+
"loss": 0.7641,
|
| 9967 |
+
"step": 1418
|
| 9968 |
+
},
|
| 9969 |
+
{
|
| 9970 |
+
"epoch": 0.6422267481330618,
|
| 9971 |
+
"grad_norm": 0.8753583431243896,
|
| 9972 |
+
"learning_rate": 4.588019302820834e-06,
|
| 9973 |
+
"loss": 0.9981,
|
| 9974 |
+
"step": 1419
|
| 9975 |
+
},
|
| 9976 |
+
{
|
| 9977 |
+
"epoch": 0.6426793392170175,
|
| 9978 |
+
"grad_norm": 0.5542361736297607,
|
| 9979 |
+
"learning_rate": 4.527913349145441e-06,
|
| 9980 |
+
"loss": 0.9366,
|
| 9981 |
+
"step": 1420
|
| 9982 |
+
},
|
| 9983 |
+
{
|
| 9984 |
+
"epoch": 0.643131930300973,
|
| 9985 |
+
"grad_norm": 0.41615116596221924,
|
| 9986 |
+
"learning_rate": 4.468194587863273e-06,
|
| 9987 |
+
"loss": 0.5197,
|
| 9988 |
+
"step": 1421
|
| 9989 |
+
},
|
| 9990 |
+
{
|
| 9991 |
+
"epoch": 0.6435845213849287,
|
| 9992 |
+
"grad_norm": 0.5194451212882996,
|
| 9993 |
+
"learning_rate": 4.408863261167096e-06,
|
| 9994 |
+
"loss": 0.7452,
|
| 9995 |
+
"step": 1422
|
| 9996 |
+
},
|
| 9997 |
+
{
|
| 9998 |
+
"epoch": 0.6440371124688844,
|
| 9999 |
+
"grad_norm": 0.43391209840774536,
|
| 10000 |
+
"learning_rate": 4.349919609678455e-06,
|
| 10001 |
+
"loss": 0.6486,
|
| 10002 |
+
"step": 1423
|
| 10003 |
+
},
|
| 10004 |
+
{
|
| 10005 |
+
"epoch": 0.64448970355284,
|
| 10006 |
+
"grad_norm": 0.4887462556362152,
|
| 10007 |
+
"learning_rate": 4.291363872446597e-06,
|
| 10008 |
+
"loss": 0.6932,
|
| 10009 |
+
"step": 1424
|
| 10010 |
+
},
|
| 10011 |
+
{
|
| 10012 |
+
"epoch": 0.6449422946367956,
|
| 10013 |
+
"grad_norm": 0.6093336343765259,
|
| 10014 |
+
"learning_rate": 4.233196286947605e-06,
|
| 10015 |
+
"loss": 1.1266,
|
| 10016 |
+
"step": 1425
|
| 10017 |
+
},
|
| 10018 |
+
{
|
| 10019 |
+
"epoch": 0.6453948857207513,
|
| 10020 |
+
"grad_norm": 0.4346993565559387,
|
| 10021 |
+
"learning_rate": 4.175417089083378e-06,
|
| 10022 |
+
"loss": 0.5141,
|
| 10023 |
+
"step": 1426
|
| 10024 |
+
},
|
| 10025 |
+
{
|
| 10026 |
+
"epoch": 0.645847476804707,
|
| 10027 |
+
"grad_norm": 0.6119694709777832,
|
| 10028 |
+
"learning_rate": 4.118026513180695e-06,
|
| 10029 |
+
"loss": 0.9554,
|
| 10030 |
+
"step": 1427
|
| 10031 |
+
},
|
| 10032 |
+
{
|
| 10033 |
+
"epoch": 0.6463000678886626,
|
| 10034 |
+
"grad_norm": 0.5438103079795837,
|
| 10035 |
+
"learning_rate": 4.061024791990253e-06,
|
| 10036 |
+
"loss": 0.8614,
|
| 10037 |
+
"step": 1428
|
| 10038 |
+
},
|
| 10039 |
+
{
|
| 10040 |
+
"epoch": 0.6467526589726182,
|
| 10041 |
+
"grad_norm": 0.6257548928260803,
|
| 10042 |
+
"learning_rate": 4.004412156685711e-06,
|
| 10043 |
+
"loss": 0.9136,
|
| 10044 |
+
"step": 1429
|
| 10045 |
+
},
|
| 10046 |
+
{
|
| 10047 |
+
"epoch": 0.6472052500565739,
|
| 10048 |
+
"grad_norm": 0.5468961000442505,
|
| 10049 |
+
"learning_rate": 3.948188836862776e-06,
|
| 10050 |
+
"loss": 0.7189,
|
| 10051 |
+
"step": 1430
|
| 10052 |
+
},
|
| 10053 |
+
{
|
| 10054 |
+
"epoch": 0.6476578411405295,
|
| 10055 |
+
"grad_norm": 0.44468608498573303,
|
| 10056 |
+
"learning_rate": 3.892355060538289e-06,
|
| 10057 |
+
"loss": 0.6663,
|
| 10058 |
+
"step": 1431
|
| 10059 |
+
},
|
| 10060 |
+
{
|
| 10061 |
+
"epoch": 0.6481104322244852,
|
| 10062 |
+
"grad_norm": 0.5603303909301758,
|
| 10063 |
+
"learning_rate": 3.836911054149239e-06,
|
| 10064 |
+
"loss": 0.8297,
|
| 10065 |
+
"step": 1432
|
| 10066 |
+
},
|
| 10067 |
+
{
|
| 10068 |
+
"epoch": 0.6485630233084408,
|
| 10069 |
+
"grad_norm": 0.5190114974975586,
|
| 10070 |
+
"learning_rate": 3.7818570425519173e-06,
|
| 10071 |
+
"loss": 0.7831,
|
| 10072 |
+
"step": 1433
|
| 10073 |
+
},
|
| 10074 |
+
{
|
| 10075 |
+
"epoch": 0.6490156143923965,
|
| 10076 |
+
"grad_norm": 0.5360861420631409,
|
| 10077 |
+
"learning_rate": 3.7271932490209328e-06,
|
| 10078 |
+
"loss": 0.9773,
|
| 10079 |
+
"step": 1434
|
| 10080 |
+
},
|
| 10081 |
+
{
|
| 10082 |
+
"epoch": 0.6494682054763521,
|
| 10083 |
+
"grad_norm": 0.5341346263885498,
|
| 10084 |
+
"learning_rate": 3.6729198952483724e-06,
|
| 10085 |
+
"loss": 0.7224,
|
| 10086 |
+
"step": 1435
|
| 10087 |
+
},
|
| 10088 |
+
{
|
| 10089 |
+
"epoch": 0.6499207965603078,
|
| 10090 |
+
"grad_norm": 0.6777652502059937,
|
| 10091 |
+
"learning_rate": 3.6190372013428562e-06,
|
| 10092 |
+
"loss": 1.1764,
|
| 10093 |
+
"step": 1436
|
| 10094 |
+
},
|
| 10095 |
+
{
|
| 10096 |
+
"epoch": 0.6503733876442634,
|
| 10097 |
+
"grad_norm": 0.5760977864265442,
|
| 10098 |
+
"learning_rate": 3.5655453858286614e-06,
|
| 10099 |
+
"loss": 1.1423,
|
| 10100 |
+
"step": 1437
|
| 10101 |
+
},
|
| 10102 |
+
{
|
| 10103 |
+
"epoch": 0.650825978728219,
|
| 10104 |
+
"grad_norm": 0.44717341661453247,
|
| 10105 |
+
"learning_rate": 3.512444665644865e-06,
|
| 10106 |
+
"loss": 0.5806,
|
| 10107 |
+
"step": 1438
|
| 10108 |
+
},
|
| 10109 |
+
{
|
| 10110 |
+
"epoch": 0.6512785698121747,
|
| 10111 |
+
"grad_norm": 0.5548418760299683,
|
| 10112 |
+
"learning_rate": 3.4597352561443807e-06,
|
| 10113 |
+
"loss": 0.8524,
|
| 10114 |
+
"step": 1439
|
| 10115 |
+
},
|
| 10116 |
+
{
|
| 10117 |
+
"epoch": 0.6517311608961304,
|
| 10118 |
+
"grad_norm": 0.5654526948928833,
|
| 10119 |
+
"learning_rate": 3.40741737109318e-06,
|
| 10120 |
+
"loss": 1.0357,
|
| 10121 |
+
"step": 1440
|
| 10122 |
+
},
|
| 10123 |
+
{
|
| 10124 |
+
"epoch": 0.652183751980086,
|
| 10125 |
+
"grad_norm": 0.5160826444625854,
|
| 10126 |
+
"learning_rate": 3.355491222669371e-06,
|
| 10127 |
+
"loss": 0.7621,
|
| 10128 |
+
"step": 1441
|
| 10129 |
+
},
|
| 10130 |
+
{
|
| 10131 |
+
"epoch": 0.6526363430640416,
|
| 10132 |
+
"grad_norm": 0.4938196539878845,
|
| 10133 |
+
"learning_rate": 3.3039570214623782e-06,
|
| 10134 |
+
"loss": 0.7649,
|
| 10135 |
+
"step": 1442
|
| 10136 |
+
},
|
| 10137 |
+
{
|
| 10138 |
+
"epoch": 0.6530889341479973,
|
| 10139 |
+
"grad_norm": 0.5363398790359497,
|
| 10140 |
+
"learning_rate": 3.2528149764720186e-06,
|
| 10141 |
+
"loss": 0.8831,
|
| 10142 |
+
"step": 1443
|
| 10143 |
+
},
|
| 10144 |
+
{
|
| 10145 |
+
"epoch": 0.653541525231953,
|
| 10146 |
+
"grad_norm": 0.6059714555740356,
|
| 10147 |
+
"learning_rate": 3.202065295107726e-06,
|
| 10148 |
+
"loss": 0.9239,
|
| 10149 |
+
"step": 1444
|
| 10150 |
+
},
|
| 10151 |
+
{
|
| 10152 |
+
"epoch": 0.6539941163159085,
|
| 10153 |
+
"grad_norm": 0.5283812284469604,
|
| 10154 |
+
"learning_rate": 3.1517081831876737e-06,
|
| 10155 |
+
"loss": 0.8756,
|
| 10156 |
+
"step": 1445
|
| 10157 |
+
},
|
| 10158 |
+
{
|
| 10159 |
+
"epoch": 0.6544467073998642,
|
| 10160 |
+
"grad_norm": 0.5337501764297485,
|
| 10161 |
+
"learning_rate": 3.1017438449379434e-06,
|
| 10162 |
+
"loss": 1.0205,
|
| 10163 |
+
"step": 1446
|
| 10164 |
+
},
|
| 10165 |
+
{
|
| 10166 |
+
"epoch": 0.6548992984838199,
|
| 10167 |
+
"grad_norm": 0.5220997929573059,
|
| 10168 |
+
"learning_rate": 3.052172482991711e-06,
|
| 10169 |
+
"loss": 1.0293,
|
| 10170 |
+
"step": 1447
|
| 10171 |
+
},
|
| 10172 |
+
{
|
| 10173 |
+
"epoch": 0.6553518895677756,
|
| 10174 |
+
"grad_norm": 0.6735256314277649,
|
| 10175 |
+
"learning_rate": 3.0029942983884173e-06,
|
| 10176 |
+
"loss": 0.9027,
|
| 10177 |
+
"step": 1448
|
| 10178 |
+
},
|
| 10179 |
+
{
|
| 10180 |
+
"epoch": 0.6558044806517311,
|
| 10181 |
+
"grad_norm": 0.5760489106178284,
|
| 10182 |
+
"learning_rate": 2.9542094905729457e-06,
|
| 10183 |
+
"loss": 1.1187,
|
| 10184 |
+
"step": 1449
|
| 10185 |
+
},
|
| 10186 |
+
{
|
| 10187 |
+
"epoch": 0.6562570717356868,
|
| 10188 |
+
"grad_norm": 0.5203390717506409,
|
| 10189 |
+
"learning_rate": 2.905818257394799e-06,
|
| 10190 |
+
"loss": 0.9437,
|
| 10191 |
+
"step": 1450
|
| 10192 |
+
},
|
| 10193 |
+
{
|
| 10194 |
+
"epoch": 0.6567096628196425,
|
| 10195 |
+
"grad_norm": 0.4524308741092682,
|
| 10196 |
+
"learning_rate": 2.8578207951073353e-06,
|
| 10197 |
+
"loss": 0.6448,
|
| 10198 |
+
"step": 1451
|
| 10199 |
+
},
|
| 10200 |
+
{
|
| 10201 |
+
"epoch": 0.6571622539035981,
|
| 10202 |
+
"grad_norm": 0.4728708863258362,
|
| 10203 |
+
"learning_rate": 2.810217298366968e-06,
|
| 10204 |
+
"loss": 0.6843,
|
| 10205 |
+
"step": 1452
|
| 10206 |
+
},
|
| 10207 |
+
{
|
| 10208 |
+
"epoch": 0.6576148449875537,
|
| 10209 |
+
"grad_norm": 0.5334341526031494,
|
| 10210 |
+
"learning_rate": 2.7630079602323442e-06,
|
| 10211 |
+
"loss": 0.8001,
|
| 10212 |
+
"step": 1453
|
| 10213 |
+
},
|
| 10214 |
+
{
|
| 10215 |
+
"epoch": 0.6580674360715094,
|
| 10216 |
+
"grad_norm": 0.484829306602478,
|
| 10217 |
+
"learning_rate": 2.716192972163556e-06,
|
| 10218 |
+
"loss": 0.7185,
|
| 10219 |
+
"step": 1454
|
| 10220 |
+
},
|
| 10221 |
+
{
|
| 10222 |
+
"epoch": 0.658520027155465,
|
| 10223 |
+
"grad_norm": 0.5486847162246704,
|
| 10224 |
+
"learning_rate": 2.6697725240214076e-06,
|
| 10225 |
+
"loss": 0.9219,
|
| 10226 |
+
"step": 1455
|
| 10227 |
+
},
|
| 10228 |
+
{
|
| 10229 |
+
"epoch": 0.6589726182394207,
|
| 10230 |
+
"grad_norm": 0.551567792892456,
|
| 10231 |
+
"learning_rate": 2.6237468040666512e-06,
|
| 10232 |
+
"loss": 1.0728,
|
| 10233 |
+
"step": 1456
|
| 10234 |
+
},
|
| 10235 |
+
{
|
| 10236 |
+
"epoch": 0.6594252093233763,
|
| 10237 |
+
"grad_norm": 0.4729478657245636,
|
| 10238 |
+
"learning_rate": 2.578115998959152e-06,
|
| 10239 |
+
"loss": 0.655,
|
| 10240 |
+
"step": 1457
|
| 10241 |
+
},
|
| 10242 |
+
{
|
| 10243 |
+
"epoch": 0.659877800407332,
|
| 10244 |
+
"grad_norm": 0.5266134738922119,
|
| 10245 |
+
"learning_rate": 2.532880293757223e-06,
|
| 10246 |
+
"loss": 0.9098,
|
| 10247 |
+
"step": 1458
|
| 10248 |
+
},
|
| 10249 |
+
{
|
| 10250 |
+
"epoch": 0.6603303914912876,
|
| 10251 |
+
"grad_norm": 0.408477246761322,
|
| 10252 |
+
"learning_rate": 2.4880398719167586e-06,
|
| 10253 |
+
"loss": 0.644,
|
| 10254 |
+
"step": 1459
|
| 10255 |
+
},
|
| 10256 |
+
{
|
| 10257 |
+
"epoch": 0.6607829825752433,
|
| 10258 |
+
"grad_norm": 0.5005697011947632,
|
| 10259 |
+
"learning_rate": 2.4435949152906145e-06,
|
| 10260 |
+
"loss": 0.8143,
|
| 10261 |
+
"step": 1460
|
| 10262 |
+
},
|
| 10263 |
+
{
|
| 10264 |
+
"epoch": 0.6612355736591989,
|
| 10265 |
+
"grad_norm": 0.5645555257797241,
|
| 10266 |
+
"learning_rate": 2.3995456041278066e-06,
|
| 10267 |
+
"loss": 1.0237,
|
| 10268 |
+
"step": 1461
|
| 10269 |
+
},
|
| 10270 |
+
{
|
| 10271 |
+
"epoch": 0.6616881647431545,
|
| 10272 |
+
"grad_norm": 0.5656578540802002,
|
| 10273 |
+
"learning_rate": 2.3558921170727888e-06,
|
| 10274 |
+
"loss": 0.746,
|
| 10275 |
+
"step": 1462
|
| 10276 |
+
},
|
| 10277 |
+
{
|
| 10278 |
+
"epoch": 0.6621407558271102,
|
| 10279 |
+
"grad_norm": 0.48828980326652527,
|
| 10280 |
+
"learning_rate": 2.312634631164723e-06,
|
| 10281 |
+
"loss": 0.8299,
|
| 10282 |
+
"step": 1463
|
| 10283 |
+
},
|
| 10284 |
+
{
|
| 10285 |
+
"epoch": 0.6625933469110659,
|
| 10286 |
+
"grad_norm": 0.5643355250358582,
|
| 10287 |
+
"learning_rate": 2.2697733218367436e-06,
|
| 10288 |
+
"loss": 0.987,
|
| 10289 |
+
"step": 1464
|
| 10290 |
+
},
|
| 10291 |
+
{
|
| 10292 |
+
"epoch": 0.6630459379950215,
|
| 10293 |
+
"grad_norm": 0.49453938007354736,
|
| 10294 |
+
"learning_rate": 2.2273083629153147e-06,
|
| 10295 |
+
"loss": 0.7006,
|
| 10296 |
+
"step": 1465
|
| 10297 |
+
},
|
| 10298 |
+
{
|
| 10299 |
+
"epoch": 0.6634985290789771,
|
| 10300 |
+
"grad_norm": 0.5133518576622009,
|
| 10301 |
+
"learning_rate": 2.1852399266194314e-06,
|
| 10302 |
+
"loss": 0.8049,
|
| 10303 |
+
"step": 1466
|
| 10304 |
+
},
|
| 10305 |
+
{
|
| 10306 |
+
"epoch": 0.6639511201629328,
|
| 10307 |
+
"grad_norm": 0.456297367811203,
|
| 10308 |
+
"learning_rate": 2.1435681835600184e-06,
|
| 10309 |
+
"loss": 0.5972,
|
| 10310 |
+
"step": 1467
|
| 10311 |
+
},
|
| 10312 |
+
{
|
| 10313 |
+
"epoch": 0.6644037112468885,
|
| 10314 |
+
"grad_norm": 0.5126147270202637,
|
| 10315 |
+
"learning_rate": 2.1022933027391555e-06,
|
| 10316 |
+
"loss": 1.0061,
|
| 10317 |
+
"step": 1468
|
| 10318 |
+
},
|
| 10319 |
+
{
|
| 10320 |
+
"epoch": 0.664856302330844,
|
| 10321 |
+
"grad_norm": 0.5274229645729065,
|
| 10322 |
+
"learning_rate": 2.06141545154942e-06,
|
| 10323 |
+
"loss": 0.8853,
|
| 10324 |
+
"step": 1469
|
| 10325 |
+
},
|
| 10326 |
+
{
|
| 10327 |
+
"epoch": 0.6653088934147997,
|
| 10328 |
+
"grad_norm": 0.4462442100048065,
|
| 10329 |
+
"learning_rate": 2.0209347957732328e-06,
|
| 10330 |
+
"loss": 0.6457,
|
| 10331 |
+
"step": 1470
|
| 10332 |
+
},
|
| 10333 |
+
{
|
| 10334 |
+
"epoch": 0.6657614844987554,
|
| 10335 |
+
"grad_norm": 0.5552085041999817,
|
| 10336 |
+
"learning_rate": 1.9808514995821593e-06,
|
| 10337 |
+
"loss": 0.9793,
|
| 10338 |
+
"step": 1471
|
| 10339 |
+
},
|
| 10340 |
+
{
|
| 10341 |
+
"epoch": 0.6662140755827111,
|
| 10342 |
+
"grad_norm": 0.4626757800579071,
|
| 10343 |
+
"learning_rate": 1.941165725536265e-06,
|
| 10344 |
+
"loss": 0.7582,
|
| 10345 |
+
"step": 1472
|
| 10346 |
+
},
|
| 10347 |
+
{
|
| 10348 |
+
"epoch": 0.6666666666666666,
|
| 10349 |
+
"grad_norm": 0.6225503087043762,
|
| 10350 |
+
"learning_rate": 1.9018776345834155e-06,
|
| 10351 |
+
"loss": 0.5593,
|
| 10352 |
+
"step": 1473
|
| 10353 |
+
},
|
| 10354 |
+
{
|
| 10355 |
+
"epoch": 0.6671192577506223,
|
| 10356 |
+
"grad_norm": 0.4841187000274658,
|
| 10357 |
+
"learning_rate": 1.8629873860586566e-06,
|
| 10358 |
+
"loss": 0.8542,
|
| 10359 |
+
"step": 1474
|
| 10360 |
+
},
|
| 10361 |
+
{
|
| 10362 |
+
"epoch": 0.667571848834578,
|
| 10363 |
+
"grad_norm": 0.636572539806366,
|
| 10364 |
+
"learning_rate": 1.8244951376835906e-06,
|
| 10365 |
+
"loss": 1.1556,
|
| 10366 |
+
"step": 1475
|
| 10367 |
+
},
|
| 10368 |
+
{
|
| 10369 |
+
"epoch": 0.6680244399185336,
|
| 10370 |
+
"grad_norm": 0.5484120845794678,
|
| 10371 |
+
"learning_rate": 1.7864010455656554e-06,
|
| 10372 |
+
"loss": 1.1598,
|
| 10373 |
+
"step": 1476
|
| 10374 |
+
},
|
| 10375 |
+
{
|
| 10376 |
+
"epoch": 0.6684770310024892,
|
| 10377 |
+
"grad_norm": 0.5752256512641907,
|
| 10378 |
+
"learning_rate": 1.7487052641976032e-06,
|
| 10379 |
+
"loss": 0.9162,
|
| 10380 |
+
"step": 1477
|
| 10381 |
+
},
|
| 10382 |
+
{
|
| 10383 |
+
"epoch": 0.6689296220864449,
|
| 10384 |
+
"grad_norm": 0.5652234554290771,
|
| 10385 |
+
"learning_rate": 1.7114079464567888e-06,
|
| 10386 |
+
"loss": 0.8911,
|
| 10387 |
+
"step": 1478
|
| 10388 |
+
},
|
| 10389 |
+
{
|
| 10390 |
+
"epoch": 0.6693822131704006,
|
| 10391 |
+
"grad_norm": 0.4782993495464325,
|
| 10392 |
+
"learning_rate": 1.6745092436045494e-06,
|
| 10393 |
+
"loss": 0.8625,
|
| 10394 |
+
"step": 1479
|
| 10395 |
+
},
|
| 10396 |
+
{
|
| 10397 |
+
"epoch": 0.6698348042543562,
|
| 10398 |
+
"grad_norm": 0.5361889600753784,
|
| 10399 |
+
"learning_rate": 1.6380093052856483e-06,
|
| 10400 |
+
"loss": 0.8206,
|
| 10401 |
+
"step": 1480
|
| 10402 |
+
},
|
| 10403 |
+
{
|
| 10404 |
+
"epoch": 0.6702873953383118,
|
| 10405 |
+
"grad_norm": 0.47865453362464905,
|
| 10406 |
+
"learning_rate": 1.6019082795276307e-06,
|
| 10407 |
+
"loss": 0.7713,
|
| 10408 |
+
"step": 1481
|
| 10409 |
+
},
|
| 10410 |
+
{
|
| 10411 |
+
"epoch": 0.6707399864222675,
|
| 10412 |
+
"grad_norm": 0.457772821187973,
|
| 10413 |
+
"learning_rate": 1.566206312740226e-06,
|
| 10414 |
+
"loss": 0.6757,
|
| 10415 |
+
"step": 1482
|
| 10416 |
+
},
|
| 10417 |
+
{
|
| 10418 |
+
"epoch": 0.6711925775062231,
|
| 10419 |
+
"grad_norm": 0.5296614170074463,
|
| 10420 |
+
"learning_rate": 1.5309035497147684e-06,
|
| 10421 |
+
"loss": 0.9659,
|
| 10422 |
+
"step": 1483
|
| 10423 |
+
},
|
| 10424 |
+
{
|
| 10425 |
+
"epoch": 0.6716451685901788,
|
| 10426 |
+
"grad_norm": 0.496402770280838,
|
| 10427 |
+
"learning_rate": 1.4960001336235875e-06,
|
| 10428 |
+
"loss": 0.8881,
|
| 10429 |
+
"step": 1484
|
| 10430 |
+
},
|
| 10431 |
+
{
|
| 10432 |
+
"epoch": 0.6720977596741344,
|
| 10433 |
+
"grad_norm": 0.43575870990753174,
|
| 10434 |
+
"learning_rate": 1.4614962060194304e-06,
|
| 10435 |
+
"loss": 0.6084,
|
| 10436 |
+
"step": 1485
|
| 10437 |
+
},
|
| 10438 |
+
{
|
| 10439 |
+
"epoch": 0.6725503507580901,
|
| 10440 |
+
"grad_norm": 0.6141435503959656,
|
| 10441 |
+
"learning_rate": 1.4273919068349184e-06,
|
| 10442 |
+
"loss": 0.8805,
|
| 10443 |
+
"step": 1486
|
| 10444 |
+
},
|
| 10445 |
+
{
|
| 10446 |
+
"epoch": 0.6730029418420457,
|
| 10447 |
+
"grad_norm": 0.5889500975608826,
|
| 10448 |
+
"learning_rate": 1.3936873743819357e-06,
|
| 10449 |
+
"loss": 1.056,
|
| 10450 |
+
"step": 1487
|
| 10451 |
+
},
|
| 10452 |
+
{
|
| 10453 |
+
"epoch": 0.6734555329260014,
|
| 10454 |
+
"grad_norm": 0.4447315037250519,
|
| 10455 |
+
"learning_rate": 1.3603827453511186e-06,
|
| 10456 |
+
"loss": 0.6903,
|
| 10457 |
+
"step": 1488
|
| 10458 |
+
},
|
| 10459 |
+
{
|
| 10460 |
+
"epoch": 0.673908124009957,
|
| 10461 |
+
"grad_norm": 0.5051842331886292,
|
| 10462 |
+
"learning_rate": 1.3274781548112458e-06,
|
| 10463 |
+
"loss": 0.7553,
|
| 10464 |
+
"step": 1489
|
| 10465 |
+
},
|
| 10466 |
+
{
|
| 10467 |
+
"epoch": 0.6743607150939126,
|
| 10468 |
+
"grad_norm": 0.5147336721420288,
|
| 10469 |
+
"learning_rate": 1.2949737362087156e-06,
|
| 10470 |
+
"loss": 0.8062,
|
| 10471 |
+
"step": 1490
|
| 10472 |
+
},
|
| 10473 |
+
{
|
| 10474 |
+
"epoch": 0.6748133061778683,
|
| 10475 |
+
"grad_norm": 0.5651899576187134,
|
| 10476 |
+
"learning_rate": 1.2628696213670355e-06,
|
| 10477 |
+
"loss": 0.9131,
|
| 10478 |
+
"step": 1491
|
| 10479 |
+
},
|
| 10480 |
+
{
|
| 10481 |
+
"epoch": 0.675265897261824,
|
| 10482 |
+
"grad_norm": 0.569429337978363,
|
| 10483 |
+
"learning_rate": 1.231165940486234e-06,
|
| 10484 |
+
"loss": 0.9232,
|
| 10485 |
+
"step": 1492
|
| 10486 |
+
},
|
| 10487 |
+
{
|
| 10488 |
+
"epoch": 0.6757184883457796,
|
| 10489 |
+
"grad_norm": 0.5901250839233398,
|
| 10490 |
+
"learning_rate": 1.1998628221423614e-06,
|
| 10491 |
+
"loss": 1.138,
|
| 10492 |
+
"step": 1493
|
| 10493 |
+
},
|
| 10494 |
+
{
|
| 10495 |
+
"epoch": 0.6761710794297352,
|
| 10496 |
+
"grad_norm": 0.47215431928634644,
|
| 10497 |
+
"learning_rate": 1.1689603932869665e-06,
|
| 10498 |
+
"loss": 0.7919,
|
| 10499 |
+
"step": 1494
|
| 10500 |
+
},
|
| 10501 |
+
{
|
| 10502 |
+
"epoch": 0.6766236705136909,
|
| 10503 |
+
"grad_norm": 0.5352398753166199,
|
| 10504 |
+
"learning_rate": 1.1384587792465872e-06,
|
| 10505 |
+
"loss": 0.6431,
|
| 10506 |
+
"step": 1495
|
| 10507 |
+
},
|
| 10508 |
+
{
|
| 10509 |
+
"epoch": 0.6770762615976466,
|
| 10510 |
+
"grad_norm": 0.50892174243927,
|
| 10511 |
+
"learning_rate": 1.1083581037222068e-06,
|
| 10512 |
+
"loss": 0.7254,
|
| 10513 |
+
"step": 1496
|
| 10514 |
+
},
|
| 10515 |
+
{
|
| 10516 |
+
"epoch": 0.6775288526816021,
|
| 10517 |
+
"grad_norm": 0.5627516508102417,
|
| 10518 |
+
"learning_rate": 1.0786584887888307e-06,
|
| 10519 |
+
"loss": 0.949,
|
| 10520 |
+
"step": 1497
|
| 10521 |
+
},
|
| 10522 |
+
{
|
| 10523 |
+
"epoch": 0.6779814437655578,
|
| 10524 |
+
"grad_norm": 0.4339214265346527,
|
| 10525 |
+
"learning_rate": 1.0493600548948878e-06,
|
| 10526 |
+
"loss": 0.6264,
|
| 10527 |
+
"step": 1498
|
| 10528 |
+
},
|
| 10529 |
+
{
|
| 10530 |
+
"epoch": 0.6784340348495135,
|
| 10531 |
+
"grad_norm": 0.42282262444496155,
|
| 10532 |
+
"learning_rate": 1.020462920861831e-06,
|
| 10533 |
+
"loss": 0.5289,
|
| 10534 |
+
"step": 1499
|
| 10535 |
+
},
|
| 10536 |
+
{
|
| 10537 |
+
"epoch": 0.6788866259334692,
|
| 10538 |
+
"grad_norm": 0.46268293261528015,
|
| 10539 |
+
"learning_rate": 9.919672038835925e-07,
|
| 10540 |
+
"loss": 0.6008,
|
| 10541 |
+
"step": 1500
|
| 10542 |
+
},
|
| 10543 |
+
{
|
| 10544 |
+
"epoch": 0.6793392170174247,
|
| 10545 |
+
"grad_norm": 0.5364608764648438,
|
| 10546 |
+
"learning_rate": 9.638730195261625e-07,
|
| 10547 |
+
"loss": 0.6824,
|
| 10548 |
+
"step": 1501
|
| 10549 |
+
},
|
| 10550 |
+
{
|
| 10551 |
+
"epoch": 0.6797918081013804,
|
| 10552 |
+
"grad_norm": 0.5147013664245605,
|
| 10553 |
+
"learning_rate": 9.36180481727067e-07,
|
| 10554 |
+
"loss": 0.8468,
|
| 10555 |
+
"step": 1502
|
| 10556 |
+
},
|
| 10557 |
+
{
|
| 10558 |
+
"epoch": 0.6802443991853361,
|
| 10559 |
+
"grad_norm": 0.5776438117027283,
|
| 10560 |
+
"learning_rate": 9.088897027949462e-07,
|
| 10561 |
+
"loss": 0.7729,
|
| 10562 |
+
"step": 1503
|
| 10563 |
+
},
|
| 10564 |
+
{
|
| 10565 |
+
"epoch": 0.6806969902692916,
|
| 10566 |
+
"grad_norm": 0.47045034170150757,
|
| 10567 |
+
"learning_rate": 8.820007934090879e-07,
|
| 10568 |
+
"loss": 0.8525,
|
| 10569 |
+
"step": 1504
|
| 10570 |
+
},
|
| 10571 |
+
{
|
| 10572 |
+
"epoch": 0.6811495813532473,
|
| 10573 |
+
"grad_norm": 0.554664134979248,
|
| 10574 |
+
"learning_rate": 8.555138626189618e-07,
|
| 10575 |
+
"loss": 0.8944,
|
| 10576 |
+
"step": 1505
|
| 10577 |
+
},
|
| 10578 |
+
{
|
| 10579 |
+
"epoch": 0.681602172437203,
|
| 10580 |
+
"grad_norm": 0.5782079696655273,
|
| 10581 |
+
"learning_rate": 8.294290178437969e-07,
|
| 10582 |
+
"loss": 0.8888,
|
| 10583 |
+
"step": 1506
|
| 10584 |
+
},
|
| 10585 |
+
{
|
| 10586 |
+
"epoch": 0.6820547635211587,
|
| 10587 |
+
"grad_norm": 0.5328008532524109,
|
| 10588 |
+
"learning_rate": 8.037463648721488e-07,
|
| 10589 |
+
"loss": 0.8906,
|
| 10590 |
+
"step": 1507
|
| 10591 |
+
},
|
| 10592 |
+
{
|
| 10593 |
+
"epoch": 0.6825073546051142,
|
| 10594 |
+
"grad_norm": 0.5287159085273743,
|
| 10595 |
+
"learning_rate": 7.78466007861467e-07,
|
| 10596 |
+
"loss": 0.757,
|
| 10597 |
+
"step": 1508
|
| 10598 |
+
},
|
| 10599 |
+
{
|
| 10600 |
+
"epoch": 0.6829599456890699,
|
| 10601 |
+
"grad_norm": 0.5075718760490417,
|
| 10602 |
+
"learning_rate": 7.535880493376279e-07,
|
| 10603 |
+
"loss": 0.8139,
|
| 10604 |
+
"step": 1509
|
| 10605 |
+
},
|
| 10606 |
+
{
|
| 10607 |
+
"epoch": 0.6834125367730256,
|
| 10608 |
+
"grad_norm": 0.5284056067466736,
|
| 10609 |
+
"learning_rate": 7.291125901946027e-07,
|
| 10610 |
+
"loss": 0.9401,
|
| 10611 |
+
"step": 1510
|
| 10612 |
+
},
|
| 10613 |
+
{
|
| 10614 |
+
"epoch": 0.6838651278569812,
|
| 10615 |
+
"grad_norm": 0.7645094394683838,
|
| 10616 |
+
"learning_rate": 7.050397296939792e-07,
|
| 10617 |
+
"loss": 0.7314,
|
| 10618 |
+
"step": 1511
|
| 10619 |
+
},
|
| 10620 |
+
{
|
| 10621 |
+
"epoch": 0.6843177189409368,
|
| 10622 |
+
"grad_norm": 0.602204442024231,
|
| 10623 |
+
"learning_rate": 6.813695654645957e-07,
|
| 10624 |
+
"loss": 1.0208,
|
| 10625 |
+
"step": 1512
|
| 10626 |
+
},
|
| 10627 |
+
{
|
| 10628 |
+
"epoch": 0.6847703100248925,
|
| 10629 |
+
"grad_norm": 0.5655729174613953,
|
| 10630 |
+
"learning_rate": 6.581021935021304e-07,
|
| 10631 |
+
"loss": 1.0402,
|
| 10632 |
+
"step": 1513
|
| 10633 |
+
},
|
| 10634 |
+
{
|
| 10635 |
+
"epoch": 0.6852229011088482,
|
| 10636 |
+
"grad_norm": 0.6173549294471741,
|
| 10637 |
+
"learning_rate": 6.352377081687011e-07,
|
| 10638 |
+
"loss": 1.0761,
|
| 10639 |
+
"step": 1514
|
| 10640 |
+
},
|
| 10641 |
+
{
|
| 10642 |
+
"epoch": 0.6856754921928038,
|
| 10643 |
+
"grad_norm": 0.6081221103668213,
|
| 10644 |
+
"learning_rate": 6.127762021925221e-07,
|
| 10645 |
+
"loss": 1.0481,
|
| 10646 |
+
"step": 1515
|
| 10647 |
+
},
|
| 10648 |
+
{
|
| 10649 |
+
"epoch": 0.6861280832767594,
|
| 10650 |
+
"grad_norm": 0.6282268166542053,
|
| 10651 |
+
"learning_rate": 5.907177666674812e-07,
|
| 10652 |
+
"loss": 1.0363,
|
| 10653 |
+
"step": 1516
|
| 10654 |
+
},
|
| 10655 |
+
{
|
| 10656 |
+
"epoch": 0.6865806743607151,
|
| 10657 |
+
"grad_norm": 0.5493825674057007,
|
| 10658 |
+
"learning_rate": 5.690624910527964e-07,
|
| 10659 |
+
"loss": 0.8727,
|
| 10660 |
+
"step": 1517
|
| 10661 |
+
},
|
| 10662 |
+
{
|
| 10663 |
+
"epoch": 0.6870332654446707,
|
| 10664 |
+
"grad_norm": 0.5676363706588745,
|
| 10665 |
+
"learning_rate": 5.478104631726711e-07,
|
| 10666 |
+
"loss": 0.8488,
|
| 10667 |
+
"step": 1518
|
| 10668 |
+
},
|
| 10669 |
+
{
|
| 10670 |
+
"epoch": 0.6874858565286264,
|
| 10671 |
+
"grad_norm": 0.49481111764907837,
|
| 10672 |
+
"learning_rate": 5.269617692158613e-07,
|
| 10673 |
+
"loss": 0.6527,
|
| 10674 |
+
"step": 1519
|
| 10675 |
+
},
|
| 10676 |
+
{
|
| 10677 |
+
"epoch": 0.687938447612582,
|
| 10678 |
+
"grad_norm": 0.4594171643257141,
|
| 10679 |
+
"learning_rate": 5.065164937354428e-07,
|
| 10680 |
+
"loss": 0.8464,
|
| 10681 |
+
"step": 1520
|
| 10682 |
+
},
|
| 10683 |
+
{
|
| 10684 |
+
"epoch": 0.6883910386965377,
|
| 10685 |
+
"grad_norm": 0.5010755062103271,
|
| 10686 |
+
"learning_rate": 4.864747196483554e-07,
|
| 10687 |
+
"loss": 0.6373,
|
| 10688 |
+
"step": 1521
|
| 10689 |
+
},
|
| 10690 |
+
{
|
| 10691 |
+
"epoch": 0.6888436297804933,
|
| 10692 |
+
"grad_norm": 0.5262997150421143,
|
| 10693 |
+
"learning_rate": 4.668365282351372e-07,
|
| 10694 |
+
"loss": 0.7576,
|
| 10695 |
+
"step": 1522
|
| 10696 |
+
},
|
| 10697 |
+
{
|
| 10698 |
+
"epoch": 0.689296220864449,
|
| 10699 |
+
"grad_norm": 0.4877280592918396,
|
| 10700 |
+
"learning_rate": 4.476019991395908e-07,
|
| 10701 |
+
"loss": 0.7472,
|
| 10702 |
+
"step": 1523
|
| 10703 |
+
},
|
| 10704 |
+
{
|
| 10705 |
+
"epoch": 0.6897488119484046,
|
| 10706 |
+
"grad_norm": 0.5093807578086853,
|
| 10707 |
+
"learning_rate": 4.2877121036840606e-07,
|
| 10708 |
+
"loss": 0.7657,
|
| 10709 |
+
"step": 1524
|
| 10710 |
+
},
|
| 10711 |
+
{
|
| 10712 |
+
"epoch": 0.6902014030323602,
|
| 10713 |
+
"grad_norm": 0.5577916502952576,
|
| 10714 |
+
"learning_rate": 4.103442382909051e-07,
|
| 10715 |
+
"loss": 0.8773,
|
| 10716 |
+
"step": 1525
|
| 10717 |
+
},
|
| 10718 |
+
{
|
| 10719 |
+
"epoch": 0.6906539941163159,
|
| 10720 |
+
"grad_norm": 0.5437626242637634,
|
| 10721 |
+
"learning_rate": 3.923211576387087e-07,
|
| 10722 |
+
"loss": 0.8471,
|
| 10723 |
+
"step": 1526
|
| 10724 |
+
},
|
| 10725 |
+
{
|
| 10726 |
+
"epoch": 0.6911065852002716,
|
| 10727 |
+
"grad_norm": 0.4851123094558716,
|
| 10728 |
+
"learning_rate": 3.74702041505437e-07,
|
| 10729 |
+
"loss": 0.7631,
|
| 10730 |
+
"step": 1527
|
| 10731 |
+
},
|
| 10732 |
+
{
|
| 10733 |
+
"epoch": 0.6915591762842273,
|
| 10734 |
+
"grad_norm": 0.511060893535614,
|
| 10735 |
+
"learning_rate": 3.5748696134639825e-07,
|
| 10736 |
+
"loss": 0.7885,
|
| 10737 |
+
"step": 1528
|
| 10738 |
+
},
|
| 10739 |
+
{
|
| 10740 |
+
"epoch": 0.6920117673681828,
|
| 10741 |
+
"grad_norm": 0.44935715198516846,
|
| 10742 |
+
"learning_rate": 3.406759869783005e-07,
|
| 10743 |
+
"loss": 0.5878,
|
| 10744 |
+
"step": 1529
|
| 10745 |
+
},
|
| 10746 |
+
{
|
| 10747 |
+
"epoch": 0.6924643584521385,
|
| 10748 |
+
"grad_norm": 0.5244868397712708,
|
| 10749 |
+
"learning_rate": 3.2426918657900704e-07,
|
| 10750 |
+
"loss": 0.8548,
|
| 10751 |
+
"step": 1530
|
| 10752 |
+
},
|
| 10753 |
+
{
|
| 10754 |
+
"epoch": 0.6929169495360942,
|
| 10755 |
+
"grad_norm": 0.467731237411499,
|
| 10756 |
+
"learning_rate": 3.0826662668720364e-07,
|
| 10757 |
+
"loss": 0.8462,
|
| 10758 |
+
"step": 1531
|
| 10759 |
+
},
|
| 10760 |
+
{
|
| 10761 |
+
"epoch": 0.6933695406200497,
|
| 10762 |
+
"grad_norm": 0.49825143814086914,
|
| 10763 |
+
"learning_rate": 2.9266837220217613e-07,
|
| 10764 |
+
"loss": 0.6598,
|
| 10765 |
+
"step": 1532
|
| 10766 |
+
},
|
| 10767 |
+
{
|
| 10768 |
+
"epoch": 0.6938221317040054,
|
| 10769 |
+
"grad_norm": 0.48928219079971313,
|
| 10770 |
+
"learning_rate": 2.7747448638352215e-07,
|
| 10771 |
+
"loss": 0.7955,
|
| 10772 |
+
"step": 1533
|
| 10773 |
+
},
|
| 10774 |
+
{
|
| 10775 |
+
"epoch": 0.6942747227879611,
|
| 10776 |
+
"grad_norm": 0.5015487670898438,
|
| 10777 |
+
"learning_rate": 2.6268503085089547e-07,
|
| 10778 |
+
"loss": 0.9172,
|
| 10779 |
+
"step": 1534
|
| 10780 |
+
},
|
| 10781 |
+
{
|
| 10782 |
+
"epoch": 0.6947273138719168,
|
| 10783 |
+
"grad_norm": 0.44026800990104675,
|
| 10784 |
+
"learning_rate": 2.4830006558373973e-07,
|
| 10785 |
+
"loss": 0.6144,
|
| 10786 |
+
"step": 1535
|
| 10787 |
+
},
|
| 10788 |
+
{
|
| 10789 |
+
"epoch": 0.6951799049558723,
|
| 10790 |
+
"grad_norm": 0.5041724443435669,
|
| 10791 |
+
"learning_rate": 2.343196489211219e-07,
|
| 10792 |
+
"loss": 0.6941,
|
| 10793 |
+
"step": 1536
|
| 10794 |
+
},
|
| 10795 |
+
{
|
| 10796 |
+
"epoch": 0.695632496039828,
|
| 10797 |
+
"grad_norm": 0.5276736617088318,
|
| 10798 |
+
"learning_rate": 2.2074383756137686e-07,
|
| 10799 |
+
"loss": 0.9376,
|
| 10800 |
+
"step": 1537
|
| 10801 |
+
},
|
| 10802 |
+
{
|
| 10803 |
+
"epoch": 0.6960850871237837,
|
| 10804 |
+
"grad_norm": 0.539641261100769,
|
| 10805 |
+
"learning_rate": 2.0757268656198537e-07,
|
| 10806 |
+
"loss": 0.8445,
|
| 10807 |
+
"step": 1538
|
| 10808 |
+
},
|
| 10809 |
+
{
|
| 10810 |
+
"epoch": 0.6965376782077393,
|
| 10811 |
+
"grad_norm": 0.6825346350669861,
|
| 10812 |
+
"learning_rate": 1.948062493392744e-07,
|
| 10813 |
+
"loss": 1.1659,
|
| 10814 |
+
"step": 1539
|
| 10815 |
+
},
|
| 10816 |
+
{
|
| 10817 |
+
"epoch": 0.6969902692916949,
|
| 10818 |
+
"grad_norm": 0.5396426320075989,
|
| 10819 |
+
"learning_rate": 1.824445776682504e-07,
|
| 10820 |
+
"loss": 0.8319,
|
| 10821 |
+
"step": 1540
|
| 10822 |
+
},
|
| 10823 |
+
{
|
| 10824 |
+
"epoch": 0.6974428603756506,
|
| 10825 |
+
"grad_norm": 0.47626543045043945,
|
| 10826 |
+
"learning_rate": 1.7048772168237748e-07,
|
| 10827 |
+
"loss": 0.6278,
|
| 10828 |
+
"step": 1541
|
| 10829 |
+
},
|
| 10830 |
+
{
|
| 10831 |
+
"epoch": 0.6978954514596063,
|
| 10832 |
+
"grad_norm": 0.5386638641357422,
|
| 10833 |
+
"learning_rate": 1.5893572987333293e-07,
|
| 10834 |
+
"loss": 0.8372,
|
| 10835 |
+
"step": 1542
|
| 10836 |
+
},
|
| 10837 |
+
{
|
| 10838 |
+
"epoch": 0.6983480425435619,
|
| 10839 |
+
"grad_norm": 0.47005295753479004,
|
| 10840 |
+
"learning_rate": 1.477886490908742e-07,
|
| 10841 |
+
"loss": 0.7101,
|
| 10842 |
+
"step": 1543
|
| 10843 |
+
},
|
| 10844 |
+
{
|
| 10845 |
+
"epoch": 0.6988006336275175,
|
| 10846 |
+
"grad_norm": 0.48174676299095154,
|
| 10847 |
+
"learning_rate": 1.3704652454261668e-07,
|
| 10848 |
+
"loss": 0.6952,
|
| 10849 |
+
"step": 1544
|
| 10850 |
+
},
|
| 10851 |
+
{
|
| 10852 |
+
"epoch": 0.6992532247114732,
|
| 10853 |
+
"grad_norm": 0.46316617727279663,
|
| 10854 |
+
"learning_rate": 1.2670939979384512e-07,
|
| 10855 |
+
"loss": 0.7623,
|
| 10856 |
+
"step": 1545
|
| 10857 |
+
},
|
| 10858 |
+
{
|
| 10859 |
+
"epoch": 0.6997058157954288,
|
| 10860 |
+
"grad_norm": 0.6071258783340454,
|
| 10861 |
+
"learning_rate": 1.1677731676733584e-07,
|
| 10862 |
+
"loss": 1.0641,
|
| 10863 |
+
"step": 1546
|
| 10864 |
+
},
|
| 10865 |
+
{
|
| 10866 |
+
"epoch": 0.7001584068793845,
|
| 10867 |
+
"grad_norm": 0.4970654249191284,
|
| 10868 |
+
"learning_rate": 1.0725031574323474e-07,
|
| 10869 |
+
"loss": 0.7059,
|
| 10870 |
+
"step": 1547
|
| 10871 |
+
},
|
| 10872 |
+
{
|
| 10873 |
+
"epoch": 0.7006109979633401,
|
| 10874 |
+
"grad_norm": 0.4778405725955963,
|
| 10875 |
+
"learning_rate": 9.8128435358813e-08,
|
| 10876 |
+
"loss": 0.7019,
|
| 10877 |
+
"step": 1548
|
| 10878 |
+
},
|
| 10879 |
+
{
|
| 10880 |
+
"epoch": 0.7010635890472958,
|
| 10881 |
+
"grad_norm": 0.653716504573822,
|
| 10882 |
+
"learning_rate": 8.941171260835601e-08,
|
| 10883 |
+
"loss": 1.1438,
|
| 10884 |
+
"step": 1549
|
| 10885 |
+
},
|
| 10886 |
+
{
|
| 10887 |
+
"epoch": 0.7015161801312514,
|
| 10888 |
+
"grad_norm": 0.480570524930954,
|
| 10889 |
+
"learning_rate": 8.110018284304133e-08,
|
| 10890 |
+
"loss": 0.7621,
|
| 10891 |
+
"step": 1550
|
| 10892 |
+
},
|
| 10893 |
+
{
|
| 10894 |
+
"epoch": 0.7019687712152071,
|
| 10895 |
+
"grad_norm": 0.5458505153656006,
|
| 10896 |
+
"learning_rate": 7.319387977072766e-08,
|
| 10897 |
+
"loss": 1.0065,
|
| 10898 |
+
"step": 1551
|
| 10899 |
+
},
|
| 10900 |
+
{
|
| 10901 |
+
"epoch": 0.7024213622991627,
|
| 10902 |
+
"grad_norm": 0.4744986593723297,
|
| 10903 |
+
"learning_rate": 6.569283545587724e-08,
|
| 10904 |
+
"loss": 0.7546,
|
| 10905 |
+
"step": 1552
|
| 10906 |
+
},
|
| 10907 |
+
{
|
| 10908 |
+
"epoch": 0.7028739533831183,
|
| 10909 |
+
"grad_norm": 0.5370805859565735,
|
| 10910 |
+
"learning_rate": 5.8597080319389156e-08,
|
| 10911 |
+
"loss": 1.1206,
|
| 10912 |
+
"step": 1553
|
| 10913 |
+
},
|
| 10914 |
+
{
|
| 10915 |
+
"epoch": 0.703326544467074,
|
| 10916 |
+
"grad_norm": 0.49838805198669434,
|
| 10917 |
+
"learning_rate": 5.190664313851068e-08,
|
| 10918 |
+
"loss": 0.69,
|
| 10919 |
+
"step": 1554
|
| 10920 |
+
},
|
| 10921 |
+
{
|
| 10922 |
+
"epoch": 0.7037791355510297,
|
| 10923 |
+
"grad_norm": 0.5323602557182312,
|
| 10924 |
+
"learning_rate": 4.562155104665955e-08,
|
| 10925 |
+
"loss": 0.7611,
|
| 10926 |
+
"step": 1555
|
| 10927 |
+
},
|
| 10928 |
+
{
|
| 10929 |
+
"epoch": 0.7042317266349853,
|
| 10930 |
+
"grad_norm": 0.5184367299079895,
|
| 10931 |
+
"learning_rate": 3.9741829533401775e-08,
|
| 10932 |
+
"loss": 0.9261,
|
| 10933 |
+
"step": 1556
|
| 10934 |
+
},
|
| 10935 |
+
{
|
| 10936 |
+
"epoch": 0.7046843177189409,
|
| 10937 |
+
"grad_norm": 0.5408939719200134,
|
| 10938 |
+
"learning_rate": 3.4267502444274015e-08,
|
| 10939 |
+
"loss": 0.7767,
|
| 10940 |
+
"step": 1557
|
| 10941 |
+
},
|
| 10942 |
+
{
|
| 10943 |
+
"epoch": 0.7051369088028966,
|
| 10944 |
+
"grad_norm": 0.49419355392456055,
|
| 10945 |
+
"learning_rate": 2.9198591980705848e-08,
|
| 10946 |
+
"loss": 0.8236,
|
| 10947 |
+
"step": 1558
|
| 10948 |
+
},
|
| 10949 |
+
{
|
| 10950 |
+
"epoch": 0.7055894998868523,
|
| 10951 |
+
"grad_norm": 0.5697168111801147,
|
| 10952 |
+
"learning_rate": 2.4535118699953176e-08,
|
| 10953 |
+
"loss": 1.0097,
|
| 10954 |
+
"step": 1559
|
| 10955 |
+
},
|
| 10956 |
+
{
|
| 10957 |
+
"epoch": 0.7060420909708078,
|
| 10958 |
+
"grad_norm": 0.6209654808044434,
|
| 10959 |
+
"learning_rate": 2.0277101514987184e-08,
|
| 10960 |
+
"loss": 1.1677,
|
| 10961 |
+
"step": 1560
|
| 10962 |
+
},
|
| 10963 |
+
{
|
| 10964 |
+
"epoch": 0.7064946820547635,
|
| 10965 |
+
"grad_norm": 0.4766218364238739,
|
| 10966 |
+
"learning_rate": 1.642455769444995e-08,
|
| 10967 |
+
"loss": 0.6273,
|
| 10968 |
+
"step": 1561
|
| 10969 |
+
},
|
| 10970 |
+
{
|
| 10971 |
+
"epoch": 0.7069472731387192,
|
| 10972 |
+
"grad_norm": 0.5815181136131287,
|
| 10973 |
+
"learning_rate": 1.2977502862532297e-08,
|
| 10974 |
+
"loss": 0.8964,
|
| 10975 |
+
"step": 1562
|
| 10976 |
+
},
|
| 10977 |
+
{
|
| 10978 |
+
"epoch": 0.7073998642226749,
|
| 10979 |
+
"grad_norm": 0.5831205248832703,
|
| 10980 |
+
"learning_rate": 9.935950998962717e-09,
|
| 10981 |
+
"loss": 0.8613,
|
| 10982 |
+
"step": 1563
|
| 10983 |
+
},
|
| 10984 |
+
{
|
| 10985 |
+
"epoch": 0.7078524553066304,
|
| 10986 |
+
"grad_norm": 0.4257395565509796,
|
| 10987 |
+
"learning_rate": 7.2999144389296335e-09,
|
| 10988 |
+
"loss": 0.6569,
|
| 10989 |
+
"step": 1564
|
| 10990 |
+
},
|
| 10991 |
+
{
|
| 10992 |
+
"epoch": 0.7083050463905861,
|
| 10993 |
+
"grad_norm": 0.6523287296295166,
|
| 10994 |
+
"learning_rate": 5.069403873025902e-09,
|
| 10995 |
+
"loss": 1.023,
|
| 10996 |
+
"step": 1565
|
| 10997 |
+
},
|
| 10998 |
+
{
|
| 10999 |
+
"epoch": 0.7087576374745418,
|
| 11000 |
+
"grad_norm": 0.5527802109718323,
|
| 11001 |
+
"learning_rate": 3.244428347204398e-09,
|
| 11002 |
+
"loss": 0.8362,
|
| 11003 |
+
"step": 1566
|
| 11004 |
+
},
|
| 11005 |
+
{
|
| 11006 |
+
"epoch": 0.7092102285584974,
|
| 11007 |
+
"grad_norm": 0.42681992053985596,
|
| 11008 |
+
"learning_rate": 1.8249952627669154e-09,
|
| 11009 |
+
"loss": 0.5779,
|
| 11010 |
+
"step": 1567
|
| 11011 |
+
},
|
| 11012 |
+
{
|
| 11013 |
+
"epoch": 0.709662819642453,
|
| 11014 |
+
"grad_norm": 0.489521861076355,
|
| 11015 |
+
"learning_rate": 8.111103762975524e-10,
|
| 11016 |
+
"loss": 0.8613,
|
| 11017 |
+
"step": 1568
|
| 11018 |
+
},
|
| 11019 |
+
{
|
| 11020 |
+
"epoch": 0.7101154107264087,
|
| 11021 |
+
"grad_norm": 0.55333411693573,
|
| 11022 |
+
"learning_rate": 2.027777996738145e-10,
|
| 11023 |
+
"loss": 1.0663,
|
| 11024 |
+
"step": 1569
|
| 11025 |
+
},
|
| 11026 |
+
{
|
| 11027 |
+
"epoch": 0.7105680018103643,
|
| 11028 |
+
"grad_norm": 0.5273703336715698,
|
| 11029 |
+
"learning_rate": 0.0,
|
| 11030 |
+
"loss": 0.7856,
|
| 11031 |
+
"step": 1570
|
| 11032 |
}
|
| 11033 |
],
|
| 11034 |
"logging_steps": 1,
|
|
|
|
| 11043 |
"should_evaluate": false,
|
| 11044 |
"should_log": false,
|
| 11045 |
"should_save": true,
|
| 11046 |
+
"should_training_stop": true
|
| 11047 |
},
|
| 11048 |
"attributes": {}
|
| 11049 |
}
|
| 11050 |
},
|
| 11051 |
+
"total_flos": 2.8612606783861555e+17,
|
| 11052 |
"train_batch_size": 2,
|
| 11053 |
"trial_name": null,
|
| 11054 |
"trial_params": null
|