Training in progress, step 927, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 22573704
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a13d30f04b78111442d48ae811e3434b942dd475d659f4712fa20724ef92664
|
| 3 |
size 22573704
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 11711226
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:993dc89b8df3ccbae23982c39d965f0fdb1d948df8cc38125e8fe6f1bddb3131
|
| 3 |
size 11711226
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f03b4e1a0c48da8bd558a7ffc954cbb030030779a9aa7481ccae88528b0ff1f0
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:878386a9657e9de6544843ac1e59cd0d10db7a61af466358b98118ef0a905bc0
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4879,6 +4879,1631 @@
|
|
| 4879 |
"learning_rate": 1.4704946534781695e-05,
|
| 4880 |
"loss": 0.0001,
|
| 4881 |
"step": 696
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4882 |
}
|
| 4883 |
],
|
| 4884 |
"logging_steps": 1,
|
|
@@ -4893,12 +6518,12 @@
|
|
| 4893 |
"should_evaluate": false,
|
| 4894 |
"should_log": false,
|
| 4895 |
"should_save": true,
|
| 4896 |
-
"should_training_stop":
|
| 4897 |
},
|
| 4898 |
"attributes": {}
|
| 4899 |
}
|
| 4900 |
},
|
| 4901 |
-
"total_flos":
|
| 4902 |
"train_batch_size": 4,
|
| 4903 |
"trial_name": null,
|
| 4904 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0002697599136767,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 927,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4879 |
"learning_rate": 1.4704946534781695e-05,
|
| 4880 |
"loss": 0.0001,
|
| 4881 |
"step": 696
|
| 4882 |
+
},
|
| 4883 |
+
{
|
| 4884 |
+
"epoch": 0.7520906393309954,
|
| 4885 |
+
"grad_norm": 0.05158804729580879,
|
| 4886 |
+
"learning_rate": 1.4584477966753324e-05,
|
| 4887 |
+
"loss": 0.0006,
|
| 4888 |
+
"step": 697
|
| 4889 |
+
},
|
| 4890 |
+
{
|
| 4891 |
+
"epoch": 0.7531696789857028,
|
| 4892 |
+
"grad_norm": 0.0036936409305781126,
|
| 4893 |
+
"learning_rate": 1.4464420577918958e-05,
|
| 4894 |
+
"loss": 0.0001,
|
| 4895 |
+
"step": 698
|
| 4896 |
+
},
|
| 4897 |
+
{
|
| 4898 |
+
"epoch": 0.75424871864041,
|
| 4899 |
+
"grad_norm": 0.0024699585046619177,
|
| 4900 |
+
"learning_rate": 1.434477576216176e-05,
|
| 4901 |
+
"loss": 0.0001,
|
| 4902 |
+
"step": 699
|
| 4903 |
+
},
|
| 4904 |
+
{
|
| 4905 |
+
"epoch": 0.7553277582951173,
|
| 4906 |
+
"grad_norm": 0.04906982183456421,
|
| 4907 |
+
"learning_rate": 1.4225544908574873e-05,
|
| 4908 |
+
"loss": 0.0007,
|
| 4909 |
+
"step": 700
|
| 4910 |
+
},
|
| 4911 |
+
{
|
| 4912 |
+
"epoch": 0.7564067979498247,
|
| 4913 |
+
"grad_norm": 0.08218776434659958,
|
| 4914 |
+
"learning_rate": 1.410672940144529e-05,
|
| 4915 |
+
"loss": 0.001,
|
| 4916 |
+
"step": 701
|
| 4917 |
+
},
|
| 4918 |
+
{
|
| 4919 |
+
"epoch": 0.7574858376045319,
|
| 4920 |
+
"grad_norm": 0.0007760393782518804,
|
| 4921 |
+
"learning_rate": 1.398833062023775e-05,
|
| 4922 |
+
"loss": 0.0,
|
| 4923 |
+
"step": 702
|
| 4924 |
+
},
|
| 4925 |
+
{
|
| 4926 |
+
"epoch": 0.7585648772592393,
|
| 4927 |
+
"grad_norm": 0.0009303510887548327,
|
| 4928 |
+
"learning_rate": 1.3870349939578787e-05,
|
| 4929 |
+
"loss": 0.0,
|
| 4930 |
+
"step": 703
|
| 4931 |
+
},
|
| 4932 |
+
{
|
| 4933 |
+
"epoch": 0.7596439169139466,
|
| 4934 |
+
"grad_norm": 0.001700832974165678,
|
| 4935 |
+
"learning_rate": 1.3752788729240695e-05,
|
| 4936 |
+
"loss": 0.0001,
|
| 4937 |
+
"step": 704
|
| 4938 |
+
},
|
| 4939 |
+
{
|
| 4940 |
+
"epoch": 0.7607229565686539,
|
| 4941 |
+
"grad_norm": 0.0007123792893253267,
|
| 4942 |
+
"learning_rate": 1.3635648354125663e-05,
|
| 4943 |
+
"loss": 0.0,
|
| 4944 |
+
"step": 705
|
| 4945 |
+
},
|
| 4946 |
+
{
|
| 4947 |
+
"epoch": 0.7618019962233612,
|
| 4948 |
+
"grad_norm": 0.000575919752009213,
|
| 4949 |
+
"learning_rate": 1.3518930174249933e-05,
|
| 4950 |
+
"loss": 0.0,
|
| 4951 |
+
"step": 706
|
| 4952 |
+
},
|
| 4953 |
+
{
|
| 4954 |
+
"epoch": 0.7628810358780685,
|
| 4955 |
+
"grad_norm": 0.004258546978235245,
|
| 4956 |
+
"learning_rate": 1.3402635544727993e-05,
|
| 4957 |
+
"loss": 0.0,
|
| 4958 |
+
"step": 707
|
| 4959 |
+
},
|
| 4960 |
+
{
|
| 4961 |
+
"epoch": 0.7639600755327758,
|
| 4962 |
+
"grad_norm": 0.0015764714917168021,
|
| 4963 |
+
"learning_rate": 1.328676581575684e-05,
|
| 4964 |
+
"loss": 0.0001,
|
| 4965 |
+
"step": 708
|
| 4966 |
+
},
|
| 4967 |
+
{
|
| 4968 |
+
"epoch": 0.7650391151874831,
|
| 4969 |
+
"grad_norm": 0.0005665950011461973,
|
| 4970 |
+
"learning_rate": 1.3171322332600367e-05,
|
| 4971 |
+
"loss": 0.0,
|
| 4972 |
+
"step": 709
|
| 4973 |
+
},
|
| 4974 |
+
{
|
| 4975 |
+
"epoch": 0.7661181548421905,
|
| 4976 |
+
"grad_norm": 0.0013904630905017257,
|
| 4977 |
+
"learning_rate": 1.3056306435573634e-05,
|
| 4978 |
+
"loss": 0.0,
|
| 4979 |
+
"step": 710
|
| 4980 |
+
},
|
| 4981 |
+
{
|
| 4982 |
+
"epoch": 0.7671971944968977,
|
| 4983 |
+
"grad_norm": 0.0020118393003940582,
|
| 4984 |
+
"learning_rate": 1.294171946002739e-05,
|
| 4985 |
+
"loss": 0.0001,
|
| 4986 |
+
"step": 711
|
| 4987 |
+
},
|
| 4988 |
+
{
|
| 4989 |
+
"epoch": 0.7682762341516051,
|
| 4990 |
+
"grad_norm": 0.0007088634883984923,
|
| 4991 |
+
"learning_rate": 1.2827562736332555e-05,
|
| 4992 |
+
"loss": 0.0,
|
| 4993 |
+
"step": 712
|
| 4994 |
+
},
|
| 4995 |
+
{
|
| 4996 |
+
"epoch": 0.7693552738063124,
|
| 4997 |
+
"grad_norm": 0.0002638621663209051,
|
| 4998 |
+
"learning_rate": 1.2713837589864725e-05,
|
| 4999 |
+
"loss": 0.0,
|
| 5000 |
+
"step": 713
|
| 5001 |
+
},
|
| 5002 |
+
{
|
| 5003 |
+
"epoch": 0.7704343134610196,
|
| 5004 |
+
"grad_norm": 0.006345916073769331,
|
| 5005 |
+
"learning_rate": 1.2600545340988883e-05,
|
| 5006 |
+
"loss": 0.0001,
|
| 5007 |
+
"step": 714
|
| 5008 |
+
},
|
| 5009 |
+
{
|
| 5010 |
+
"epoch": 0.771513353115727,
|
| 5011 |
+
"grad_norm": 0.031157804653048515,
|
| 5012 |
+
"learning_rate": 1.2487687305043976e-05,
|
| 5013 |
+
"loss": 0.0002,
|
| 5014 |
+
"step": 715
|
| 5015 |
+
},
|
| 5016 |
+
{
|
| 5017 |
+
"epoch": 0.7725923927704343,
|
| 5018 |
+
"grad_norm": 0.0013597413199022412,
|
| 5019 |
+
"learning_rate": 1.2375264792327668e-05,
|
| 5020 |
+
"loss": 0.0001,
|
| 5021 |
+
"step": 716
|
| 5022 |
+
},
|
| 5023 |
+
{
|
| 5024 |
+
"epoch": 0.7736714324251416,
|
| 5025 |
+
"grad_norm": 0.0021487667690962553,
|
| 5026 |
+
"learning_rate": 1.2263279108081161e-05,
|
| 5027 |
+
"loss": 0.0,
|
| 5028 |
+
"step": 717
|
| 5029 |
+
},
|
| 5030 |
+
{
|
| 5031 |
+
"epoch": 0.7747504720798489,
|
| 5032 |
+
"grad_norm": 0.0022396312560886145,
|
| 5033 |
+
"learning_rate": 1.2151731552474e-05,
|
| 5034 |
+
"loss": 0.0001,
|
| 5035 |
+
"step": 718
|
| 5036 |
+
},
|
| 5037 |
+
{
|
| 5038 |
+
"epoch": 0.7758295117345563,
|
| 5039 |
+
"grad_norm": 0.00040765569428913295,
|
| 5040 |
+
"learning_rate": 1.2040623420588986e-05,
|
| 5041 |
+
"loss": 0.0,
|
| 5042 |
+
"step": 719
|
| 5043 |
+
},
|
| 5044 |
+
{
|
| 5045 |
+
"epoch": 0.7769085513892635,
|
| 5046 |
+
"grad_norm": 0.004759886767715216,
|
| 5047 |
+
"learning_rate": 1.1929956002407194e-05,
|
| 5048 |
+
"loss": 0.0001,
|
| 5049 |
+
"step": 720
|
| 5050 |
+
},
|
| 5051 |
+
{
|
| 5052 |
+
"epoch": 0.7779875910439709,
|
| 5053 |
+
"grad_norm": 0.0008044896530918777,
|
| 5054 |
+
"learning_rate": 1.1819730582792915e-05,
|
| 5055 |
+
"loss": 0.0,
|
| 5056 |
+
"step": 721
|
| 5057 |
+
},
|
| 5058 |
+
{
|
| 5059 |
+
"epoch": 0.7790666306986782,
|
| 5060 |
+
"grad_norm": 0.0006749728345312178,
|
| 5061 |
+
"learning_rate": 1.1709948441478764e-05,
|
| 5062 |
+
"loss": 0.0,
|
| 5063 |
+
"step": 722
|
| 5064 |
+
},
|
| 5065 |
+
{
|
| 5066 |
+
"epoch": 0.7801456703533854,
|
| 5067 |
+
"grad_norm": 0.002530804369598627,
|
| 5068 |
+
"learning_rate": 1.1600610853050858e-05,
|
| 5069 |
+
"loss": 0.0001,
|
| 5070 |
+
"step": 723
|
| 5071 |
+
},
|
| 5072 |
+
{
|
| 5073 |
+
"epoch": 0.7812247100080928,
|
| 5074 |
+
"grad_norm": 0.0008276696898974478,
|
| 5075 |
+
"learning_rate": 1.1491719086933967e-05,
|
| 5076 |
+
"loss": 0.0,
|
| 5077 |
+
"step": 724
|
| 5078 |
+
},
|
| 5079 |
+
{
|
| 5080 |
+
"epoch": 0.7823037496628001,
|
| 5081 |
+
"grad_norm": 0.000745826808270067,
|
| 5082 |
+
"learning_rate": 1.1383274407376847e-05,
|
| 5083 |
+
"loss": 0.0,
|
| 5084 |
+
"step": 725
|
| 5085 |
+
},
|
| 5086 |
+
{
|
| 5087 |
+
"epoch": 0.7833827893175074,
|
| 5088 |
+
"grad_norm": 0.0006307657458819449,
|
| 5089 |
+
"learning_rate": 1.1275278073437479e-05,
|
| 5090 |
+
"loss": 0.0,
|
| 5091 |
+
"step": 726
|
| 5092 |
+
},
|
| 5093 |
+
{
|
| 5094 |
+
"epoch": 0.7844618289722147,
|
| 5095 |
+
"grad_norm": 0.002632482908666134,
|
| 5096 |
+
"learning_rate": 1.1167731338968479e-05,
|
| 5097 |
+
"loss": 0.0001,
|
| 5098 |
+
"step": 727
|
| 5099 |
+
},
|
| 5100 |
+
{
|
| 5101 |
+
"epoch": 0.7855408686269221,
|
| 5102 |
+
"grad_norm": 0.0005170971271581948,
|
| 5103 |
+
"learning_rate": 1.106063545260258e-05,
|
| 5104 |
+
"loss": 0.0,
|
| 5105 |
+
"step": 728
|
| 5106 |
+
},
|
| 5107 |
+
{
|
| 5108 |
+
"epoch": 0.7866199082816293,
|
| 5109 |
+
"grad_norm": 0.0007977172499522567,
|
| 5110 |
+
"learning_rate": 1.0953991657738088e-05,
|
| 5111 |
+
"loss": 0.0,
|
| 5112 |
+
"step": 729
|
| 5113 |
+
},
|
| 5114 |
+
{
|
| 5115 |
+
"epoch": 0.7876989479363367,
|
| 5116 |
+
"grad_norm": 0.0015923945466056466,
|
| 5117 |
+
"learning_rate": 1.0847801192524454e-05,
|
| 5118 |
+
"loss": 0.0001,
|
| 5119 |
+
"step": 730
|
| 5120 |
+
},
|
| 5121 |
+
{
|
| 5122 |
+
"epoch": 0.788777987591044,
|
| 5123 |
+
"grad_norm": 0.0023606619797647,
|
| 5124 |
+
"learning_rate": 1.0742065289847942e-05,
|
| 5125 |
+
"loss": 0.0,
|
| 5126 |
+
"step": 731
|
| 5127 |
+
},
|
| 5128 |
+
{
|
| 5129 |
+
"epoch": 0.7898570272457512,
|
| 5130 |
+
"grad_norm": 0.00036575764534063637,
|
| 5131 |
+
"learning_rate": 1.0636785177317254e-05,
|
| 5132 |
+
"loss": 0.0,
|
| 5133 |
+
"step": 732
|
| 5134 |
+
},
|
| 5135 |
+
{
|
| 5136 |
+
"epoch": 0.7909360669004586,
|
| 5137 |
+
"grad_norm": 0.06830903887748718,
|
| 5138 |
+
"learning_rate": 1.0531962077249313e-05,
|
| 5139 |
+
"loss": 0.0002,
|
| 5140 |
+
"step": 733
|
| 5141 |
+
},
|
| 5142 |
+
{
|
| 5143 |
+
"epoch": 0.7920151065551659,
|
| 5144 |
+
"grad_norm": 0.0012319240486249328,
|
| 5145 |
+
"learning_rate": 1.0427597206655048e-05,
|
| 5146 |
+
"loss": 0.0,
|
| 5147 |
+
"step": 734
|
| 5148 |
+
},
|
| 5149 |
+
{
|
| 5150 |
+
"epoch": 0.7930941462098732,
|
| 5151 |
+
"grad_norm": 0.00039516360266134143,
|
| 5152 |
+
"learning_rate": 1.0323691777225285e-05,
|
| 5153 |
+
"loss": 0.0,
|
| 5154 |
+
"step": 735
|
| 5155 |
+
},
|
| 5156 |
+
{
|
| 5157 |
+
"epoch": 0.7941731858645805,
|
| 5158 |
+
"grad_norm": 0.0006764131248928607,
|
| 5159 |
+
"learning_rate": 1.0220246995316707e-05,
|
| 5160 |
+
"loss": 0.0,
|
| 5161 |
+
"step": 736
|
| 5162 |
+
},
|
| 5163 |
+
{
|
| 5164 |
+
"epoch": 0.7952522255192879,
|
| 5165 |
+
"grad_norm": 0.0015891552902758121,
|
| 5166 |
+
"learning_rate": 1.0117264061937775e-05,
|
| 5167 |
+
"loss": 0.0,
|
| 5168 |
+
"step": 737
|
| 5169 |
+
},
|
| 5170 |
+
{
|
| 5171 |
+
"epoch": 0.7963312651739951,
|
| 5172 |
+
"grad_norm": 0.0009112340630963445,
|
| 5173 |
+
"learning_rate": 1.001474417273483e-05,
|
| 5174 |
+
"loss": 0.0,
|
| 5175 |
+
"step": 738
|
| 5176 |
+
},
|
| 5177 |
+
{
|
| 5178 |
+
"epoch": 0.7974103048287025,
|
| 5179 |
+
"grad_norm": 0.0006295150960795581,
|
| 5180 |
+
"learning_rate": 9.91268851797822e-06,
|
| 5181 |
+
"loss": 0.0,
|
| 5182 |
+
"step": 739
|
| 5183 |
+
},
|
| 5184 |
+
{
|
| 5185 |
+
"epoch": 0.7984893444834098,
|
| 5186 |
+
"grad_norm": 0.006182640790939331,
|
| 5187 |
+
"learning_rate": 9.811098282548447e-06,
|
| 5188 |
+
"loss": 0.0002,
|
| 5189 |
+
"step": 740
|
| 5190 |
+
},
|
| 5191 |
+
{
|
| 5192 |
+
"epoch": 0.799568384138117,
|
| 5193 |
+
"grad_norm": 0.021598558872938156,
|
| 5194 |
+
"learning_rate": 9.709974645922432e-06,
|
| 5195 |
+
"loss": 0.0003,
|
| 5196 |
+
"step": 741
|
| 5197 |
+
},
|
| 5198 |
+
{
|
| 5199 |
+
"epoch": 0.8006474237928244,
|
| 5200 |
+
"grad_norm": 0.0007181694963946939,
|
| 5201 |
+
"learning_rate": 9.609318782159848e-06,
|
| 5202 |
+
"loss": 0.0,
|
| 5203 |
+
"step": 742
|
| 5204 |
+
},
|
| 5205 |
+
{
|
| 5206 |
+
"epoch": 0.8017264634475317,
|
| 5207 |
+
"grad_norm": 0.000500496244058013,
|
| 5208 |
+
"learning_rate": 9.509131859889425e-06,
|
| 5209 |
+
"loss": 0.0,
|
| 5210 |
+
"step": 743
|
| 5211 |
+
},
|
| 5212 |
+
{
|
| 5213 |
+
"epoch": 0.802805503102239,
|
| 5214 |
+
"grad_norm": 0.0019652985502034426,
|
| 5215 |
+
"learning_rate": 9.409415042295422e-06,
|
| 5216 |
+
"loss": 0.0,
|
| 5217 |
+
"step": 744
|
| 5218 |
+
},
|
| 5219 |
+
{
|
| 5220 |
+
"epoch": 0.8038845427569463,
|
| 5221 |
+
"grad_norm": 0.0021862483117729425,
|
| 5222 |
+
"learning_rate": 9.310169487104131e-06,
|
| 5223 |
+
"loss": 0.0001,
|
| 5224 |
+
"step": 745
|
| 5225 |
+
},
|
| 5226 |
+
{
|
| 5227 |
+
"epoch": 0.8049635824116537,
|
| 5228 |
+
"grad_norm": 0.04134347662329674,
|
| 5229 |
+
"learning_rate": 9.211396346570395e-06,
|
| 5230 |
+
"loss": 0.0005,
|
| 5231 |
+
"step": 746
|
| 5232 |
+
},
|
| 5233 |
+
{
|
| 5234 |
+
"epoch": 0.8060426220663609,
|
| 5235 |
+
"grad_norm": 0.00047488484415225685,
|
| 5236 |
+
"learning_rate": 9.113096767464302e-06,
|
| 5237 |
+
"loss": 0.0,
|
| 5238 |
+
"step": 747
|
| 5239 |
+
},
|
| 5240 |
+
{
|
| 5241 |
+
"epoch": 0.8071216617210683,
|
| 5242 |
+
"grad_norm": 0.002430742373690009,
|
| 5243 |
+
"learning_rate": 9.015271891057775e-06,
|
| 5244 |
+
"loss": 0.0001,
|
| 5245 |
+
"step": 748
|
| 5246 |
+
},
|
| 5247 |
+
{
|
| 5248 |
+
"epoch": 0.8082007013757756,
|
| 5249 |
+
"grad_norm": 0.008186195977032185,
|
| 5250 |
+
"learning_rate": 8.917922853111405e-06,
|
| 5251 |
+
"loss": 0.0002,
|
| 5252 |
+
"step": 749
|
| 5253 |
+
},
|
| 5254 |
+
{
|
| 5255 |
+
"epoch": 0.8092797410304828,
|
| 5256 |
+
"grad_norm": 0.14236406981945038,
|
| 5257 |
+
"learning_rate": 8.821050783861212e-06,
|
| 5258 |
+
"loss": 0.001,
|
| 5259 |
+
"step": 750
|
| 5260 |
+
},
|
| 5261 |
+
{
|
| 5262 |
+
"epoch": 0.8103587806851902,
|
| 5263 |
+
"grad_norm": 0.029549632221460342,
|
| 5264 |
+
"learning_rate": 8.724656808005555e-06,
|
| 5265 |
+
"loss": 0.0007,
|
| 5266 |
+
"step": 751
|
| 5267 |
+
},
|
| 5268 |
+
{
|
| 5269 |
+
"epoch": 0.8114378203398975,
|
| 5270 |
+
"grad_norm": 0.0005387684213928878,
|
| 5271 |
+
"learning_rate": 8.62874204469204e-06,
|
| 5272 |
+
"loss": 0.0,
|
| 5273 |
+
"step": 752
|
| 5274 |
+
},
|
| 5275 |
+
{
|
| 5276 |
+
"epoch": 0.8125168599946048,
|
| 5277 |
+
"grad_norm": 0.0027514882385730743,
|
| 5278 |
+
"learning_rate": 8.533307607504597e-06,
|
| 5279 |
+
"loss": 0.0001,
|
| 5280 |
+
"step": 753
|
| 5281 |
+
},
|
| 5282 |
+
{
|
| 5283 |
+
"epoch": 0.8135958996493121,
|
| 5284 |
+
"grad_norm": 0.0006713513284921646,
|
| 5285 |
+
"learning_rate": 8.438354604450454e-06,
|
| 5286 |
+
"loss": 0.0,
|
| 5287 |
+
"step": 754
|
| 5288 |
+
},
|
| 5289 |
+
{
|
| 5290 |
+
"epoch": 0.8146749393040195,
|
| 5291 |
+
"grad_norm": 0.0029521603137254715,
|
| 5292 |
+
"learning_rate": 8.343884137947333e-06,
|
| 5293 |
+
"loss": 0.0,
|
| 5294 |
+
"step": 755
|
| 5295 |
+
},
|
| 5296 |
+
{
|
| 5297 |
+
"epoch": 0.8157539789587267,
|
| 5298 |
+
"grad_norm": 0.0014442915562540293,
|
| 5299 |
+
"learning_rate": 8.24989730481065e-06,
|
| 5300 |
+
"loss": 0.0,
|
| 5301 |
+
"step": 756
|
| 5302 |
+
},
|
| 5303 |
+
{
|
| 5304 |
+
"epoch": 0.816833018613434,
|
| 5305 |
+
"grad_norm": 0.002349106827750802,
|
| 5306 |
+
"learning_rate": 8.15639519624075e-06,
|
| 5307 |
+
"loss": 0.0001,
|
| 5308 |
+
"step": 757
|
| 5309 |
+
},
|
| 5310 |
+
{
|
| 5311 |
+
"epoch": 0.8179120582681414,
|
| 5312 |
+
"grad_norm": 0.0008182977908290923,
|
| 5313 |
+
"learning_rate": 8.063378897810275e-06,
|
| 5314 |
+
"loss": 0.0,
|
| 5315 |
+
"step": 758
|
| 5316 |
+
},
|
| 5317 |
+
{
|
| 5318 |
+
"epoch": 0.8189910979228486,
|
| 5319 |
+
"grad_norm": 0.0003832654620055109,
|
| 5320 |
+
"learning_rate": 7.970849489451548e-06,
|
| 5321 |
+
"loss": 0.0,
|
| 5322 |
+
"step": 759
|
| 5323 |
+
},
|
| 5324 |
+
{
|
| 5325 |
+
"epoch": 0.820070137577556,
|
| 5326 |
+
"grad_norm": 0.00035305522033013403,
|
| 5327 |
+
"learning_rate": 7.878808045444014e-06,
|
| 5328 |
+
"loss": 0.0,
|
| 5329 |
+
"step": 760
|
| 5330 |
+
},
|
| 5331 |
+
{
|
| 5332 |
+
"epoch": 0.8211491772322633,
|
| 5333 |
+
"grad_norm": 0.002128857420757413,
|
| 5334 |
+
"learning_rate": 7.787255634401785e-06,
|
| 5335 |
+
"loss": 0.0001,
|
| 5336 |
+
"step": 761
|
| 5337 |
+
},
|
| 5338 |
+
{
|
| 5339 |
+
"epoch": 0.8222282168869706,
|
| 5340 |
+
"grad_norm": 0.004124282859265804,
|
| 5341 |
+
"learning_rate": 7.696193319261241e-06,
|
| 5342 |
+
"loss": 0.0001,
|
| 5343 |
+
"step": 762
|
| 5344 |
+
},
|
| 5345 |
+
{
|
| 5346 |
+
"epoch": 0.8233072565416779,
|
| 5347 |
+
"grad_norm": 0.0065514869056642056,
|
| 5348 |
+
"learning_rate": 7.605622157268655e-06,
|
| 5349 |
+
"loss": 0.0001,
|
| 5350 |
+
"step": 763
|
| 5351 |
+
},
|
| 5352 |
+
{
|
| 5353 |
+
"epoch": 0.8243862961963853,
|
| 5354 |
+
"grad_norm": 0.0015036019030958414,
|
| 5355 |
+
"learning_rate": 7.515543199967989e-06,
|
| 5356 |
+
"loss": 0.0001,
|
| 5357 |
+
"step": 764
|
| 5358 |
+
},
|
| 5359 |
+
{
|
| 5360 |
+
"epoch": 0.8254653358510925,
|
| 5361 |
+
"grad_norm": 0.0008803247474133968,
|
| 5362 |
+
"learning_rate": 7.425957493188601e-06,
|
| 5363 |
+
"loss": 0.0,
|
| 5364 |
+
"step": 765
|
| 5365 |
+
},
|
| 5366 |
+
{
|
| 5367 |
+
"epoch": 0.8265443755057998,
|
| 5368 |
+
"grad_norm": 0.0006218330236151814,
|
| 5369 |
+
"learning_rate": 7.33686607703315e-06,
|
| 5370 |
+
"loss": 0.0,
|
| 5371 |
+
"step": 766
|
| 5372 |
+
},
|
| 5373 |
+
{
|
| 5374 |
+
"epoch": 0.8276234151605072,
|
| 5375 |
+
"grad_norm": 0.0005795125034637749,
|
| 5376 |
+
"learning_rate": 7.248269985865513e-06,
|
| 5377 |
+
"loss": 0.0,
|
| 5378 |
+
"step": 767
|
| 5379 |
+
},
|
| 5380 |
+
{
|
| 5381 |
+
"epoch": 0.8287024548152144,
|
| 5382 |
+
"grad_norm": 0.010642164386808872,
|
| 5383 |
+
"learning_rate": 7.160170248298781e-06,
|
| 5384 |
+
"loss": 0.0003,
|
| 5385 |
+
"step": 768
|
| 5386 |
+
},
|
| 5387 |
+
{
|
| 5388 |
+
"epoch": 0.8297814944699218,
|
| 5389 |
+
"grad_norm": 0.00045828381553292274,
|
| 5390 |
+
"learning_rate": 7.072567887183279e-06,
|
| 5391 |
+
"loss": 0.0,
|
| 5392 |
+
"step": 769
|
| 5393 |
+
},
|
| 5394 |
+
{
|
| 5395 |
+
"epoch": 0.8308605341246291,
|
| 5396 |
+
"grad_norm": 0.0003997626481577754,
|
| 5397 |
+
"learning_rate": 6.985463919594781e-06,
|
| 5398 |
+
"loss": 0.0,
|
| 5399 |
+
"step": 770
|
| 5400 |
+
},
|
| 5401 |
+
{
|
| 5402 |
+
"epoch": 0.8319395737793364,
|
| 5403 |
+
"grad_norm": 0.019628409296274185,
|
| 5404 |
+
"learning_rate": 6.898859356822585e-06,
|
| 5405 |
+
"loss": 0.0002,
|
| 5406 |
+
"step": 771
|
| 5407 |
+
},
|
| 5408 |
+
{
|
| 5409 |
+
"epoch": 0.8330186134340437,
|
| 5410 |
+
"grad_norm": 0.000559719861485064,
|
| 5411 |
+
"learning_rate": 6.812755204357857e-06,
|
| 5412 |
+
"loss": 0.0,
|
| 5413 |
+
"step": 772
|
| 5414 |
+
},
|
| 5415 |
+
{
|
| 5416 |
+
"epoch": 0.8340976530887511,
|
| 5417 |
+
"grad_norm": 0.0010943820234388113,
|
| 5418 |
+
"learning_rate": 6.727152461881925e-06,
|
| 5419 |
+
"loss": 0.0,
|
| 5420 |
+
"step": 773
|
| 5421 |
+
},
|
| 5422 |
+
{
|
| 5423 |
+
"epoch": 0.8351766927434583,
|
| 5424 |
+
"grad_norm": 0.000744102755561471,
|
| 5425 |
+
"learning_rate": 6.642052123254666e-06,
|
| 5426 |
+
"loss": 0.0,
|
| 5427 |
+
"step": 774
|
| 5428 |
+
},
|
| 5429 |
+
{
|
| 5430 |
+
"epoch": 0.8362557323981656,
|
| 5431 |
+
"grad_norm": 0.0002823436225298792,
|
| 5432 |
+
"learning_rate": 6.5574551765029855e-06,
|
| 5433 |
+
"loss": 0.0,
|
| 5434 |
+
"step": 775
|
| 5435 |
+
},
|
| 5436 |
+
{
|
| 5437 |
+
"epoch": 0.837334772052873,
|
| 5438 |
+
"grad_norm": 0.0008850269368849695,
|
| 5439 |
+
"learning_rate": 6.4733626038093465e-06,
|
| 5440 |
+
"loss": 0.0,
|
| 5441 |
+
"step": 776
|
| 5442 |
+
},
|
| 5443 |
+
{
|
| 5444 |
+
"epoch": 0.8384138117075802,
|
| 5445 |
+
"grad_norm": 0.0005732047138735652,
|
| 5446 |
+
"learning_rate": 6.389775381500351e-06,
|
| 5447 |
+
"loss": 0.0,
|
| 5448 |
+
"step": 777
|
| 5449 |
+
},
|
| 5450 |
+
{
|
| 5451 |
+
"epoch": 0.8394928513622876,
|
| 5452 |
+
"grad_norm": 0.000512587430421263,
|
| 5453 |
+
"learning_rate": 6.306694480035408e-06,
|
| 5454 |
+
"loss": 0.0,
|
| 5455 |
+
"step": 778
|
| 5456 |
+
},
|
| 5457 |
+
{
|
| 5458 |
+
"epoch": 0.8405718910169949,
|
| 5459 |
+
"grad_norm": 0.012505724094808102,
|
| 5460 |
+
"learning_rate": 6.22412086399547e-06,
|
| 5461 |
+
"loss": 0.0001,
|
| 5462 |
+
"step": 779
|
| 5463 |
+
},
|
| 5464 |
+
{
|
| 5465 |
+
"epoch": 0.8416509306717022,
|
| 5466 |
+
"grad_norm": 0.008105852641165257,
|
| 5467 |
+
"learning_rate": 6.142055492071841e-06,
|
| 5468 |
+
"loss": 0.0001,
|
| 5469 |
+
"step": 780
|
| 5470 |
+
},
|
| 5471 |
+
{
|
| 5472 |
+
"epoch": 0.8427299703264095,
|
| 5473 |
+
"grad_norm": 0.0011853997129946947,
|
| 5474 |
+
"learning_rate": 6.060499317055024e-06,
|
| 5475 |
+
"loss": 0.0,
|
| 5476 |
+
"step": 781
|
| 5477 |
+
},
|
| 5478 |
+
{
|
| 5479 |
+
"epoch": 0.8438090099811169,
|
| 5480 |
+
"grad_norm": 0.0013676261296495795,
|
| 5481 |
+
"learning_rate": 5.979453285823711e-06,
|
| 5482 |
+
"loss": 0.0001,
|
| 5483 |
+
"step": 782
|
| 5484 |
+
},
|
| 5485 |
+
{
|
| 5486 |
+
"epoch": 0.8448880496358241,
|
| 5487 |
+
"grad_norm": 0.09016416221857071,
|
| 5488 |
+
"learning_rate": 5.898918339333714e-06,
|
| 5489 |
+
"loss": 0.0046,
|
| 5490 |
+
"step": 783
|
| 5491 |
+
},
|
| 5492 |
+
{
|
| 5493 |
+
"epoch": 0.8459670892905314,
|
| 5494 |
+
"grad_norm": 0.0019953937735408545,
|
| 5495 |
+
"learning_rate": 5.818895412607095e-06,
|
| 5496 |
+
"loss": 0.0001,
|
| 5497 |
+
"step": 784
|
| 5498 |
+
},
|
| 5499 |
+
{
|
| 5500 |
+
"epoch": 0.8470461289452388,
|
| 5501 |
+
"grad_norm": 0.001061942195519805,
|
| 5502 |
+
"learning_rate": 5.739385434721295e-06,
|
| 5503 |
+
"loss": 0.0001,
|
| 5504 |
+
"step": 785
|
| 5505 |
+
},
|
| 5506 |
+
{
|
| 5507 |
+
"epoch": 0.848125168599946,
|
| 5508 |
+
"grad_norm": 0.001198655809275806,
|
| 5509 |
+
"learning_rate": 5.660389328798332e-06,
|
| 5510 |
+
"loss": 0.0,
|
| 5511 |
+
"step": 786
|
| 5512 |
+
},
|
| 5513 |
+
{
|
| 5514 |
+
"epoch": 0.8492042082546534,
|
| 5515 |
+
"grad_norm": 0.0005077929818071425,
|
| 5516 |
+
"learning_rate": 5.581908011994131e-06,
|
| 5517 |
+
"loss": 0.0,
|
| 5518 |
+
"step": 787
|
| 5519 |
+
},
|
| 5520 |
+
{
|
| 5521 |
+
"epoch": 0.8502832479093607,
|
| 5522 |
+
"grad_norm": 0.002251911675557494,
|
| 5523 |
+
"learning_rate": 5.50394239548781e-06,
|
| 5524 |
+
"loss": 0.0001,
|
| 5525 |
+
"step": 788
|
| 5526 |
+
},
|
| 5527 |
+
{
|
| 5528 |
+
"epoch": 0.851362287564068,
|
| 5529 |
+
"grad_norm": 0.001725160633213818,
|
| 5530 |
+
"learning_rate": 5.426493384471154e-06,
|
| 5531 |
+
"loss": 0.0001,
|
| 5532 |
+
"step": 789
|
| 5533 |
+
},
|
| 5534 |
+
{
|
| 5535 |
+
"epoch": 0.8524413272187753,
|
| 5536 |
+
"grad_norm": 0.0005451784818433225,
|
| 5537 |
+
"learning_rate": 5.349561878138076e-06,
|
| 5538 |
+
"loss": 0.0,
|
| 5539 |
+
"step": 790
|
| 5540 |
+
},
|
| 5541 |
+
{
|
| 5542 |
+
"epoch": 0.8535203668734826,
|
| 5543 |
+
"grad_norm": 0.0010075717000290751,
|
| 5544 |
+
"learning_rate": 5.2731487696741854e-06,
|
| 5545 |
+
"loss": 0.0,
|
| 5546 |
+
"step": 791
|
| 5547 |
+
},
|
| 5548 |
+
{
|
| 5549 |
+
"epoch": 0.8545994065281899,
|
| 5550 |
+
"grad_norm": 0.005417757201939821,
|
| 5551 |
+
"learning_rate": 5.197254946246416e-06,
|
| 5552 |
+
"loss": 0.0001,
|
| 5553 |
+
"step": 792
|
| 5554 |
+
},
|
| 5555 |
+
{
|
| 5556 |
+
"epoch": 0.8556784461828972,
|
| 5557 |
+
"grad_norm": 0.002934516640380025,
|
| 5558 |
+
"learning_rate": 5.121881288992758e-06,
|
| 5559 |
+
"loss": 0.0001,
|
| 5560 |
+
"step": 793
|
| 5561 |
+
},
|
| 5562 |
+
{
|
| 5563 |
+
"epoch": 0.8567574858376046,
|
| 5564 |
+
"grad_norm": 0.0006306925206445158,
|
| 5565 |
+
"learning_rate": 5.047028673011966e-06,
|
| 5566 |
+
"loss": 0.0,
|
| 5567 |
+
"step": 794
|
| 5568 |
+
},
|
| 5569 |
+
{
|
| 5570 |
+
"epoch": 0.8578365254923118,
|
| 5571 |
+
"grad_norm": 0.0084421681240201,
|
| 5572 |
+
"learning_rate": 4.9726979673534454e-06,
|
| 5573 |
+
"loss": 0.0001,
|
| 5574 |
+
"step": 795
|
| 5575 |
+
},
|
| 5576 |
+
{
|
| 5577 |
+
"epoch": 0.8589155651470192,
|
| 5578 |
+
"grad_norm": 0.06783898919820786,
|
| 5579 |
+
"learning_rate": 4.8988900350071474e-06,
|
| 5580 |
+
"loss": 0.0009,
|
| 5581 |
+
"step": 796
|
| 5582 |
+
},
|
| 5583 |
+
{
|
| 5584 |
+
"epoch": 0.8599946048017265,
|
| 5585 |
+
"grad_norm": 0.0039332592859864235,
|
| 5586 |
+
"learning_rate": 4.825605732893545e-06,
|
| 5587 |
+
"loss": 0.0001,
|
| 5588 |
+
"step": 797
|
| 5589 |
+
},
|
| 5590 |
+
{
|
| 5591 |
+
"epoch": 0.8610736444564338,
|
| 5592 |
+
"grad_norm": 0.0025624572299420834,
|
| 5593 |
+
"learning_rate": 4.752845911853698e-06,
|
| 5594 |
+
"loss": 0.0,
|
| 5595 |
+
"step": 798
|
| 5596 |
+
},
|
| 5597 |
+
{
|
| 5598 |
+
"epoch": 0.8621526841111411,
|
| 5599 |
+
"grad_norm": 0.199618399143219,
|
| 5600 |
+
"learning_rate": 4.68061141663938e-06,
|
| 5601 |
+
"loss": 0.007,
|
| 5602 |
+
"step": 799
|
| 5603 |
+
},
|
| 5604 |
+
{
|
| 5605 |
+
"epoch": 0.8632317237658484,
|
| 5606 |
+
"grad_norm": 0.002309242030605674,
|
| 5607 |
+
"learning_rate": 4.608903085903238e-06,
|
| 5608 |
+
"loss": 0.0001,
|
| 5609 |
+
"step": 800
|
| 5610 |
+
},
|
| 5611 |
+
{
|
| 5612 |
+
"epoch": 0.8643107634205557,
|
| 5613 |
+
"grad_norm": 0.04261766001582146,
|
| 5614 |
+
"learning_rate": 4.537721752189078e-06,
|
| 5615 |
+
"loss": 0.0043,
|
| 5616 |
+
"step": 801
|
| 5617 |
+
},
|
| 5618 |
+
{
|
| 5619 |
+
"epoch": 0.865389803075263,
|
| 5620 |
+
"grad_norm": 0.004815481137484312,
|
| 5621 |
+
"learning_rate": 4.4670682419221955e-06,
|
| 5622 |
+
"loss": 0.0001,
|
| 5623 |
+
"step": 802
|
| 5624 |
+
},
|
| 5625 |
+
{
|
| 5626 |
+
"epoch": 0.8664688427299704,
|
| 5627 |
+
"grad_norm": 0.0005674590356647968,
|
| 5628 |
+
"learning_rate": 4.3969433753997975e-06,
|
| 5629 |
+
"loss": 0.0,
|
| 5630 |
+
"step": 803
|
| 5631 |
+
},
|
| 5632 |
+
{
|
| 5633 |
+
"epoch": 0.8675478823846776,
|
| 5634 |
+
"grad_norm": 0.0014791572466492653,
|
| 5635 |
+
"learning_rate": 4.327347966781437e-06,
|
| 5636 |
+
"loss": 0.0001,
|
| 5637 |
+
"step": 804
|
| 5638 |
+
},
|
| 5639 |
+
{
|
| 5640 |
+
"epoch": 0.868626922039385,
|
| 5641 |
+
"grad_norm": 0.0011209994554519653,
|
| 5642 |
+
"learning_rate": 4.258282824079618e-06,
|
| 5643 |
+
"loss": 0.0001,
|
| 5644 |
+
"step": 805
|
| 5645 |
+
},
|
| 5646 |
+
{
|
| 5647 |
+
"epoch": 0.8697059616940923,
|
| 5648 |
+
"grad_norm": 0.0006980937323532999,
|
| 5649 |
+
"learning_rate": 4.189748749150357e-06,
|
| 5650 |
+
"loss": 0.0,
|
| 5651 |
+
"step": 806
|
| 5652 |
+
},
|
| 5653 |
+
{
|
| 5654 |
+
"epoch": 0.8707850013487995,
|
| 5655 |
+
"grad_norm": 0.0009885356994345784,
|
| 5656 |
+
"learning_rate": 4.121746537683907e-06,
|
| 5657 |
+
"loss": 0.0,
|
| 5658 |
+
"step": 807
|
| 5659 |
+
},
|
| 5660 |
+
{
|
| 5661 |
+
"epoch": 0.8718640410035069,
|
| 5662 |
+
"grad_norm": 0.0009470575023442507,
|
| 5663 |
+
"learning_rate": 4.0542769791955095e-06,
|
| 5664 |
+
"loss": 0.0,
|
| 5665 |
+
"step": 808
|
| 5666 |
+
},
|
| 5667 |
+
{
|
| 5668 |
+
"epoch": 0.8729430806582142,
|
| 5669 |
+
"grad_norm": 0.00587791483849287,
|
| 5670 |
+
"learning_rate": 3.987340857016225e-06,
|
| 5671 |
+
"loss": 0.0001,
|
| 5672 |
+
"step": 809
|
| 5673 |
+
},
|
| 5674 |
+
{
|
| 5675 |
+
"epoch": 0.8740221203129215,
|
| 5676 |
+
"grad_norm": 0.00039143982576206326,
|
| 5677 |
+
"learning_rate": 3.92093894828387e-06,
|
| 5678 |
+
"loss": 0.0,
|
| 5679 |
+
"step": 810
|
| 5680 |
+
},
|
| 5681 |
+
{
|
| 5682 |
+
"epoch": 0.8751011599676288,
|
| 5683 |
+
"grad_norm": 0.0009302119724452496,
|
| 5684 |
+
"learning_rate": 3.855072023933931e-06,
|
| 5685 |
+
"loss": 0.0,
|
| 5686 |
+
"step": 811
|
| 5687 |
+
},
|
| 5688 |
+
{
|
| 5689 |
+
"epoch": 0.8761801996223362,
|
| 5690 |
+
"grad_norm": 0.03408830985426903,
|
| 5691 |
+
"learning_rate": 3.7897408486906815e-06,
|
| 5692 |
+
"loss": 0.0005,
|
| 5693 |
+
"step": 812
|
| 5694 |
+
},
|
| 5695 |
+
{
|
| 5696 |
+
"epoch": 0.8772592392770434,
|
| 5697 |
+
"grad_norm": 0.006774293724447489,
|
| 5698 |
+
"learning_rate": 3.724946181058242e-06,
|
| 5699 |
+
"loss": 0.0001,
|
| 5700 |
+
"step": 813
|
| 5701 |
+
},
|
| 5702 |
+
{
|
| 5703 |
+
"epoch": 0.8783382789317508,
|
| 5704 |
+
"grad_norm": 0.012250292114913464,
|
| 5705 |
+
"learning_rate": 3.660688773311838e-06,
|
| 5706 |
+
"loss": 0.0001,
|
| 5707 |
+
"step": 814
|
| 5708 |
+
},
|
| 5709 |
+
{
|
| 5710 |
+
"epoch": 0.8794173185864581,
|
| 5711 |
+
"grad_norm": 0.11477049440145493,
|
| 5712 |
+
"learning_rate": 3.596969371488995e-06,
|
| 5713 |
+
"loss": 0.0007,
|
| 5714 |
+
"step": 815
|
| 5715 |
+
},
|
| 5716 |
+
{
|
| 5717 |
+
"epoch": 0.8804963582411653,
|
| 5718 |
+
"grad_norm": 0.005304504185914993,
|
| 5719 |
+
"learning_rate": 3.5337887153809478e-06,
|
| 5720 |
+
"loss": 0.0002,
|
| 5721 |
+
"step": 816
|
| 5722 |
+
},
|
| 5723 |
+
{
|
| 5724 |
+
"epoch": 0.8815753978958727,
|
| 5725 |
+
"grad_norm": 0.0033430811017751694,
|
| 5726 |
+
"learning_rate": 3.4711475385240055e-06,
|
| 5727 |
+
"loss": 0.0001,
|
| 5728 |
+
"step": 817
|
| 5729 |
+
},
|
| 5730 |
+
{
|
| 5731 |
+
"epoch": 0.8826544375505799,
|
| 5732 |
+
"grad_norm": 0.0007113641477189958,
|
| 5733 |
+
"learning_rate": 3.4090465681910435e-06,
|
| 5734 |
+
"loss": 0.0,
|
| 5735 |
+
"step": 818
|
| 5736 |
+
},
|
| 5737 |
+
{
|
| 5738 |
+
"epoch": 0.8837334772052873,
|
| 5739 |
+
"grad_norm": 0.0005928325117565691,
|
| 5740 |
+
"learning_rate": 3.347486525383059e-06,
|
| 5741 |
+
"loss": 0.0,
|
| 5742 |
+
"step": 819
|
| 5743 |
+
},
|
| 5744 |
+
{
|
| 5745 |
+
"epoch": 0.8848125168599946,
|
| 5746 |
+
"grad_norm": 0.003862974001094699,
|
| 5747 |
+
"learning_rate": 3.2864681248208183e-06,
|
| 5748 |
+
"loss": 0.0001,
|
| 5749 |
+
"step": 820
|
| 5750 |
+
},
|
| 5751 |
+
{
|
| 5752 |
+
"epoch": 0.8858915565147019,
|
| 5753 |
+
"grad_norm": 0.002365738619118929,
|
| 5754 |
+
"learning_rate": 3.2259920749365236e-06,
|
| 5755 |
+
"loss": 0.0001,
|
| 5756 |
+
"step": 821
|
| 5757 |
+
},
|
| 5758 |
+
{
|
| 5759 |
+
"epoch": 0.8869705961694092,
|
| 5760 |
+
"grad_norm": 0.0006231599254533648,
|
| 5761 |
+
"learning_rate": 3.1660590778656407e-06,
|
| 5762 |
+
"loss": 0.0,
|
| 5763 |
+
"step": 822
|
| 5764 |
+
},
|
| 5765 |
+
{
|
| 5766 |
+
"epoch": 0.8880496358241166,
|
| 5767 |
+
"grad_norm": 0.00047246352187357843,
|
| 5768 |
+
"learning_rate": 3.1066698294386965e-06,
|
| 5769 |
+
"loss": 0.0,
|
| 5770 |
+
"step": 823
|
| 5771 |
+
},
|
| 5772 |
+
{
|
| 5773 |
+
"epoch": 0.8891286754788238,
|
| 5774 |
+
"grad_norm": 0.0005266937077976763,
|
| 5775 |
+
"learning_rate": 3.0478250191732115e-06,
|
| 5776 |
+
"loss": 0.0,
|
| 5777 |
+
"step": 824
|
| 5778 |
+
},
|
| 5779 |
+
{
|
| 5780 |
+
"epoch": 0.8902077151335311,
|
| 5781 |
+
"grad_norm": 0.0013274071970954537,
|
| 5782 |
+
"learning_rate": 2.989525330265719e-06,
|
| 5783 |
+
"loss": 0.0001,
|
| 5784 |
+
"step": 825
|
| 5785 |
+
},
|
| 5786 |
+
{
|
| 5787 |
+
"epoch": 0.8912867547882385,
|
| 5788 |
+
"grad_norm": 0.0015190548729151487,
|
| 5789 |
+
"learning_rate": 2.931771439583808e-06,
|
| 5790 |
+
"loss": 0.0001,
|
| 5791 |
+
"step": 826
|
| 5792 |
+
},
|
| 5793 |
+
{
|
| 5794 |
+
"epoch": 0.8923657944429457,
|
| 5795 |
+
"grad_norm": 0.0027888966724276543,
|
| 5796 |
+
"learning_rate": 2.8745640176582765e-06,
|
| 5797 |
+
"loss": 0.0001,
|
| 5798 |
+
"step": 827
|
| 5799 |
+
},
|
| 5800 |
+
{
|
| 5801 |
+
"epoch": 0.8934448340976531,
|
| 5802 |
+
"grad_norm": 0.001688987365923822,
|
| 5803 |
+
"learning_rate": 2.8179037286753416e-06,
|
| 5804 |
+
"loss": 0.0001,
|
| 5805 |
+
"step": 828
|
| 5806 |
+
},
|
| 5807 |
+
{
|
| 5808 |
+
"epoch": 0.8945238737523604,
|
| 5809 |
+
"grad_norm": 0.0006218472844921052,
|
| 5810 |
+
"learning_rate": 2.7617912304689354e-06,
|
| 5811 |
+
"loss": 0.0,
|
| 5812 |
+
"step": 829
|
| 5813 |
+
},
|
| 5814 |
+
{
|
| 5815 |
+
"epoch": 0.8956029134070677,
|
| 5816 |
+
"grad_norm": 0.0003677410713862628,
|
| 5817 |
+
"learning_rate": 2.7062271745130594e-06,
|
| 5818 |
+
"loss": 0.0,
|
| 5819 |
+
"step": 830
|
| 5820 |
+
},
|
| 5821 |
+
{
|
| 5822 |
+
"epoch": 0.896681953061775,
|
| 5823 |
+
"grad_norm": 0.0038491019513458014,
|
| 5824 |
+
"learning_rate": 2.651212205914211e-06,
|
| 5825 |
+
"loss": 0.0001,
|
| 5826 |
+
"step": 831
|
| 5827 |
+
},
|
| 5828 |
+
{
|
| 5829 |
+
"epoch": 0.8977609927164824,
|
| 5830 |
+
"grad_norm": 0.0006347736343741417,
|
| 5831 |
+
"learning_rate": 2.5967469634039177e-06,
|
| 5832 |
+
"loss": 0.0,
|
| 5833 |
+
"step": 832
|
| 5834 |
+
},
|
| 5835 |
+
{
|
| 5836 |
+
"epoch": 0.8988400323711896,
|
| 5837 |
+
"grad_norm": 0.0007753499085083604,
|
| 5838 |
+
"learning_rate": 2.5428320793313144e-06,
|
| 5839 |
+
"loss": 0.0,
|
| 5840 |
+
"step": 833
|
| 5841 |
+
},
|
| 5842 |
+
{
|
| 5843 |
+
"epoch": 0.8999190720258969,
|
| 5844 |
+
"grad_norm": 0.0004899702616967261,
|
| 5845 |
+
"learning_rate": 2.489468179655796e-06,
|
| 5846 |
+
"loss": 0.0,
|
| 5847 |
+
"step": 834
|
| 5848 |
+
},
|
| 5849 |
+
{
|
| 5850 |
+
"epoch": 0.9009981116806043,
|
| 5851 |
+
"grad_norm": 0.0005360933137126267,
|
| 5852 |
+
"learning_rate": 2.436655883939737e-06,
|
| 5853 |
+
"loss": 0.0,
|
| 5854 |
+
"step": 835
|
| 5855 |
+
},
|
| 5856 |
+
{
|
| 5857 |
+
"epoch": 0.9020771513353115,
|
| 5858 |
+
"grad_norm": 0.0007349636871367693,
|
| 5859 |
+
"learning_rate": 2.3843958053413275e-06,
|
| 5860 |
+
"loss": 0.0,
|
| 5861 |
+
"step": 836
|
| 5862 |
+
},
|
| 5863 |
+
{
|
| 5864 |
+
"epoch": 0.9031561909900189,
|
| 5865 |
+
"grad_norm": 0.00036560322041623294,
|
| 5866 |
+
"learning_rate": 2.3326885506074314e-06,
|
| 5867 |
+
"loss": 0.0,
|
| 5868 |
+
"step": 837
|
| 5869 |
+
},
|
| 5870 |
+
{
|
| 5871 |
+
"epoch": 0.9042352306447262,
|
| 5872 |
+
"grad_norm": 0.0012470025103539228,
|
| 5873 |
+
"learning_rate": 2.2815347200665415e-06,
|
| 5874 |
+
"loss": 0.0001,
|
| 5875 |
+
"step": 838
|
| 5876 |
+
},
|
| 5877 |
+
{
|
| 5878 |
+
"epoch": 0.9053142702994335,
|
| 5879 |
+
"grad_norm": 0.0024500922299921513,
|
| 5880 |
+
"learning_rate": 2.2309349076218456e-06,
|
| 5881 |
+
"loss": 0.0001,
|
| 5882 |
+
"step": 839
|
| 5883 |
+
},
|
| 5884 |
+
{
|
| 5885 |
+
"epoch": 0.9063933099541408,
|
| 5886 |
+
"grad_norm": 0.0010569506557658315,
|
| 5887 |
+
"learning_rate": 2.1808897007442765e-06,
|
| 5888 |
+
"loss": 0.0,
|
| 5889 |
+
"step": 840
|
| 5890 |
+
},
|
| 5891 |
+
{
|
| 5892 |
+
"epoch": 0.9074723496088482,
|
| 5893 |
+
"grad_norm": 0.010917078703641891,
|
| 5894 |
+
"learning_rate": 2.131399680465729e-06,
|
| 5895 |
+
"loss": 0.0001,
|
| 5896 |
+
"step": 841
|
| 5897 |
+
},
|
| 5898 |
+
{
|
| 5899 |
+
"epoch": 0.9085513892635554,
|
| 5900 |
+
"grad_norm": 0.0013472632272168994,
|
| 5901 |
+
"learning_rate": 2.082465421372304e-06,
|
| 5902 |
+
"loss": 0.0001,
|
| 5903 |
+
"step": 842
|
| 5904 |
+
},
|
| 5905 |
+
{
|
| 5906 |
+
"epoch": 0.9096304289182627,
|
| 5907 |
+
"grad_norm": 0.0006471822853200138,
|
| 5908 |
+
"learning_rate": 2.0340874915976306e-06,
|
| 5909 |
+
"loss": 0.0,
|
| 5910 |
+
"step": 843
|
| 5911 |
+
},
|
| 5912 |
+
{
|
| 5913 |
+
"epoch": 0.9107094685729701,
|
| 5914 |
+
"grad_norm": 0.002463939832523465,
|
| 5915 |
+
"learning_rate": 1.9862664528162766e-06,
|
| 5916 |
+
"loss": 0.0001,
|
| 5917 |
+
"step": 844
|
| 5918 |
+
},
|
| 5919 |
+
{
|
| 5920 |
+
"epoch": 0.9117885082276773,
|
| 5921 |
+
"grad_norm": 0.0010989494621753693,
|
| 5922 |
+
"learning_rate": 1.939002860237249e-06,
|
| 5923 |
+
"loss": 0.0,
|
| 5924 |
+
"step": 845
|
| 5925 |
+
},
|
| 5926 |
+
{
|
| 5927 |
+
"epoch": 0.9128675478823847,
|
| 5928 |
+
"grad_norm": 0.001474874559789896,
|
| 5929 |
+
"learning_rate": 1.8922972625974923e-06,
|
| 5930 |
+
"loss": 0.0,
|
| 5931 |
+
"step": 846
|
| 5932 |
+
},
|
| 5933 |
+
{
|
| 5934 |
+
"epoch": 0.913946587537092,
|
| 5935 |
+
"grad_norm": 0.006552140228450298,
|
| 5936 |
+
"learning_rate": 1.8461502021555722e-06,
|
| 5937 |
+
"loss": 0.0001,
|
| 5938 |
+
"step": 847
|
| 5939 |
+
},
|
| 5940 |
+
{
|
| 5941 |
+
"epoch": 0.9150256271917993,
|
| 5942 |
+
"grad_norm": 0.0013835965655744076,
|
| 5943 |
+
"learning_rate": 1.8005622146853474e-06,
|
| 5944 |
+
"loss": 0.0,
|
| 5945 |
+
"step": 848
|
| 5946 |
+
},
|
| 5947 |
+
{
|
| 5948 |
+
"epoch": 0.9161046668465066,
|
| 5949 |
+
"grad_norm": 0.0032614474184811115,
|
| 5950 |
+
"learning_rate": 1.75553382946978e-06,
|
| 5951 |
+
"loss": 0.0001,
|
| 5952 |
+
"step": 849
|
| 5953 |
+
},
|
| 5954 |
+
{
|
| 5955 |
+
"epoch": 0.917183706501214,
|
| 5956 |
+
"grad_norm": 0.33866822719573975,
|
| 5957 |
+
"learning_rate": 1.7110655692947397e-06,
|
| 5958 |
+
"loss": 0.0067,
|
| 5959 |
+
"step": 850
|
| 5960 |
+
},
|
| 5961 |
+
{
|
| 5962 |
+
"epoch": 0.9182627461559212,
|
| 5963 |
+
"grad_norm": 0.13300298154354095,
|
| 5964 |
+
"learning_rate": 1.6671579504429991e-06,
|
| 5965 |
+
"loss": 0.0043,
|
| 5966 |
+
"step": 851
|
| 5967 |
+
},
|
| 5968 |
+
{
|
| 5969 |
+
"epoch": 0.9193417858106285,
|
| 5970 |
+
"grad_norm": 0.018160967156291008,
|
| 5971 |
+
"learning_rate": 1.6238114826881867e-06,
|
| 5972 |
+
"loss": 0.0004,
|
| 5973 |
+
"step": 852
|
| 5974 |
+
},
|
| 5975 |
+
{
|
| 5976 |
+
"epoch": 0.9204208254653359,
|
| 5977 |
+
"grad_norm": 0.0012685490073636174,
|
| 5978 |
+
"learning_rate": 1.5810266692888931e-06,
|
| 5979 |
+
"loss": 0.0,
|
| 5980 |
+
"step": 853
|
| 5981 |
+
},
|
| 5982 |
+
{
|
| 5983 |
+
"epoch": 0.9214998651200431,
|
| 5984 |
+
"grad_norm": 0.0034011940006166697,
|
| 5985 |
+
"learning_rate": 1.5388040069828247e-06,
|
| 5986 |
+
"loss": 0.0001,
|
| 5987 |
+
"step": 854
|
| 5988 |
+
},
|
| 5989 |
+
{
|
| 5990 |
+
"epoch": 0.9225789047747505,
|
| 5991 |
+
"grad_norm": 0.000653221330139786,
|
| 5992 |
+
"learning_rate": 1.4971439859810198e-06,
|
| 5993 |
+
"loss": 0.0,
|
| 5994 |
+
"step": 855
|
| 5995 |
+
},
|
| 5996 |
+
{
|
| 5997 |
+
"epoch": 0.9236579444294578,
|
| 5998 |
+
"grad_norm": 0.0010317250853404403,
|
| 5999 |
+
"learning_rate": 1.456047089962198e-06,
|
| 6000 |
+
"loss": 0.0001,
|
| 6001 |
+
"step": 856
|
| 6002 |
+
},
|
| 6003 |
+
{
|
| 6004 |
+
"epoch": 0.924736984084165,
|
| 6005 |
+
"grad_norm": 0.002824347233399749,
|
| 6006 |
+
"learning_rate": 1.4155137960670972e-06,
|
| 6007 |
+
"loss": 0.0001,
|
| 6008 |
+
"step": 857
|
| 6009 |
+
},
|
| 6010 |
+
{
|
| 6011 |
+
"epoch": 0.9258160237388724,
|
| 6012 |
+
"grad_norm": 0.007415778003633022,
|
| 6013 |
+
"learning_rate": 1.375544574892962e-06,
|
| 6014 |
+
"loss": 0.0,
|
| 6015 |
+
"step": 858
|
| 6016 |
+
},
|
| 6017 |
+
{
|
| 6018 |
+
"epoch": 0.9268950633935797,
|
| 6019 |
+
"grad_norm": 0.002814686391502619,
|
| 6020 |
+
"learning_rate": 1.3361398904880806e-06,
|
| 6021 |
+
"loss": 0.0001,
|
| 6022 |
+
"step": 859
|
| 6023 |
+
},
|
| 6024 |
+
{
|
| 6025 |
+
"epoch": 0.927974103048287,
|
| 6026 |
+
"grad_norm": 0.0007033959846012294,
|
| 6027 |
+
"learning_rate": 1.2973002003463797e-06,
|
| 6028 |
+
"loss": 0.0,
|
| 6029 |
+
"step": 860
|
| 6030 |
+
},
|
| 6031 |
+
{
|
| 6032 |
+
"epoch": 0.9290531427029943,
|
| 6033 |
+
"grad_norm": 0.0005535990349017084,
|
| 6034 |
+
"learning_rate": 1.259025955402121e-06,
|
| 6035 |
+
"loss": 0.0,
|
| 6036 |
+
"step": 861
|
| 6037 |
+
},
|
| 6038 |
+
{
|
| 6039 |
+
"epoch": 0.9301321823577017,
|
| 6040 |
+
"grad_norm": 0.001261221943423152,
|
| 6041 |
+
"learning_rate": 1.221317600024685e-06,
|
| 6042 |
+
"loss": 0.0001,
|
| 6043 |
+
"step": 862
|
| 6044 |
+
},
|
| 6045 |
+
{
|
| 6046 |
+
"epoch": 0.9312112220124089,
|
| 6047 |
+
"grad_norm": 0.001392426434904337,
|
| 6048 |
+
"learning_rate": 1.1841755720133797e-06,
|
| 6049 |
+
"loss": 0.0,
|
| 6050 |
+
"step": 863
|
| 6051 |
+
},
|
| 6052 |
+
{
|
| 6053 |
+
"epoch": 0.9322902616671163,
|
| 6054 |
+
"grad_norm": 0.001041140523739159,
|
| 6055 |
+
"learning_rate": 1.1476003025923721e-06,
|
| 6056 |
+
"loss": 0.0001,
|
| 6057 |
+
"step": 864
|
| 6058 |
+
},
|
| 6059 |
+
{
|
| 6060 |
+
"epoch": 0.9333693013218236,
|
| 6061 |
+
"grad_norm": 0.0011780766071751714,
|
| 6062 |
+
"learning_rate": 1.111592216405688e-06,
|
| 6063 |
+
"loss": 0.0001,
|
| 6064 |
+
"step": 865
|
| 6065 |
+
},
|
| 6066 |
+
{
|
| 6067 |
+
"epoch": 0.9344483409765308,
|
| 6068 |
+
"grad_norm": 0.0004143784462939948,
|
| 6069 |
+
"learning_rate": 1.076151731512276e-06,
|
| 6070 |
+
"loss": 0.0,
|
| 6071 |
+
"step": 866
|
| 6072 |
+
},
|
| 6073 |
+
{
|
| 6074 |
+
"epoch": 0.9355273806312382,
|
| 6075 |
+
"grad_norm": 0.0006516797002404928,
|
| 6076 |
+
"learning_rate": 1.0412792593811504e-06,
|
| 6077 |
+
"loss": 0.0,
|
| 6078 |
+
"step": 867
|
| 6079 |
+
},
|
| 6080 |
+
{
|
| 6081 |
+
"epoch": 0.9366064202859455,
|
| 6082 |
+
"grad_norm": 0.20983706414699554,
|
| 6083 |
+
"learning_rate": 1.0069752048866232e-06,
|
| 6084 |
+
"loss": 0.0026,
|
| 6085 |
+
"step": 868
|
| 6086 |
+
},
|
| 6087 |
+
{
|
| 6088 |
+
"epoch": 0.9376854599406528,
|
| 6089 |
+
"grad_norm": 0.0004500496725086123,
|
| 6090 |
+
"learning_rate": 9.732399663035908e-07,
|
| 6091 |
+
"loss": 0.0,
|
| 6092 |
+
"step": 869
|
| 6093 |
+
},
|
| 6094 |
+
{
|
| 6095 |
+
"epoch": 0.9387644995953601,
|
| 6096 |
+
"grad_norm": 0.0008151094079948962,
|
| 6097 |
+
"learning_rate": 9.400739353029209e-07,
|
| 6098 |
+
"loss": 0.0,
|
| 6099 |
+
"step": 870
|
| 6100 |
+
},
|
| 6101 |
+
{
|
| 6102 |
+
"epoch": 0.9398435392500675,
|
| 6103 |
+
"grad_norm": 0.0031570757273584604,
|
| 6104 |
+
"learning_rate": 9.074774969469013e-07,
|
| 6105 |
+
"loss": 0.0,
|
| 6106 |
+
"step": 871
|
| 6107 |
+
},
|
| 6108 |
+
{
|
| 6109 |
+
"epoch": 0.9409225789047747,
|
| 6110 |
+
"grad_norm": 0.007572251372039318,
|
| 6111 |
+
"learning_rate": 8.754510296847651e-07,
|
| 6112 |
+
"loss": 0.0002,
|
| 6113 |
+
"step": 872
|
| 6114 |
+
},
|
| 6115 |
+
{
|
| 6116 |
+
"epoch": 0.9420016185594821,
|
| 6117 |
+
"grad_norm": 0.0003502563340589404,
|
| 6118 |
+
"learning_rate": 8.439949053483054e-07,
|
| 6119 |
+
"loss": 0.0,
|
| 6120 |
+
"step": 873
|
| 6121 |
+
},
|
| 6122 |
+
{
|
| 6123 |
+
"epoch": 0.9430806582141894,
|
| 6124 |
+
"grad_norm": 0.0007077174377627671,
|
| 6125 |
+
"learning_rate": 8.13109489147551e-07,
|
| 6126 |
+
"loss": 0.0,
|
| 6127 |
+
"step": 874
|
| 6128 |
+
},
|
| 6129 |
+
{
|
| 6130 |
+
"epoch": 0.9441596978688966,
|
| 6131 |
+
"grad_norm": 0.0004631859774235636,
|
| 6132 |
+
"learning_rate": 7.827951396665312e-07,
|
| 6133 |
+
"loss": 0.0,
|
| 6134 |
+
"step": 875
|
| 6135 |
+
},
|
| 6136 |
+
{
|
| 6137 |
+
"epoch": 0.945238737523604,
|
| 6138 |
+
"grad_norm": 0.0014131515054032207,
|
| 6139 |
+
"learning_rate": 7.530522088591063e-07,
|
| 6140 |
+
"loss": 0.0,
|
| 6141 |
+
"step": 876
|
| 6142 |
+
},
|
| 6143 |
+
{
|
| 6144 |
+
"epoch": 0.9463177771783113,
|
| 6145 |
+
"grad_norm": 0.0007791262469254434,
|
| 6146 |
+
"learning_rate": 7.238810420448883e-07,
|
| 6147 |
+
"loss": 0.0,
|
| 6148 |
+
"step": 877
|
| 6149 |
+
},
|
| 6150 |
+
{
|
| 6151 |
+
"epoch": 0.9473968168330186,
|
| 6152 |
+
"grad_norm": 0.0003000040305778384,
|
| 6153 |
+
"learning_rate": 6.952819779052378e-07,
|
| 6154 |
+
"loss": 0.0,
|
| 6155 |
+
"step": 878
|
| 6156 |
+
},
|
| 6157 |
+
{
|
| 6158 |
+
"epoch": 0.9484758564877259,
|
| 6159 |
+
"grad_norm": 0.003854983951896429,
|
| 6160 |
+
"learning_rate": 6.672553484792954e-07,
|
| 6161 |
+
"loss": 0.0001,
|
| 6162 |
+
"step": 879
|
| 6163 |
+
},
|
| 6164 |
+
{
|
| 6165 |
+
"epoch": 0.9495548961424333,
|
| 6166 |
+
"grad_norm": 0.004172316752374172,
|
| 6167 |
+
"learning_rate": 6.398014791601847e-07,
|
| 6168 |
+
"loss": 0.0001,
|
| 6169 |
+
"step": 880
|
| 6170 |
+
},
|
| 6171 |
+
{
|
| 6172 |
+
"epoch": 0.9506339357971405,
|
| 6173 |
+
"grad_norm": 0.005723369307816029,
|
| 6174 |
+
"learning_rate": 6.129206886911931e-07,
|
| 6175 |
+
"loss": 0.0001,
|
| 6176 |
+
"step": 881
|
| 6177 |
+
},
|
| 6178 |
+
{
|
| 6179 |
+
"epoch": 0.9517129754518479,
|
| 6180 |
+
"grad_norm": 0.0028106626123189926,
|
| 6181 |
+
"learning_rate": 5.866132891620746e-07,
|
| 6182 |
+
"loss": 0.0001,
|
| 6183 |
+
"step": 882
|
| 6184 |
+
},
|
| 6185 |
+
{
|
| 6186 |
+
"epoch": 0.9527920151065552,
|
| 6187 |
+
"grad_norm": 0.04048357903957367,
|
| 6188 |
+
"learning_rate": 5.608795860054416e-07,
|
| 6189 |
+
"loss": 0.0007,
|
| 6190 |
+
"step": 883
|
| 6191 |
+
},
|
| 6192 |
+
{
|
| 6193 |
+
"epoch": 0.9538710547612624,
|
| 6194 |
+
"grad_norm": 0.001071687089279294,
|
| 6195 |
+
"learning_rate": 5.357198779932015e-07,
|
| 6196 |
+
"loss": 0.0,
|
| 6197 |
+
"step": 884
|
| 6198 |
+
},
|
| 6199 |
+
{
|
| 6200 |
+
"epoch": 0.9549500944159698,
|
| 6201 |
+
"grad_norm": 0.0003824660088866949,
|
| 6202 |
+
"learning_rate": 5.111344572331145e-07,
|
| 6203 |
+
"loss": 0.0,
|
| 6204 |
+
"step": 885
|
| 6205 |
+
},
|
| 6206 |
+
{
|
| 6207 |
+
"epoch": 0.9560291340706771,
|
| 6208 |
+
"grad_norm": 0.002592804143205285,
|
| 6209 |
+
"learning_rate": 4.871236091653741e-07,
|
| 6210 |
+
"loss": 0.0001,
|
| 6211 |
+
"step": 886
|
| 6212 |
+
},
|
| 6213 |
+
{
|
| 6214 |
+
"epoch": 0.9571081737253844,
|
| 6215 |
+
"grad_norm": 0.001733283163048327,
|
| 6216 |
+
"learning_rate": 4.636876125593048e-07,
|
| 6217 |
+
"loss": 0.0001,
|
| 6218 |
+
"step": 887
|
| 6219 |
+
},
|
| 6220 |
+
{
|
| 6221 |
+
"epoch": 0.9581872133800917,
|
| 6222 |
+
"grad_norm": 0.0009807611349970102,
|
| 6223 |
+
"learning_rate": 4.40826739510114e-07,
|
| 6224 |
+
"loss": 0.0,
|
| 6225 |
+
"step": 888
|
| 6226 |
+
},
|
| 6227 |
+
{
|
| 6228 |
+
"epoch": 0.9592662530347991,
|
| 6229 |
+
"grad_norm": 0.000411438406445086,
|
| 6230 |
+
"learning_rate": 4.1854125543576705e-07,
|
| 6231 |
+
"loss": 0.0,
|
| 6232 |
+
"step": 889
|
| 6233 |
+
},
|
| 6234 |
+
{
|
| 6235 |
+
"epoch": 0.9603452926895063,
|
| 6236 |
+
"grad_norm": 0.0011044451966881752,
|
| 6237 |
+
"learning_rate": 3.96831419073862e-07,
|
| 6238 |
+
"loss": 0.0,
|
| 6239 |
+
"step": 890
|
| 6240 |
+
},
|
| 6241 |
+
{
|
| 6242 |
+
"epoch": 0.9614243323442137,
|
| 6243 |
+
"grad_norm": 0.0008655402925796807,
|
| 6244 |
+
"learning_rate": 3.7569748247866523e-07,
|
| 6245 |
+
"loss": 0.0,
|
| 6246 |
+
"step": 891
|
| 6247 |
+
},
|
| 6248 |
+
{
|
| 6249 |
+
"epoch": 0.962503371998921,
|
| 6250 |
+
"grad_norm": 0.001193237490952015,
|
| 6251 |
+
"learning_rate": 3.5513969101814147e-07,
|
| 6252 |
+
"loss": 0.0001,
|
| 6253 |
+
"step": 892
|
| 6254 |
+
},
|
| 6255 |
+
{
|
| 6256 |
+
"epoch": 0.9635824116536282,
|
| 6257 |
+
"grad_norm": 0.016604945063591003,
|
| 6258 |
+
"learning_rate": 3.351582833711453e-07,
|
| 6259 |
+
"loss": 0.0004,
|
| 6260 |
+
"step": 893
|
| 6261 |
+
},
|
| 6262 |
+
{
|
| 6263 |
+
"epoch": 0.9646614513083356,
|
| 6264 |
+
"grad_norm": 0.0006192306173034012,
|
| 6265 |
+
"learning_rate": 3.1575349152463407e-07,
|
| 6266 |
+
"loss": 0.0,
|
| 6267 |
+
"step": 894
|
| 6268 |
+
},
|
| 6269 |
+
{
|
| 6270 |
+
"epoch": 0.9657404909630429,
|
| 6271 |
+
"grad_norm": 0.003590730018913746,
|
| 6272 |
+
"learning_rate": 2.969255407709648e-07,
|
| 6273 |
+
"loss": 0.0001,
|
| 6274 |
+
"step": 895
|
| 6275 |
+
},
|
| 6276 |
+
{
|
| 6277 |
+
"epoch": 0.9668195306177502,
|
| 6278 |
+
"grad_norm": 0.006865234579890966,
|
| 6279 |
+
"learning_rate": 2.786746497053072e-07,
|
| 6280 |
+
"loss": 0.0002,
|
| 6281 |
+
"step": 896
|
| 6282 |
+
},
|
| 6283 |
+
{
|
| 6284 |
+
"epoch": 0.9678985702724575,
|
| 6285 |
+
"grad_norm": 0.007230513263493776,
|
| 6286 |
+
"learning_rate": 2.610010302230625e-07,
|
| 6287 |
+
"loss": 0.0001,
|
| 6288 |
+
"step": 897
|
| 6289 |
+
},
|
| 6290 |
+
{
|
| 6291 |
+
"epoch": 0.9689776099271649,
|
| 6292 |
+
"grad_norm": 0.0004072195733897388,
|
| 6293 |
+
"learning_rate": 2.439048875174488e-07,
|
| 6294 |
+
"loss": 0.0,
|
| 6295 |
+
"step": 898
|
| 6296 |
+
},
|
| 6297 |
+
{
|
| 6298 |
+
"epoch": 0.9700566495818721,
|
| 6299 |
+
"grad_norm": 0.0009823354193940759,
|
| 6300 |
+
"learning_rate": 2.2738642007708033e-07,
|
| 6301 |
+
"loss": 0.0,
|
| 6302 |
+
"step": 899
|
| 6303 |
+
},
|
| 6304 |
+
{
|
| 6305 |
+
"epoch": 0.9711356892365794,
|
| 6306 |
+
"grad_norm": 0.13599197566509247,
|
| 6307 |
+
"learning_rate": 2.1144581968369215e-07,
|
| 6308 |
+
"loss": 0.0018,
|
| 6309 |
+
"step": 900
|
| 6310 |
+
},
|
| 6311 |
+
{
|
| 6312 |
+
"epoch": 0.9722147288912868,
|
| 6313 |
+
"grad_norm": 0.001636895234696567,
|
| 6314 |
+
"learning_rate": 1.9608327140990256e-07,
|
| 6315 |
+
"loss": 0.0,
|
| 6316 |
+
"step": 901
|
| 6317 |
+
},
|
| 6318 |
+
{
|
| 6319 |
+
"epoch": 0.973293768545994,
|
| 6320 |
+
"grad_norm": 0.0009283882100135088,
|
| 6321 |
+
"learning_rate": 1.8129895361704842e-07,
|
| 6322 |
+
"loss": 0.0,
|
| 6323 |
+
"step": 902
|
| 6324 |
+
},
|
| 6325 |
+
{
|
| 6326 |
+
"epoch": 0.9743728082007014,
|
| 6327 |
+
"grad_norm": 0.001255827839486301,
|
| 6328 |
+
"learning_rate": 1.6709303795314767e-07,
|
| 6329 |
+
"loss": 0.0001,
|
| 6330 |
+
"step": 903
|
| 6331 |
+
},
|
| 6332 |
+
{
|
| 6333 |
+
"epoch": 0.9754518478554087,
|
| 6334 |
+
"grad_norm": 0.0010009333491325378,
|
| 6335 |
+
"learning_rate": 1.5346568935087902e-07,
|
| 6336 |
+
"loss": 0.0,
|
| 6337 |
+
"step": 904
|
| 6338 |
+
},
|
| 6339 |
+
{
|
| 6340 |
+
"epoch": 0.976530887510116,
|
| 6341 |
+
"grad_norm": 0.0003917965805158019,
|
| 6342 |
+
"learning_rate": 1.4041706602567207e-07,
|
| 6343 |
+
"loss": 0.0,
|
| 6344 |
+
"step": 905
|
| 6345 |
+
},
|
| 6346 |
+
{
|
| 6347 |
+
"epoch": 0.9776099271648233,
|
| 6348 |
+
"grad_norm": 0.005224605090916157,
|
| 6349 |
+
"learning_rate": 1.279473194738867e-07,
|
| 6350 |
+
"loss": 0.0001,
|
| 6351 |
+
"step": 906
|
| 6352 |
+
},
|
| 6353 |
+
{
|
| 6354 |
+
"epoch": 0.9786889668195307,
|
| 6355 |
+
"grad_norm": 0.027016527950763702,
|
| 6356 |
+
"learning_rate": 1.1605659447102568e-07,
|
| 6357 |
+
"loss": 0.0001,
|
| 6358 |
+
"step": 907
|
| 6359 |
+
},
|
| 6360 |
+
{
|
| 6361 |
+
"epoch": 0.9797680064742379,
|
| 6362 |
+
"grad_norm": 0.0009889070643112063,
|
| 6363 |
+
"learning_rate": 1.0474502907009132e-07,
|
| 6364 |
+
"loss": 0.0,
|
| 6365 |
+
"step": 908
|
| 6366 |
+
},
|
| 6367 |
+
{
|
| 6368 |
+
"epoch": 0.9808470461289452,
|
| 6369 |
+
"grad_norm": 0.00606796657666564,
|
| 6370 |
+
"learning_rate": 9.401275459994252e-08,
|
| 6371 |
+
"loss": 0.0001,
|
| 6372 |
+
"step": 909
|
| 6373 |
+
},
|
| 6374 |
+
{
|
| 6375 |
+
"epoch": 0.9819260857836526,
|
| 6376 |
+
"grad_norm": 0.0008787002298049629,
|
| 6377 |
+
"learning_rate": 8.385989566379593e-08,
|
| 6378 |
+
"loss": 0.0,
|
| 6379 |
+
"step": 910
|
| 6380 |
+
},
|
| 6381 |
+
{
|
| 6382 |
+
"epoch": 0.9830051254383598,
|
| 6383 |
+
"grad_norm": 0.00858983676880598,
|
| 6384 |
+
"learning_rate": 7.428657013777152e-08,
|
| 6385 |
+
"loss": 0.0001,
|
| 6386 |
+
"step": 911
|
| 6387 |
+
},
|
| 6388 |
+
{
|
| 6389 |
+
"epoch": 0.9840841650930672,
|
| 6390 |
+
"grad_norm": 0.3724377751350403,
|
| 6391 |
+
"learning_rate": 6.529288916952702e-08,
|
| 6392 |
+
"loss": 0.0272,
|
| 6393 |
+
"step": 912
|
| 6394 |
+
},
|
| 6395 |
+
{
|
| 6396 |
+
"epoch": 0.9851632047477745,
|
| 6397 |
+
"grad_norm": 0.000492151128128171,
|
| 6398 |
+
"learning_rate": 5.6878957176964564e-08,
|
| 6399 |
+
"loss": 0.0,
|
| 6400 |
+
"step": 913
|
| 6401 |
+
},
|
| 6402 |
+
{
|
| 6403 |
+
"epoch": 0.9862422444024818,
|
| 6404 |
+
"grad_norm": 0.0006944717606529593,
|
| 6405 |
+
"learning_rate": 4.904487184702045e-08,
|
| 6406 |
+
"loss": 0.0,
|
| 6407 |
+
"step": 914
|
| 6408 |
+
},
|
| 6409 |
+
{
|
| 6410 |
+
"epoch": 0.9873212840571891,
|
| 6411 |
+
"grad_norm": 0.022949470207095146,
|
| 6412 |
+
"learning_rate": 4.1790724134521675e-08,
|
| 6413 |
+
"loss": 0.0004,
|
| 6414 |
+
"step": 915
|
| 6415 |
+
},
|
| 6416 |
+
{
|
| 6417 |
+
"epoch": 0.9884003237118965,
|
| 6418 |
+
"grad_norm": 0.0009061090531758964,
|
| 6419 |
+
"learning_rate": 3.511659826115343e-08,
|
| 6420 |
+
"loss": 0.0,
|
| 6421 |
+
"step": 916
|
| 6422 |
+
},
|
| 6423 |
+
{
|
| 6424 |
+
"epoch": 0.9894793633666037,
|
| 6425 |
+
"grad_norm": 0.0007004146464169025,
|
| 6426 |
+
"learning_rate": 2.9022571714448776e-08,
|
| 6427 |
+
"loss": 0.0,
|
| 6428 |
+
"step": 917
|
| 6429 |
+
},
|
| 6430 |
+
{
|
| 6431 |
+
"epoch": 0.990558403021311,
|
| 6432 |
+
"grad_norm": 0.0017613873351365328,
|
| 6433 |
+
"learning_rate": 2.350871524691156e-08,
|
| 6434 |
+
"loss": 0.0001,
|
| 6435 |
+
"step": 918
|
| 6436 |
+
},
|
| 6437 |
+
{
|
| 6438 |
+
"epoch": 0.9916374426760184,
|
| 6439 |
+
"grad_norm": 0.0016171643510460854,
|
| 6440 |
+
"learning_rate": 1.8575092875194876e-08,
|
| 6441 |
+
"loss": 0.0,
|
| 6442 |
+
"step": 919
|
| 6443 |
+
},
|
| 6444 |
+
{
|
| 6445 |
+
"epoch": 0.9927164823307256,
|
| 6446 |
+
"grad_norm": 0.011564288288354874,
|
| 6447 |
+
"learning_rate": 1.4221761879351648e-08,
|
| 6448 |
+
"loss": 0.0001,
|
| 6449 |
+
"step": 920
|
| 6450 |
+
},
|
| 6451 |
+
{
|
| 6452 |
+
"epoch": 0.993795521985433,
|
| 6453 |
+
"grad_norm": 0.0003969534591306001,
|
| 6454 |
+
"learning_rate": 1.0448772802162943e-08,
|
| 6455 |
+
"loss": 0.0,
|
| 6456 |
+
"step": 921
|
| 6457 |
+
},
|
| 6458 |
+
{
|
| 6459 |
+
"epoch": 0.9948745616401403,
|
| 6460 |
+
"grad_norm": 0.0005012512556277215,
|
| 6461 |
+
"learning_rate": 7.256169448560668e-09,
|
| 6462 |
+
"loss": 0.0,
|
| 6463 |
+
"step": 922
|
| 6464 |
+
},
|
| 6465 |
+
{
|
| 6466 |
+
"epoch": 0.9959536012948476,
|
| 6467 |
+
"grad_norm": 0.0006448710337281227,
|
| 6468 |
+
"learning_rate": 4.643988885127959e-09,
|
| 6469 |
+
"loss": 0.0,
|
| 6470 |
+
"step": 923
|
| 6471 |
+
},
|
| 6472 |
+
{
|
| 6473 |
+
"epoch": 0.9970326409495549,
|
| 6474 |
+
"grad_norm": 0.00047576057841069996,
|
| 6475 |
+
"learning_rate": 2.6122614396384415e-09,
|
| 6476 |
+
"loss": 0.0,
|
| 6477 |
+
"step": 924
|
| 6478 |
+
},
|
| 6479 |
+
{
|
| 6480 |
+
"epoch": 0.9981116806042623,
|
| 6481 |
+
"grad_norm": 0.002468546386808157,
|
| 6482 |
+
"learning_rate": 1.1610107007398174e-09,
|
| 6483 |
+
"loss": 0.0001,
|
| 6484 |
+
"step": 925
|
| 6485 |
+
},
|
| 6486 |
+
{
|
| 6487 |
+
"epoch": 0.9991907202589695,
|
| 6488 |
+
"grad_norm": 0.029836824163794518,
|
| 6489 |
+
"learning_rate": 2.902535176541044e-10,
|
| 6490 |
+
"loss": 0.0005,
|
| 6491 |
+
"step": 926
|
| 6492 |
+
},
|
| 6493 |
+
{
|
| 6494 |
+
"epoch": 0.9991907202589695,
|
| 6495 |
+
"eval_loss": 0.0009020191500894725,
|
| 6496 |
+
"eval_runtime": 12.4483,
|
| 6497 |
+
"eval_samples_per_second": 62.74,
|
| 6498 |
+
"eval_steps_per_second": 15.745,
|
| 6499 |
+
"step": 926
|
| 6500 |
+
},
|
| 6501 |
+
{
|
| 6502 |
+
"epoch": 1.0002697599136767,
|
| 6503 |
+
"grad_norm": 0.2698620557785034,
|
| 6504 |
+
"learning_rate": 0.0,
|
| 6505 |
+
"loss": 0.1051,
|
| 6506 |
+
"step": 927
|
| 6507 |
}
|
| 6508 |
],
|
| 6509 |
"logging_steps": 1,
|
|
|
|
| 6518 |
"should_evaluate": false,
|
| 6519 |
"should_log": false,
|
| 6520 |
"should_save": true,
|
| 6521 |
+
"should_training_stop": true
|
| 6522 |
},
|
| 6523 |
"attributes": {}
|
| 6524 |
}
|
| 6525 |
},
|
| 6526 |
+
"total_flos": 8.940475445044838e+16,
|
| 6527 |
"train_batch_size": 4,
|
| 6528 |
"trial_name": null,
|
| 6529 |
"trial_params": null
|