Training in progress, step 855, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 80792096
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7c51bb5b178a5a10f48262d602f1dd1f4c878efc0c8a71a5d20d462d4a57ff0
|
| 3 |
size 80792096
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 41460084
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21c1662d9d0c088f2c11816e081cfaf2a1cb3633e4eb4346dd8344524118189b
|
| 3 |
size 41460084
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04ffb892767ac33d9d888c16470956f7387ee6fe3b220b4a2dee598697bb8026
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27537edb793eaf638a01a0f3e3d9d913d146711fb62c3555e6abdde4209a80fa
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 214,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4533,6 +4533,1497 @@
|
|
| 4533 |
"eval_samples_per_second": 31.569,
|
| 4534 |
"eval_steps_per_second": 15.828,
|
| 4535 |
"step": 642
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4536 |
}
|
| 4537 |
],
|
| 4538 |
"logging_steps": 1,
|
|
@@ -4547,12 +6038,12 @@
|
|
| 4547 |
"should_evaluate": false,
|
| 4548 |
"should_log": false,
|
| 4549 |
"should_save": true,
|
| 4550 |
-
"should_training_stop":
|
| 4551 |
},
|
| 4552 |
"attributes": {}
|
| 4553 |
}
|
| 4554 |
},
|
| 4555 |
-
"total_flos": 1.
|
| 4556 |
"train_batch_size": 2,
|
| 4557 |
"trial_name": null,
|
| 4558 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
"eval_steps": 214,
|
| 6 |
+
"global_step": 855,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4533 |
"eval_samples_per_second": 31.569,
|
| 4534 |
"eval_steps_per_second": 15.828,
|
| 4535 |
"step": 642
|
| 4536 |
+
},
|
| 4537 |
+
{
|
| 4538 |
+
"epoch": 0.752046783625731,
|
| 4539 |
+
"grad_norm": 0.4816937744617462,
|
| 4540 |
+
"learning_rate": 2.948676587993834e-05,
|
| 4541 |
+
"loss": 1.2186,
|
| 4542 |
+
"step": 643
|
| 4543 |
+
},
|
| 4544 |
+
{
|
| 4545 |
+
"epoch": 0.7532163742690059,
|
| 4546 |
+
"grad_norm": 0.6862644553184509,
|
| 4547 |
+
"learning_rate": 2.922362931409851e-05,
|
| 4548 |
+
"loss": 1.6134,
|
| 4549 |
+
"step": 644
|
| 4550 |
+
},
|
| 4551 |
+
{
|
| 4552 |
+
"epoch": 0.7543859649122807,
|
| 4553 |
+
"grad_norm": 0.453056663274765,
|
| 4554 |
+
"learning_rate": 2.8961471052904852e-05,
|
| 4555 |
+
"loss": 1.8659,
|
| 4556 |
+
"step": 645
|
| 4557 |
+
},
|
| 4558 |
+
{
|
| 4559 |
+
"epoch": 0.7555555555555555,
|
| 4560 |
+
"grad_norm": 1.0425024032592773,
|
| 4561 |
+
"learning_rate": 2.8700294720033483e-05,
|
| 4562 |
+
"loss": 1.5892,
|
| 4563 |
+
"step": 646
|
| 4564 |
+
},
|
| 4565 |
+
{
|
| 4566 |
+
"epoch": 0.7567251461988304,
|
| 4567 |
+
"grad_norm": 0.7134360671043396,
|
| 4568 |
+
"learning_rate": 2.84401039255879e-05,
|
| 4569 |
+
"loss": 1.535,
|
| 4570 |
+
"step": 647
|
| 4571 |
+
},
|
| 4572 |
+
{
|
| 4573 |
+
"epoch": 0.7578947368421053,
|
| 4574 |
+
"grad_norm": 0.3440370261669159,
|
| 4575 |
+
"learning_rate": 2.8180902266048948e-05,
|
| 4576 |
+
"loss": 1.6847,
|
| 4577 |
+
"step": 648
|
| 4578 |
+
},
|
| 4579 |
+
{
|
| 4580 |
+
"epoch": 0.7590643274853801,
|
| 4581 |
+
"grad_norm": 0.458816260099411,
|
| 4582 |
+
"learning_rate": 2.7922693324225168e-05,
|
| 4583 |
+
"loss": 1.7274,
|
| 4584 |
+
"step": 649
|
| 4585 |
+
},
|
| 4586 |
+
{
|
| 4587 |
+
"epoch": 0.7602339181286549,
|
| 4588 |
+
"grad_norm": 0.7467341423034668,
|
| 4589 |
+
"learning_rate": 2.766548066920338e-05,
|
| 4590 |
+
"loss": 1.1546,
|
| 4591 |
+
"step": 650
|
| 4592 |
+
},
|
| 4593 |
+
{
|
| 4594 |
+
"epoch": 0.7614035087719299,
|
| 4595 |
+
"grad_norm": 0.8349908590316772,
|
| 4596 |
+
"learning_rate": 2.7409267856299147e-05,
|
| 4597 |
+
"loss": 1.5008,
|
| 4598 |
+
"step": 651
|
| 4599 |
+
},
|
| 4600 |
+
{
|
| 4601 |
+
"epoch": 0.7625730994152047,
|
| 4602 |
+
"grad_norm": 0.9590814113616943,
|
| 4603 |
+
"learning_rate": 2.715405842700782e-05,
|
| 4604 |
+
"loss": 0.9238,
|
| 4605 |
+
"step": 652
|
| 4606 |
+
},
|
| 4607 |
+
{
|
| 4608 |
+
"epoch": 0.7637426900584795,
|
| 4609 |
+
"grad_norm": 0.43833237886428833,
|
| 4610 |
+
"learning_rate": 2.6899855908955464e-05,
|
| 4611 |
+
"loss": 1.0527,
|
| 4612 |
+
"step": 653
|
| 4613 |
+
},
|
| 4614 |
+
{
|
| 4615 |
+
"epoch": 0.7649122807017544,
|
| 4616 |
+
"grad_norm": 0.34467437863349915,
|
| 4617 |
+
"learning_rate": 2.6646663815850092e-05,
|
| 4618 |
+
"loss": 1.5622,
|
| 4619 |
+
"step": 654
|
| 4620 |
+
},
|
| 4621 |
+
{
|
| 4622 |
+
"epoch": 0.7660818713450293,
|
| 4623 |
+
"grad_norm": 0.42286065220832825,
|
| 4624 |
+
"learning_rate": 2.6394485647433277e-05,
|
| 4625 |
+
"loss": 1.6389,
|
| 4626 |
+
"step": 655
|
| 4627 |
+
},
|
| 4628 |
+
{
|
| 4629 |
+
"epoch": 0.7672514619883041,
|
| 4630 |
+
"grad_norm": 0.34687599539756775,
|
| 4631 |
+
"learning_rate": 2.614332488943152e-05,
|
| 4632 |
+
"loss": 1.4794,
|
| 4633 |
+
"step": 656
|
| 4634 |
+
},
|
| 4635 |
+
{
|
| 4636 |
+
"epoch": 0.7684210526315789,
|
| 4637 |
+
"grad_norm": 0.46390998363494873,
|
| 4638 |
+
"learning_rate": 2.5893185013508194e-05,
|
| 4639 |
+
"loss": 1.5318,
|
| 4640 |
+
"step": 657
|
| 4641 |
+
},
|
| 4642 |
+
{
|
| 4643 |
+
"epoch": 0.7695906432748538,
|
| 4644 |
+
"grad_norm": 0.43160146474838257,
|
| 4645 |
+
"learning_rate": 2.564406947721566e-05,
|
| 4646 |
+
"loss": 1.0743,
|
| 4647 |
+
"step": 658
|
| 4648 |
+
},
|
| 4649 |
+
{
|
| 4650 |
+
"epoch": 0.7707602339181286,
|
| 4651 |
+
"grad_norm": 0.41312840580940247,
|
| 4652 |
+
"learning_rate": 2.539598172394727e-05,
|
| 4653 |
+
"loss": 2.2316,
|
| 4654 |
+
"step": 659
|
| 4655 |
+
},
|
| 4656 |
+
{
|
| 4657 |
+
"epoch": 0.7719298245614035,
|
| 4658 |
+
"grad_norm": 0.30184900760650635,
|
| 4659 |
+
"learning_rate": 2.514892518288988e-05,
|
| 4660 |
+
"loss": 1.9576,
|
| 4661 |
+
"step": 660
|
| 4662 |
+
},
|
| 4663 |
+
{
|
| 4664 |
+
"epoch": 0.7730994152046784,
|
| 4665 |
+
"grad_norm": 0.30861276388168335,
|
| 4666 |
+
"learning_rate": 2.490290326897653e-05,
|
| 4667 |
+
"loss": 1.7012,
|
| 4668 |
+
"step": 661
|
| 4669 |
+
},
|
| 4670 |
+
{
|
| 4671 |
+
"epoch": 0.7742690058479532,
|
| 4672 |
+
"grad_norm": 0.7322350740432739,
|
| 4673 |
+
"learning_rate": 2.4657919382839033e-05,
|
| 4674 |
+
"loss": 1.1043,
|
| 4675 |
+
"step": 662
|
| 4676 |
+
},
|
| 4677 |
+
{
|
| 4678 |
+
"epoch": 0.775438596491228,
|
| 4679 |
+
"grad_norm": 0.5217798948287964,
|
| 4680 |
+
"learning_rate": 2.4413976910761116e-05,
|
| 4681 |
+
"loss": 1.1938,
|
| 4682 |
+
"step": 663
|
| 4683 |
+
},
|
| 4684 |
+
{
|
| 4685 |
+
"epoch": 0.776608187134503,
|
| 4686 |
+
"grad_norm": 0.4567161798477173,
|
| 4687 |
+
"learning_rate": 2.4171079224631686e-05,
|
| 4688 |
+
"loss": 1.4945,
|
| 4689 |
+
"step": 664
|
| 4690 |
+
},
|
| 4691 |
+
{
|
| 4692 |
+
"epoch": 0.7777777777777778,
|
| 4693 |
+
"grad_norm": 0.4545726776123047,
|
| 4694 |
+
"learning_rate": 2.3929229681898003e-05,
|
| 4695 |
+
"loss": 1.7862,
|
| 4696 |
+
"step": 665
|
| 4697 |
+
},
|
| 4698 |
+
{
|
| 4699 |
+
"epoch": 0.7789473684210526,
|
| 4700 |
+
"grad_norm": 0.5831303596496582,
|
| 4701 |
+
"learning_rate": 2.3688431625519415e-05,
|
| 4702 |
+
"loss": 1.3472,
|
| 4703 |
+
"step": 666
|
| 4704 |
+
},
|
| 4705 |
+
{
|
| 4706 |
+
"epoch": 0.7801169590643274,
|
| 4707 |
+
"grad_norm": 0.25880950689315796,
|
| 4708 |
+
"learning_rate": 2.3448688383921182e-05,
|
| 4709 |
+
"loss": 1.5453,
|
| 4710 |
+
"step": 667
|
| 4711 |
+
},
|
| 4712 |
+
{
|
| 4713 |
+
"epoch": 0.7812865497076024,
|
| 4714 |
+
"grad_norm": 0.5524426698684692,
|
| 4715 |
+
"learning_rate": 2.3210003270948365e-05,
|
| 4716 |
+
"loss": 1.894,
|
| 4717 |
+
"step": 668
|
| 4718 |
+
},
|
| 4719 |
+
{
|
| 4720 |
+
"epoch": 0.7824561403508772,
|
| 4721 |
+
"grad_norm": 0.375410258769989,
|
| 4722 |
+
"learning_rate": 2.2972379585820048e-05,
|
| 4723 |
+
"loss": 2.255,
|
| 4724 |
+
"step": 669
|
| 4725 |
+
},
|
| 4726 |
+
{
|
| 4727 |
+
"epoch": 0.783625730994152,
|
| 4728 |
+
"grad_norm": 0.8052981495857239,
|
| 4729 |
+
"learning_rate": 2.2735820613083834e-05,
|
| 4730 |
+
"loss": 2.1831,
|
| 4731 |
+
"step": 670
|
| 4732 |
+
},
|
| 4733 |
+
{
|
| 4734 |
+
"epoch": 0.7847953216374269,
|
| 4735 |
+
"grad_norm": 0.4980434775352478,
|
| 4736 |
+
"learning_rate": 2.250032962257029e-05,
|
| 4737 |
+
"loss": 0.8607,
|
| 4738 |
+
"step": 671
|
| 4739 |
+
},
|
| 4740 |
+
{
|
| 4741 |
+
"epoch": 0.7859649122807018,
|
| 4742 |
+
"grad_norm": 0.33631038665771484,
|
| 4743 |
+
"learning_rate": 2.2265909869347825e-05,
|
| 4744 |
+
"loss": 2.0506,
|
| 4745 |
+
"step": 672
|
| 4746 |
+
},
|
| 4747 |
+
{
|
| 4748 |
+
"epoch": 0.7871345029239766,
|
| 4749 |
+
"grad_norm": 0.47964972257614136,
|
| 4750 |
+
"learning_rate": 2.2032564593677774e-05,
|
| 4751 |
+
"loss": 1.4278,
|
| 4752 |
+
"step": 673
|
| 4753 |
+
},
|
| 4754 |
+
{
|
| 4755 |
+
"epoch": 0.7883040935672515,
|
| 4756 |
+
"grad_norm": 0.532234251499176,
|
| 4757 |
+
"learning_rate": 2.1800297020969463e-05,
|
| 4758 |
+
"loss": 1.7963,
|
| 4759 |
+
"step": 674
|
| 4760 |
+
},
|
| 4761 |
+
{
|
| 4762 |
+
"epoch": 0.7894736842105263,
|
| 4763 |
+
"grad_norm": 0.6282833218574524,
|
| 4764 |
+
"learning_rate": 2.1569110361735677e-05,
|
| 4765 |
+
"loss": 1.0567,
|
| 4766 |
+
"step": 675
|
| 4767 |
+
},
|
| 4768 |
+
{
|
| 4769 |
+
"epoch": 0.7906432748538011,
|
| 4770 |
+
"grad_norm": 0.43357419967651367,
|
| 4771 |
+
"learning_rate": 2.1339007811548395e-05,
|
| 4772 |
+
"loss": 1.755,
|
| 4773 |
+
"step": 676
|
| 4774 |
+
},
|
| 4775 |
+
{
|
| 4776 |
+
"epoch": 0.791812865497076,
|
| 4777 |
+
"grad_norm": 0.33183780312538147,
|
| 4778 |
+
"learning_rate": 2.110999255099444e-05,
|
| 4779 |
+
"loss": 1.5154,
|
| 4780 |
+
"step": 677
|
| 4781 |
+
},
|
| 4782 |
+
{
|
| 4783 |
+
"epoch": 0.7929824561403509,
|
| 4784 |
+
"grad_norm": 0.6004055142402649,
|
| 4785 |
+
"learning_rate": 2.0882067745631605e-05,
|
| 4786 |
+
"loss": 1.5257,
|
| 4787 |
+
"step": 678
|
| 4788 |
+
},
|
| 4789 |
+
{
|
| 4790 |
+
"epoch": 0.7941520467836257,
|
| 4791 |
+
"grad_norm": 0.4232582151889801,
|
| 4792 |
+
"learning_rate": 2.0655236545944966e-05,
|
| 4793 |
+
"loss": 2.1641,
|
| 4794 |
+
"step": 679
|
| 4795 |
+
},
|
| 4796 |
+
{
|
| 4797 |
+
"epoch": 0.7953216374269005,
|
| 4798 |
+
"grad_norm": 0.4737587571144104,
|
| 4799 |
+
"learning_rate": 2.0429502087303164e-05,
|
| 4800 |
+
"loss": 0.7159,
|
| 4801 |
+
"step": 680
|
| 4802 |
+
},
|
| 4803 |
+
{
|
| 4804 |
+
"epoch": 0.7964912280701755,
|
| 4805 |
+
"grad_norm": 0.36999645829200745,
|
| 4806 |
+
"learning_rate": 2.0204867489915258e-05,
|
| 4807 |
+
"loss": 1.9891,
|
| 4808 |
+
"step": 681
|
| 4809 |
+
},
|
| 4810 |
+
{
|
| 4811 |
+
"epoch": 0.7976608187134503,
|
| 4812 |
+
"grad_norm": 0.4537879526615143,
|
| 4813 |
+
"learning_rate": 1.998133585878743e-05,
|
| 4814 |
+
"loss": 1.393,
|
| 4815 |
+
"step": 682
|
| 4816 |
+
},
|
| 4817 |
+
{
|
| 4818 |
+
"epoch": 0.7988304093567251,
|
| 4819 |
+
"grad_norm": 0.33755865693092346,
|
| 4820 |
+
"learning_rate": 1.9758910283680132e-05,
|
| 4821 |
+
"loss": 2.0622,
|
| 4822 |
+
"step": 683
|
| 4823 |
+
},
|
| 4824 |
+
{
|
| 4825 |
+
"epoch": 0.8,
|
| 4826 |
+
"grad_norm": 0.7674013376235962,
|
| 4827 |
+
"learning_rate": 1.9537593839065483e-05,
|
| 4828 |
+
"loss": 1.2588,
|
| 4829 |
+
"step": 684
|
| 4830 |
+
},
|
| 4831 |
+
{
|
| 4832 |
+
"epoch": 0.8011695906432749,
|
| 4833 |
+
"grad_norm": 0.38102641701698303,
|
| 4834 |
+
"learning_rate": 1.9317389584084568e-05,
|
| 4835 |
+
"loss": 1.3908,
|
| 4836 |
+
"step": 685
|
| 4837 |
+
},
|
| 4838 |
+
{
|
| 4839 |
+
"epoch": 0.8023391812865497,
|
| 4840 |
+
"grad_norm": 0.7196425199508667,
|
| 4841 |
+
"learning_rate": 1.9098300562505266e-05,
|
| 4842 |
+
"loss": 1.1806,
|
| 4843 |
+
"step": 686
|
| 4844 |
+
},
|
| 4845 |
+
{
|
| 4846 |
+
"epoch": 0.8035087719298246,
|
| 4847 |
+
"grad_norm": 1.120670199394226,
|
| 4848 |
+
"learning_rate": 1.888032980268025e-05,
|
| 4849 |
+
"loss": 1.5959,
|
| 4850 |
+
"step": 687
|
| 4851 |
+
},
|
| 4852 |
+
{
|
| 4853 |
+
"epoch": 0.8046783625730994,
|
| 4854 |
+
"grad_norm": 0.35074350237846375,
|
| 4855 |
+
"learning_rate": 1.8663480317504988e-05,
|
| 4856 |
+
"loss": 1.3714,
|
| 4857 |
+
"step": 688
|
| 4858 |
+
},
|
| 4859 |
+
{
|
| 4860 |
+
"epoch": 0.8058479532163743,
|
| 4861 |
+
"grad_norm": 0.5168773531913757,
|
| 4862 |
+
"learning_rate": 1.844775510437613e-05,
|
| 4863 |
+
"loss": 1.1505,
|
| 4864 |
+
"step": 689
|
| 4865 |
+
},
|
| 4866 |
+
{
|
| 4867 |
+
"epoch": 0.8070175438596491,
|
| 4868 |
+
"grad_norm": 0.3984200060367584,
|
| 4869 |
+
"learning_rate": 1.823315714515018e-05,
|
| 4870 |
+
"loss": 1.3853,
|
| 4871 |
+
"step": 690
|
| 4872 |
+
},
|
| 4873 |
+
{
|
| 4874 |
+
"epoch": 0.808187134502924,
|
| 4875 |
+
"grad_norm": 0.3879501223564148,
|
| 4876 |
+
"learning_rate": 1.8019689406102126e-05,
|
| 4877 |
+
"loss": 1.9904,
|
| 4878 |
+
"step": 691
|
| 4879 |
+
},
|
| 4880 |
+
{
|
| 4881 |
+
"epoch": 0.8093567251461988,
|
| 4882 |
+
"grad_norm": 0.5114380121231079,
|
| 4883 |
+
"learning_rate": 1.780735483788458e-05,
|
| 4884 |
+
"loss": 1.6035,
|
| 4885 |
+
"step": 692
|
| 4886 |
+
},
|
| 4887 |
+
{
|
| 4888 |
+
"epoch": 0.8105263157894737,
|
| 4889 |
+
"grad_norm": 0.5603318810462952,
|
| 4890 |
+
"learning_rate": 1.7596156375486862e-05,
|
| 4891 |
+
"loss": 1.2323,
|
| 4892 |
+
"step": 693
|
| 4893 |
+
},
|
| 4894 |
+
{
|
| 4895 |
+
"epoch": 0.8116959064327486,
|
| 4896 |
+
"grad_norm": 0.44819122552871704,
|
| 4897 |
+
"learning_rate": 1.7386096938194585e-05,
|
| 4898 |
+
"loss": 1.8956,
|
| 4899 |
+
"step": 694
|
| 4900 |
+
},
|
| 4901 |
+
{
|
| 4902 |
+
"epoch": 0.8128654970760234,
|
| 4903 |
+
"grad_norm": 0.625581681728363,
|
| 4904 |
+
"learning_rate": 1.717717942954914e-05,
|
| 4905 |
+
"loss": 1.8117,
|
| 4906 |
+
"step": 695
|
| 4907 |
+
},
|
| 4908 |
+
{
|
| 4909 |
+
"epoch": 0.8140350877192982,
|
| 4910 |
+
"grad_norm": 0.5764881372451782,
|
| 4911 |
+
"learning_rate": 1.6969406737307625e-05,
|
| 4912 |
+
"loss": 1.9899,
|
| 4913 |
+
"step": 696
|
| 4914 |
+
},
|
| 4915 |
+
{
|
| 4916 |
+
"epoch": 0.8152046783625732,
|
| 4917 |
+
"grad_norm": 1.0194729566574097,
|
| 4918 |
+
"learning_rate": 1.6762781733403033e-05,
|
| 4919 |
+
"loss": 1.4643,
|
| 4920 |
+
"step": 697
|
| 4921 |
+
},
|
| 4922 |
+
{
|
| 4923 |
+
"epoch": 0.816374269005848,
|
| 4924 |
+
"grad_norm": 0.5557692050933838,
|
| 4925 |
+
"learning_rate": 1.6557307273904354e-05,
|
| 4926 |
+
"loss": 1.3692,
|
| 4927 |
+
"step": 698
|
| 4928 |
+
},
|
| 4929 |
+
{
|
| 4930 |
+
"epoch": 0.8175438596491228,
|
| 4931 |
+
"grad_norm": 0.5595893263816833,
|
| 4932 |
+
"learning_rate": 1.6352986198977325e-05,
|
| 4933 |
+
"loss": 1.707,
|
| 4934 |
+
"step": 699
|
| 4935 |
+
},
|
| 4936 |
+
{
|
| 4937 |
+
"epoch": 0.8187134502923976,
|
| 4938 |
+
"grad_norm": 0.4992705285549164,
|
| 4939 |
+
"learning_rate": 1.614982133284495e-05,
|
| 4940 |
+
"loss": 1.7299,
|
| 4941 |
+
"step": 700
|
| 4942 |
+
},
|
| 4943 |
+
{
|
| 4944 |
+
"epoch": 0.8198830409356725,
|
| 4945 |
+
"grad_norm": 0.6664674282073975,
|
| 4946 |
+
"learning_rate": 1.5947815483748574e-05,
|
| 4947 |
+
"loss": 1.5864,
|
| 4948 |
+
"step": 701
|
| 4949 |
+
},
|
| 4950 |
+
{
|
| 4951 |
+
"epoch": 0.8210526315789474,
|
| 4952 |
+
"grad_norm": 0.5072648525238037,
|
| 4953 |
+
"learning_rate": 1.574697144390914e-05,
|
| 4954 |
+
"loss": 1.9092,
|
| 4955 |
+
"step": 702
|
| 4956 |
+
},
|
| 4957 |
+
{
|
| 4958 |
+
"epoch": 0.8222222222222222,
|
| 4959 |
+
"grad_norm": 0.5502724051475525,
|
| 4960 |
+
"learning_rate": 1.5547291989488444e-05,
|
| 4961 |
+
"loss": 1.8396,
|
| 4962 |
+
"step": 703
|
| 4963 |
+
},
|
| 4964 |
+
{
|
| 4965 |
+
"epoch": 0.8233918128654971,
|
| 4966 |
+
"grad_norm": 0.4326912760734558,
|
| 4967 |
+
"learning_rate": 1.534877988055081e-05,
|
| 4968 |
+
"loss": 0.9664,
|
| 4969 |
+
"step": 704
|
| 4970 |
+
},
|
| 4971 |
+
{
|
| 4972 |
+
"epoch": 0.8245614035087719,
|
| 4973 |
+
"grad_norm": 0.3104591965675354,
|
| 4974 |
+
"learning_rate": 1.515143786102503e-05,
|
| 4975 |
+
"loss": 1.6584,
|
| 4976 |
+
"step": 705
|
| 4977 |
+
},
|
| 4978 |
+
{
|
| 4979 |
+
"epoch": 0.8257309941520468,
|
| 4980 |
+
"grad_norm": 0.6304602026939392,
|
| 4981 |
+
"learning_rate": 1.49552686586663e-05,
|
| 4982 |
+
"loss": 1.1046,
|
| 4983 |
+
"step": 706
|
| 4984 |
+
},
|
| 4985 |
+
{
|
| 4986 |
+
"epoch": 0.8269005847953217,
|
| 4987 |
+
"grad_norm": 0.34990617632865906,
|
| 4988 |
+
"learning_rate": 1.4760274985018618e-05,
|
| 4989 |
+
"loss": 1.7938,
|
| 4990 |
+
"step": 707
|
| 4991 |
+
},
|
| 4992 |
+
{
|
| 4993 |
+
"epoch": 0.8280701754385965,
|
| 4994 |
+
"grad_norm": 0.5276802778244019,
|
| 4995 |
+
"learning_rate": 1.4566459535377252e-05,
|
| 4996 |
+
"loss": 1.4686,
|
| 4997 |
+
"step": 708
|
| 4998 |
+
},
|
| 4999 |
+
{
|
| 5000 |
+
"epoch": 0.8292397660818713,
|
| 5001 |
+
"grad_norm": 0.6775768399238586,
|
| 5002 |
+
"learning_rate": 1.4373824988751471e-05,
|
| 5003 |
+
"loss": 1.1786,
|
| 5004 |
+
"step": 709
|
| 5005 |
+
},
|
| 5006 |
+
{
|
| 5007 |
+
"epoch": 0.8304093567251462,
|
| 5008 |
+
"grad_norm": 0.6719319820404053,
|
| 5009 |
+
"learning_rate": 1.4182374007827603e-05,
|
| 5010 |
+
"loss": 1.5589,
|
| 5011 |
+
"step": 710
|
| 5012 |
+
},
|
| 5013 |
+
{
|
| 5014 |
+
"epoch": 0.8315789473684211,
|
| 5015 |
+
"grad_norm": 0.531869649887085,
|
| 5016 |
+
"learning_rate": 1.3992109238932105e-05,
|
| 5017 |
+
"loss": 2.1176,
|
| 5018 |
+
"step": 711
|
| 5019 |
+
},
|
| 5020 |
+
{
|
| 5021 |
+
"epoch": 0.8327485380116959,
|
| 5022 |
+
"grad_norm": 0.41226381063461304,
|
| 5023 |
+
"learning_rate": 1.3803033311995072e-05,
|
| 5024 |
+
"loss": 1.7806,
|
| 5025 |
+
"step": 712
|
| 5026 |
+
},
|
| 5027 |
+
{
|
| 5028 |
+
"epoch": 0.8339181286549707,
|
| 5029 |
+
"grad_norm": 0.6923168301582336,
|
| 5030 |
+
"learning_rate": 1.3615148840513881e-05,
|
| 5031 |
+
"loss": 0.6939,
|
| 5032 |
+
"step": 713
|
| 5033 |
+
},
|
| 5034 |
+
{
|
| 5035 |
+
"epoch": 0.8350877192982457,
|
| 5036 |
+
"grad_norm": 0.32801055908203125,
|
| 5037 |
+
"learning_rate": 1.3428458421517032e-05,
|
| 5038 |
+
"loss": 2.2524,
|
| 5039 |
+
"step": 714
|
| 5040 |
+
},
|
| 5041 |
+
{
|
| 5042 |
+
"epoch": 0.8362573099415205,
|
| 5043 |
+
"grad_norm": 0.5189927816390991,
|
| 5044 |
+
"learning_rate": 1.324296463552821e-05,
|
| 5045 |
+
"loss": 1.3771,
|
| 5046 |
+
"step": 715
|
| 5047 |
+
},
|
| 5048 |
+
{
|
| 5049 |
+
"epoch": 0.8374269005847953,
|
| 5050 |
+
"grad_norm": 0.42794156074523926,
|
| 5051 |
+
"learning_rate": 1.3058670046530775e-05,
|
| 5052 |
+
"loss": 2.0186,
|
| 5053 |
+
"step": 716
|
| 5054 |
+
},
|
| 5055 |
+
{
|
| 5056 |
+
"epoch": 0.8385964912280702,
|
| 5057 |
+
"grad_norm": 0.55548495054245,
|
| 5058 |
+
"learning_rate": 1.2875577201932132e-05,
|
| 5059 |
+
"loss": 1.6141,
|
| 5060 |
+
"step": 717
|
| 5061 |
+
},
|
| 5062 |
+
{
|
| 5063 |
+
"epoch": 0.839766081871345,
|
| 5064 |
+
"grad_norm": 0.4545220732688904,
|
| 5065 |
+
"learning_rate": 1.2693688632528622e-05,
|
| 5066 |
+
"loss": 1.3453,
|
| 5067 |
+
"step": 718
|
| 5068 |
+
},
|
| 5069 |
+
{
|
| 5070 |
+
"epoch": 0.8409356725146199,
|
| 5071 |
+
"grad_norm": 0.6801018118858337,
|
| 5072 |
+
"learning_rate": 1.2513006852470555e-05,
|
| 5073 |
+
"loss": 1.2189,
|
| 5074 |
+
"step": 719
|
| 5075 |
+
},
|
| 5076 |
+
{
|
| 5077 |
+
"epoch": 0.8421052631578947,
|
| 5078 |
+
"grad_norm": 1.013476848602295,
|
| 5079 |
+
"learning_rate": 1.2333534359227384e-05,
|
| 5080 |
+
"loss": 1.9237,
|
| 5081 |
+
"step": 720
|
| 5082 |
+
},
|
| 5083 |
+
{
|
| 5084 |
+
"epoch": 0.8432748538011696,
|
| 5085 |
+
"grad_norm": 0.5349302291870117,
|
| 5086 |
+
"learning_rate": 1.215527363355322e-05,
|
| 5087 |
+
"loss": 0.9729,
|
| 5088 |
+
"step": 721
|
| 5089 |
+
},
|
| 5090 |
+
{
|
| 5091 |
+
"epoch": 0.8444444444444444,
|
| 5092 |
+
"grad_norm": 0.5281980633735657,
|
| 5093 |
+
"learning_rate": 1.1978227139452625e-05,
|
| 5094 |
+
"loss": 1.7217,
|
| 5095 |
+
"step": 722
|
| 5096 |
+
},
|
| 5097 |
+
{
|
| 5098 |
+
"epoch": 0.8456140350877193,
|
| 5099 |
+
"grad_norm": 0.5399391055107117,
|
| 5100 |
+
"learning_rate": 1.1802397324146374e-05,
|
| 5101 |
+
"loss": 1.7664,
|
| 5102 |
+
"step": 723
|
| 5103 |
+
},
|
| 5104 |
+
{
|
| 5105 |
+
"epoch": 0.8467836257309942,
|
| 5106 |
+
"grad_norm": 0.8852502703666687,
|
| 5107 |
+
"learning_rate": 1.1627786618037762e-05,
|
| 5108 |
+
"loss": 0.5362,
|
| 5109 |
+
"step": 724
|
| 5110 |
+
},
|
| 5111 |
+
{
|
| 5112 |
+
"epoch": 0.847953216374269,
|
| 5113 |
+
"grad_norm": 0.45778024196624756,
|
| 5114 |
+
"learning_rate": 1.1454397434679021e-05,
|
| 5115 |
+
"loss": 2.0303,
|
| 5116 |
+
"step": 725
|
| 5117 |
+
},
|
| 5118 |
+
{
|
| 5119 |
+
"epoch": 0.8491228070175438,
|
| 5120 |
+
"grad_norm": 0.44471511244773865,
|
| 5121 |
+
"learning_rate": 1.128223217073786e-05,
|
| 5122 |
+
"loss": 1.9459,
|
| 5123 |
+
"step": 726
|
| 5124 |
+
},
|
| 5125 |
+
{
|
| 5126 |
+
"epoch": 0.8502923976608188,
|
| 5127 |
+
"grad_norm": 0.5390698909759521,
|
| 5128 |
+
"learning_rate": 1.1111293205964412e-05,
|
| 5129 |
+
"loss": 1.0607,
|
| 5130 |
+
"step": 727
|
| 5131 |
+
},
|
| 5132 |
+
{
|
| 5133 |
+
"epoch": 0.8514619883040936,
|
| 5134 |
+
"grad_norm": 0.44605326652526855,
|
| 5135 |
+
"learning_rate": 1.0941582903158343e-05,
|
| 5136 |
+
"loss": 1.8983,
|
| 5137 |
+
"step": 728
|
| 5138 |
+
},
|
| 5139 |
+
{
|
| 5140 |
+
"epoch": 0.8526315789473684,
|
| 5141 |
+
"grad_norm": 0.42205414175987244,
|
| 5142 |
+
"learning_rate": 1.0773103608136126e-05,
|
| 5143 |
+
"loss": 1.8101,
|
| 5144 |
+
"step": 729
|
| 5145 |
+
},
|
| 5146 |
+
{
|
| 5147 |
+
"epoch": 0.8538011695906432,
|
| 5148 |
+
"grad_norm": 0.3277212977409363,
|
| 5149 |
+
"learning_rate": 1.0605857649698669e-05,
|
| 5150 |
+
"loss": 0.8485,
|
| 5151 |
+
"step": 730
|
| 5152 |
+
},
|
| 5153 |
+
{
|
| 5154 |
+
"epoch": 0.8549707602339182,
|
| 5155 |
+
"grad_norm": 0.6796973347663879,
|
| 5156 |
+
"learning_rate": 1.0439847339599174e-05,
|
| 5157 |
+
"loss": 1.2562,
|
| 5158 |
+
"step": 731
|
| 5159 |
+
},
|
| 5160 |
+
{
|
| 5161 |
+
"epoch": 0.856140350877193,
|
| 5162 |
+
"grad_norm": 0.4901828169822693,
|
| 5163 |
+
"learning_rate": 1.0275074972511034e-05,
|
| 5164 |
+
"loss": 1.4787,
|
| 5165 |
+
"step": 732
|
| 5166 |
+
},
|
| 5167 |
+
{
|
| 5168 |
+
"epoch": 0.8573099415204678,
|
| 5169 |
+
"grad_norm": 0.38275349140167236,
|
| 5170 |
+
"learning_rate": 1.0111542825996245e-05,
|
| 5171 |
+
"loss": 2.1962,
|
| 5172 |
+
"step": 733
|
| 5173 |
+
},
|
| 5174 |
+
{
|
| 5175 |
+
"epoch": 0.8584795321637427,
|
| 5176 |
+
"grad_norm": 0.5255086421966553,
|
| 5177 |
+
"learning_rate": 9.949253160473915e-06,
|
| 5178 |
+
"loss": 1.5463,
|
| 5179 |
+
"step": 734
|
| 5180 |
+
},
|
| 5181 |
+
{
|
| 5182 |
+
"epoch": 0.8596491228070176,
|
| 5183 |
+
"grad_norm": 0.49351197481155396,
|
| 5184 |
+
"learning_rate": 9.788208219188932e-06,
|
| 5185 |
+
"loss": 1.4428,
|
| 5186 |
+
"step": 735
|
| 5187 |
+
},
|
| 5188 |
+
{
|
| 5189 |
+
"epoch": 0.8608187134502924,
|
| 5190 |
+
"grad_norm": 0.6366466879844666,
|
| 5191 |
+
"learning_rate": 9.628410228181084e-06,
|
| 5192 |
+
"loss": 1.8165,
|
| 5193 |
+
"step": 736
|
| 5194 |
+
},
|
| 5195 |
+
{
|
| 5196 |
+
"epoch": 0.8619883040935673,
|
| 5197 |
+
"grad_norm": 0.6942663788795471,
|
| 5198 |
+
"learning_rate": 9.469861396254153e-06,
|
| 5199 |
+
"loss": 1.1972,
|
| 5200 |
+
"step": 737
|
| 5201 |
+
},
|
| 5202 |
+
{
|
| 5203 |
+
"epoch": 0.8631578947368421,
|
| 5204 |
+
"grad_norm": 0.3593437075614929,
|
| 5205 |
+
"learning_rate": 9.31256391494546e-06,
|
| 5206 |
+
"loss": 1.6453,
|
| 5207 |
+
"step": 738
|
| 5208 |
+
},
|
| 5209 |
+
{
|
| 5210 |
+
"epoch": 0.8643274853801169,
|
| 5211 |
+
"grad_norm": 0.5943644642829895,
|
| 5212 |
+
"learning_rate": 9.156519958495602e-06,
|
| 5213 |
+
"loss": 1.2414,
|
| 5214 |
+
"step": 739
|
| 5215 |
+
},
|
| 5216 |
+
{
|
| 5217 |
+
"epoch": 0.8654970760233918,
|
| 5218 |
+
"grad_norm": 0.41532373428344727,
|
| 5219 |
+
"learning_rate": 9.001731683818337e-06,
|
| 5220 |
+
"loss": 1.4155,
|
| 5221 |
+
"step": 740
|
| 5222 |
+
},
|
| 5223 |
+
{
|
| 5224 |
+
"epoch": 0.8666666666666667,
|
| 5225 |
+
"grad_norm": 0.5489165782928467,
|
| 5226 |
+
"learning_rate": 8.848201230470776e-06,
|
| 5227 |
+
"loss": 1.839,
|
| 5228 |
+
"step": 741
|
| 5229 |
+
},
|
| 5230 |
+
{
|
| 5231 |
+
"epoch": 0.8678362573099415,
|
| 5232 |
+
"grad_norm": 0.6106531023979187,
|
| 5233 |
+
"learning_rate": 8.695930720623857e-06,
|
| 5234 |
+
"loss": 1.9636,
|
| 5235 |
+
"step": 742
|
| 5236 |
+
},
|
| 5237 |
+
{
|
| 5238 |
+
"epoch": 0.8690058479532163,
|
| 5239 |
+
"grad_norm": 0.39489027857780457,
|
| 5240 |
+
"learning_rate": 8.54492225903295e-06,
|
| 5241 |
+
"loss": 0.6472,
|
| 5242 |
+
"step": 743
|
| 5243 |
+
},
|
| 5244 |
+
{
|
| 5245 |
+
"epoch": 0.8701754385964913,
|
| 5246 |
+
"grad_norm": 0.4068044424057007,
|
| 5247 |
+
"learning_rate": 8.395177933008802e-06,
|
| 5248 |
+
"loss": 1.7972,
|
| 5249 |
+
"step": 744
|
| 5250 |
+
},
|
| 5251 |
+
{
|
| 5252 |
+
"epoch": 0.8713450292397661,
|
| 5253 |
+
"grad_norm": 0.5814440846443176,
|
| 5254 |
+
"learning_rate": 8.246699812388714e-06,
|
| 5255 |
+
"loss": 1.1727,
|
| 5256 |
+
"step": 745
|
| 5257 |
+
},
|
| 5258 |
+
{
|
| 5259 |
+
"epoch": 0.8725146198830409,
|
| 5260 |
+
"grad_norm": 0.5304514765739441,
|
| 5261 |
+
"learning_rate": 8.099489949507843e-06,
|
| 5262 |
+
"loss": 1.2336,
|
| 5263 |
+
"step": 746
|
| 5264 |
+
},
|
| 5265 |
+
{
|
| 5266 |
+
"epoch": 0.8736842105263158,
|
| 5267 |
+
"grad_norm": 0.45569664239883423,
|
| 5268 |
+
"learning_rate": 7.953550379170893e-06,
|
| 5269 |
+
"loss": 1.5075,
|
| 5270 |
+
"step": 747
|
| 5271 |
+
},
|
| 5272 |
+
{
|
| 5273 |
+
"epoch": 0.8748538011695907,
|
| 5274 |
+
"grad_norm": 0.43300554156303406,
|
| 5275 |
+
"learning_rate": 7.80888311862401e-06,
|
| 5276 |
+
"loss": 1.5435,
|
| 5277 |
+
"step": 748
|
| 5278 |
+
},
|
| 5279 |
+
{
|
| 5280 |
+
"epoch": 0.8760233918128655,
|
| 5281 |
+
"grad_norm": 0.5079240798950195,
|
| 5282 |
+
"learning_rate": 7.665490167526856e-06,
|
| 5283 |
+
"loss": 2.0697,
|
| 5284 |
+
"step": 749
|
| 5285 |
+
},
|
| 5286 |
+
{
|
| 5287 |
+
"epoch": 0.8771929824561403,
|
| 5288 |
+
"grad_norm": 0.5567116737365723,
|
| 5289 |
+
"learning_rate": 7.523373507924947e-06,
|
| 5290 |
+
"loss": 1.7936,
|
| 5291 |
+
"step": 750
|
| 5292 |
+
},
|
| 5293 |
+
{
|
| 5294 |
+
"epoch": 0.8783625730994152,
|
| 5295 |
+
"grad_norm": 0.5715877413749695,
|
| 5296 |
+
"learning_rate": 7.382535104222366e-06,
|
| 5297 |
+
"loss": 1.4835,
|
| 5298 |
+
"step": 751
|
| 5299 |
+
},
|
| 5300 |
+
{
|
| 5301 |
+
"epoch": 0.87953216374269,
|
| 5302 |
+
"grad_norm": 0.4187256693840027,
|
| 5303 |
+
"learning_rate": 7.242976903154442e-06,
|
| 5304 |
+
"loss": 1.5308,
|
| 5305 |
+
"step": 752
|
| 5306 |
+
},
|
| 5307 |
+
{
|
| 5308 |
+
"epoch": 0.8807017543859649,
|
| 5309 |
+
"grad_norm": 0.5101755857467651,
|
| 5310 |
+
"learning_rate": 7.104700833761013e-06,
|
| 5311 |
+
"loss": 1.5338,
|
| 5312 |
+
"step": 753
|
| 5313 |
+
},
|
| 5314 |
+
{
|
| 5315 |
+
"epoch": 0.8818713450292398,
|
| 5316 |
+
"grad_norm": 0.41772782802581787,
|
| 5317 |
+
"learning_rate": 6.967708807359663e-06,
|
| 5318 |
+
"loss": 1.8949,
|
| 5319 |
+
"step": 754
|
| 5320 |
+
},
|
| 5321 |
+
{
|
| 5322 |
+
"epoch": 0.8830409356725146,
|
| 5323 |
+
"grad_norm": 0.4892766773700714,
|
| 5324 |
+
"learning_rate": 6.8320027175192706e-06,
|
| 5325 |
+
"loss": 1.1041,
|
| 5326 |
+
"step": 755
|
| 5327 |
+
},
|
| 5328 |
+
{
|
| 5329 |
+
"epoch": 0.8842105263157894,
|
| 5330 |
+
"grad_norm": 0.5039598345756531,
|
| 5331 |
+
"learning_rate": 6.697584440033988e-06,
|
| 5332 |
+
"loss": 0.749,
|
| 5333 |
+
"step": 756
|
| 5334 |
+
},
|
| 5335 |
+
{
|
| 5336 |
+
"epoch": 0.8853801169590644,
|
| 5337 |
+
"grad_norm": 0.5067999958992004,
|
| 5338 |
+
"learning_rate": 6.564455832897099e-06,
|
| 5339 |
+
"loss": 1.0401,
|
| 5340 |
+
"step": 757
|
| 5341 |
+
},
|
| 5342 |
+
{
|
| 5343 |
+
"epoch": 0.8865497076023392,
|
| 5344 |
+
"grad_norm": 0.7063804864883423,
|
| 5345 |
+
"learning_rate": 6.432618736275553e-06,
|
| 5346 |
+
"loss": 1.4961,
|
| 5347 |
+
"step": 758
|
| 5348 |
+
},
|
| 5349 |
+
{
|
| 5350 |
+
"epoch": 0.887719298245614,
|
| 5351 |
+
"grad_norm": 0.303244024515152,
|
| 5352 |
+
"learning_rate": 6.302074972484362e-06,
|
| 5353 |
+
"loss": 0.8921,
|
| 5354 |
+
"step": 759
|
| 5355 |
+
},
|
| 5356 |
+
{
|
| 5357 |
+
"epoch": 0.8888888888888888,
|
| 5358 |
+
"grad_norm": 0.49389979243278503,
|
| 5359 |
+
"learning_rate": 6.1728263459614796e-06,
|
| 5360 |
+
"loss": 1.9556,
|
| 5361 |
+
"step": 760
|
| 5362 |
+
},
|
| 5363 |
+
{
|
| 5364 |
+
"epoch": 0.8900584795321638,
|
| 5365 |
+
"grad_norm": 0.34622922539711,
|
| 5366 |
+
"learning_rate": 6.044874643242904e-06,
|
| 5367 |
+
"loss": 0.6971,
|
| 5368 |
+
"step": 761
|
| 5369 |
+
},
|
| 5370 |
+
{
|
| 5371 |
+
"epoch": 0.8912280701754386,
|
| 5372 |
+
"grad_norm": 0.3411808907985687,
|
| 5373 |
+
"learning_rate": 5.9182216329378705e-06,
|
| 5374 |
+
"loss": 1.7343,
|
| 5375 |
+
"step": 762
|
| 5376 |
+
},
|
| 5377 |
+
{
|
| 5378 |
+
"epoch": 0.8923976608187134,
|
| 5379 |
+
"grad_norm": 0.3007182776927948,
|
| 5380 |
+
"learning_rate": 5.7928690657045535e-06,
|
| 5381 |
+
"loss": 1.4504,
|
| 5382 |
+
"step": 763
|
| 5383 |
+
},
|
| 5384 |
+
{
|
| 5385 |
+
"epoch": 0.8935672514619883,
|
| 5386 |
+
"grad_norm": 0.36203962564468384,
|
| 5387 |
+
"learning_rate": 5.668818674225685e-06,
|
| 5388 |
+
"loss": 1.2621,
|
| 5389 |
+
"step": 764
|
| 5390 |
+
},
|
| 5391 |
+
{
|
| 5392 |
+
"epoch": 0.8947368421052632,
|
| 5393 |
+
"grad_norm": 0.4814302623271942,
|
| 5394 |
+
"learning_rate": 5.546072173184791e-06,
|
| 5395 |
+
"loss": 1.1898,
|
| 5396 |
+
"step": 765
|
| 5397 |
+
},
|
| 5398 |
+
{
|
| 5399 |
+
"epoch": 0.895906432748538,
|
| 5400 |
+
"grad_norm": 0.6297724843025208,
|
| 5401 |
+
"learning_rate": 5.424631259242352e-06,
|
| 5402 |
+
"loss": 1.6179,
|
| 5403 |
+
"step": 766
|
| 5404 |
+
},
|
| 5405 |
+
{
|
| 5406 |
+
"epoch": 0.8970760233918129,
|
| 5407 |
+
"grad_norm": 0.47519174218177795,
|
| 5408 |
+
"learning_rate": 5.3044976110124155e-06,
|
| 5409 |
+
"loss": 2.0664,
|
| 5410 |
+
"step": 767
|
| 5411 |
+
},
|
| 5412 |
+
{
|
| 5413 |
+
"epoch": 0.8982456140350877,
|
| 5414 |
+
"grad_norm": 0.3763788938522339,
|
| 5415 |
+
"learning_rate": 5.185672889039394e-06,
|
| 5416 |
+
"loss": 1.6694,
|
| 5417 |
+
"step": 768
|
| 5418 |
+
},
|
| 5419 |
+
{
|
| 5420 |
+
"epoch": 0.8994152046783626,
|
| 5421 |
+
"grad_norm": 0.5559266805648804,
|
| 5422 |
+
"learning_rate": 5.068158735775097e-06,
|
| 5423 |
+
"loss": 1.5592,
|
| 5424 |
+
"step": 769
|
| 5425 |
+
},
|
| 5426 |
+
{
|
| 5427 |
+
"epoch": 0.9005847953216374,
|
| 5428 |
+
"grad_norm": 0.3948187232017517,
|
| 5429 |
+
"learning_rate": 4.951956775555999e-06,
|
| 5430 |
+
"loss": 1.8212,
|
| 5431 |
+
"step": 770
|
| 5432 |
+
},
|
| 5433 |
+
{
|
| 5434 |
+
"epoch": 0.9017543859649123,
|
| 5435 |
+
"grad_norm": 0.40267279744148254,
|
| 5436 |
+
"learning_rate": 4.837068614580875e-06,
|
| 5437 |
+
"loss": 0.8669,
|
| 5438 |
+
"step": 771
|
| 5439 |
+
},
|
| 5440 |
+
{
|
| 5441 |
+
"epoch": 0.9029239766081871,
|
| 5442 |
+
"grad_norm": 0.4965238869190216,
|
| 5443 |
+
"learning_rate": 4.723495840888493e-06,
|
| 5444 |
+
"loss": 1.5074,
|
| 5445 |
+
"step": 772
|
| 5446 |
+
},
|
| 5447 |
+
{
|
| 5448 |
+
"epoch": 0.904093567251462,
|
| 5449 |
+
"grad_norm": 0.6691774129867554,
|
| 5450 |
+
"learning_rate": 4.611240024335706e-06,
|
| 5451 |
+
"loss": 1.5161,
|
| 5452 |
+
"step": 773
|
| 5453 |
+
},
|
| 5454 |
+
{
|
| 5455 |
+
"epoch": 0.9052631578947369,
|
| 5456 |
+
"grad_norm": 0.6021882891654968,
|
| 5457 |
+
"learning_rate": 4.5003027165758215e-06,
|
| 5458 |
+
"loss": 1.5402,
|
| 5459 |
+
"step": 774
|
| 5460 |
+
},
|
| 5461 |
+
{
|
| 5462 |
+
"epoch": 0.9064327485380117,
|
| 5463 |
+
"grad_norm": 0.5868009924888611,
|
| 5464 |
+
"learning_rate": 4.390685451037025e-06,
|
| 5465 |
+
"loss": 1.6296,
|
| 5466 |
+
"step": 775
|
| 5467 |
+
},
|
| 5468 |
+
{
|
| 5469 |
+
"epoch": 0.9076023391812865,
|
| 5470 |
+
"grad_norm": 0.601019024848938,
|
| 5471 |
+
"learning_rate": 4.282389742901283e-06,
|
| 5472 |
+
"loss": 1.1937,
|
| 5473 |
+
"step": 776
|
| 5474 |
+
},
|
| 5475 |
+
{
|
| 5476 |
+
"epoch": 0.9087719298245615,
|
| 5477 |
+
"grad_norm": 0.5701804757118225,
|
| 5478 |
+
"learning_rate": 4.175417089083378e-06,
|
| 5479 |
+
"loss": 1.4124,
|
| 5480 |
+
"step": 777
|
| 5481 |
+
},
|
| 5482 |
+
{
|
| 5483 |
+
"epoch": 0.9099415204678363,
|
| 5484 |
+
"grad_norm": 0.9340035319328308,
|
| 5485 |
+
"learning_rate": 4.069768968210186e-06,
|
| 5486 |
+
"loss": 1.3639,
|
| 5487 |
+
"step": 778
|
| 5488 |
+
},
|
| 5489 |
+
{
|
| 5490 |
+
"epoch": 0.9111111111111111,
|
| 5491 |
+
"grad_norm": 0.3220817446708679,
|
| 5492 |
+
"learning_rate": 3.9654468406002396e-06,
|
| 5493 |
+
"loss": 1.7019,
|
| 5494 |
+
"step": 779
|
| 5495 |
+
},
|
| 5496 |
+
{
|
| 5497 |
+
"epoch": 0.9122807017543859,
|
| 5498 |
+
"grad_norm": 0.5050005316734314,
|
| 5499 |
+
"learning_rate": 3.862452148243622e-06,
|
| 5500 |
+
"loss": 1.6152,
|
| 5501 |
+
"step": 780
|
| 5502 |
+
},
|
| 5503 |
+
{
|
| 5504 |
+
"epoch": 0.9134502923976608,
|
| 5505 |
+
"grad_norm": 0.3310493528842926,
|
| 5506 |
+
"learning_rate": 3.7607863147819166e-06,
|
| 5507 |
+
"loss": 1.5067,
|
| 5508 |
+
"step": 781
|
| 5509 |
+
},
|
| 5510 |
+
{
|
| 5511 |
+
"epoch": 0.9146198830409357,
|
| 5512 |
+
"grad_norm": 0.6780093908309937,
|
| 5513 |
+
"learning_rate": 3.6604507454886083e-06,
|
| 5514 |
+
"loss": 1.029,
|
| 5515 |
+
"step": 782
|
| 5516 |
+
},
|
| 5517 |
+
{
|
| 5518 |
+
"epoch": 0.9157894736842105,
|
| 5519 |
+
"grad_norm": 0.5026019811630249,
|
| 5520 |
+
"learning_rate": 3.561446827249659e-06,
|
| 5521 |
+
"loss": 1.3767,
|
| 5522 |
+
"step": 783
|
| 5523 |
+
},
|
| 5524 |
+
{
|
| 5525 |
+
"epoch": 0.9169590643274854,
|
| 5526 |
+
"grad_norm": 0.6126399040222168,
|
| 5527 |
+
"learning_rate": 3.4637759285442882e-06,
|
| 5528 |
+
"loss": 1.2892,
|
| 5529 |
+
"step": 784
|
| 5530 |
+
},
|
| 5531 |
+
{
|
| 5532 |
+
"epoch": 0.9181286549707602,
|
| 5533 |
+
"grad_norm": 0.5534948110580444,
|
| 5534 |
+
"learning_rate": 3.367439399426087e-06,
|
| 5535 |
+
"loss": 1.2748,
|
| 5536 |
+
"step": 785
|
| 5537 |
+
},
|
| 5538 |
+
{
|
| 5539 |
+
"epoch": 0.9192982456140351,
|
| 5540 |
+
"grad_norm": 0.44695568084716797,
|
| 5541 |
+
"learning_rate": 3.2724385715043883e-06,
|
| 5542 |
+
"loss": 1.696,
|
| 5543 |
+
"step": 786
|
| 5544 |
+
},
|
| 5545 |
+
{
|
| 5546 |
+
"epoch": 0.92046783625731,
|
| 5547 |
+
"grad_norm": 0.4182886779308319,
|
| 5548 |
+
"learning_rate": 3.178774757925762e-06,
|
| 5549 |
+
"loss": 2.054,
|
| 5550 |
+
"step": 787
|
| 5551 |
+
},
|
| 5552 |
+
{
|
| 5553 |
+
"epoch": 0.9216374269005848,
|
| 5554 |
+
"grad_norm": 0.49121958017349243,
|
| 5555 |
+
"learning_rate": 3.0864492533560165e-06,
|
| 5556 |
+
"loss": 1.884,
|
| 5557 |
+
"step": 788
|
| 5558 |
+
},
|
| 5559 |
+
{
|
| 5560 |
+
"epoch": 0.9228070175438596,
|
| 5561 |
+
"grad_norm": 0.5378497242927551,
|
| 5562 |
+
"learning_rate": 2.9954633339621564e-06,
|
| 5563 |
+
"loss": 1.5816,
|
| 5564 |
+
"step": 789
|
| 5565 |
+
},
|
| 5566 |
+
{
|
| 5567 |
+
"epoch": 0.9239766081871345,
|
| 5568 |
+
"grad_norm": 0.3458714187145233,
|
| 5569 |
+
"learning_rate": 2.905818257394799e-06,
|
| 5570 |
+
"loss": 1.9412,
|
| 5571 |
+
"step": 790
|
| 5572 |
+
},
|
| 5573 |
+
{
|
| 5574 |
+
"epoch": 0.9251461988304094,
|
| 5575 |
+
"grad_norm": 0.6665580868721008,
|
| 5576 |
+
"learning_rate": 2.817515262770842e-06,
|
| 5577 |
+
"loss": 0.8671,
|
| 5578 |
+
"step": 791
|
| 5579 |
+
},
|
| 5580 |
+
{
|
| 5581 |
+
"epoch": 0.9263157894736842,
|
| 5582 |
+
"grad_norm": 0.3785383999347687,
|
| 5583 |
+
"learning_rate": 2.7305555706562457e-06,
|
| 5584 |
+
"loss": 1.8415,
|
| 5585 |
+
"step": 792
|
| 5586 |
+
},
|
| 5587 |
+
{
|
| 5588 |
+
"epoch": 0.927485380116959,
|
| 5589 |
+
"grad_norm": 0.41982072591781616,
|
| 5590 |
+
"learning_rate": 2.6449403830492104e-06,
|
| 5591 |
+
"loss": 1.8585,
|
| 5592 |
+
"step": 793
|
| 5593 |
+
},
|
| 5594 |
+
{
|
| 5595 |
+
"epoch": 0.928654970760234,
|
| 5596 |
+
"grad_norm": 0.33484476804733276,
|
| 5597 |
+
"learning_rate": 2.5606708833635917e-06,
|
| 5598 |
+
"loss": 1.5949,
|
| 5599 |
+
"step": 794
|
| 5600 |
+
},
|
| 5601 |
+
{
|
| 5602 |
+
"epoch": 0.9298245614035088,
|
| 5603 |
+
"grad_norm": 0.6077429056167603,
|
| 5604 |
+
"learning_rate": 2.4777482364124695e-06,
|
| 5605 |
+
"loss": 1.1995,
|
| 5606 |
+
"step": 795
|
| 5607 |
+
},
|
| 5608 |
+
{
|
| 5609 |
+
"epoch": 0.9309941520467836,
|
| 5610 |
+
"grad_norm": 0.6394246816635132,
|
| 5611 |
+
"learning_rate": 2.39617358839207e-06,
|
| 5612 |
+
"loss": 1.4034,
|
| 5613 |
+
"step": 796
|
| 5614 |
+
},
|
| 5615 |
+
{
|
| 5616 |
+
"epoch": 0.9321637426900585,
|
| 5617 |
+
"grad_norm": 0.7234786152839661,
|
| 5618 |
+
"learning_rate": 2.315948066866003e-06,
|
| 5619 |
+
"loss": 1.4246,
|
| 5620 |
+
"step": 797
|
| 5621 |
+
},
|
| 5622 |
+
{
|
| 5623 |
+
"epoch": 0.9333333333333333,
|
| 5624 |
+
"grad_norm": 0.6573183536529541,
|
| 5625 |
+
"learning_rate": 2.2370727807495497e-06,
|
| 5626 |
+
"loss": 1.1776,
|
| 5627 |
+
"step": 798
|
| 5628 |
+
},
|
| 5629 |
+
{
|
| 5630 |
+
"epoch": 0.9345029239766082,
|
| 5631 |
+
"grad_norm": 0.5006548166275024,
|
| 5632 |
+
"learning_rate": 2.1595488202944103e-06,
|
| 5633 |
+
"loss": 1.8393,
|
| 5634 |
+
"step": 799
|
| 5635 |
+
},
|
| 5636 |
+
{
|
| 5637 |
+
"epoch": 0.935672514619883,
|
| 5638 |
+
"grad_norm": 0.6787004470825195,
|
| 5639 |
+
"learning_rate": 2.0833772570736375e-06,
|
| 5640 |
+
"loss": 1.5865,
|
| 5641 |
+
"step": 800
|
| 5642 |
+
},
|
| 5643 |
+
{
|
| 5644 |
+
"epoch": 0.9368421052631579,
|
| 5645 |
+
"grad_norm": 0.3747584819793701,
|
| 5646 |
+
"learning_rate": 2.0085591439667927e-06,
|
| 5647 |
+
"loss": 2.2697,
|
| 5648 |
+
"step": 801
|
| 5649 |
+
},
|
| 5650 |
+
{
|
| 5651 |
+
"epoch": 0.9380116959064327,
|
| 5652 |
+
"grad_norm": 0.4233306646347046,
|
| 5653 |
+
"learning_rate": 1.935095515145391e-06,
|
| 5654 |
+
"loss": 1.6865,
|
| 5655 |
+
"step": 802
|
| 5656 |
+
},
|
| 5657 |
+
{
|
| 5658 |
+
"epoch": 0.9391812865497076,
|
| 5659 |
+
"grad_norm": 0.45490002632141113,
|
| 5660 |
+
"learning_rate": 1.8629873860586566e-06,
|
| 5661 |
+
"loss": 1.7815,
|
| 5662 |
+
"step": 803
|
| 5663 |
+
},
|
| 5664 |
+
{
|
| 5665 |
+
"epoch": 0.9403508771929825,
|
| 5666 |
+
"grad_norm": 0.5193967223167419,
|
| 5667 |
+
"learning_rate": 1.7922357534194356e-06,
|
| 5668 |
+
"loss": 1.5525,
|
| 5669 |
+
"step": 804
|
| 5670 |
+
},
|
| 5671 |
+
{
|
| 5672 |
+
"epoch": 0.9415204678362573,
|
| 5673 |
+
"grad_norm": 0.4140605330467224,
|
| 5674 |
+
"learning_rate": 1.7228415951904165e-06,
|
| 5675 |
+
"loss": 1.791,
|
| 5676 |
+
"step": 805
|
| 5677 |
+
},
|
| 5678 |
+
{
|
| 5679 |
+
"epoch": 0.9426900584795321,
|
| 5680 |
+
"grad_norm": 0.7900307774543762,
|
| 5681 |
+
"learning_rate": 1.6548058705706526e-06,
|
| 5682 |
+
"loss": 1.2546,
|
| 5683 |
+
"step": 806
|
| 5684 |
+
},
|
| 5685 |
+
{
|
| 5686 |
+
"epoch": 0.9438596491228071,
|
| 5687 |
+
"grad_norm": 0.3959881663322449,
|
| 5688 |
+
"learning_rate": 1.5881295199822953e-06,
|
| 5689 |
+
"loss": 1.6344,
|
| 5690 |
+
"step": 807
|
| 5691 |
+
},
|
| 5692 |
+
{
|
| 5693 |
+
"epoch": 0.9450292397660819,
|
| 5694 |
+
"grad_norm": 0.6034178733825684,
|
| 5695 |
+
"learning_rate": 1.5228134650575265e-06,
|
| 5696 |
+
"loss": 0.9063,
|
| 5697 |
+
"step": 808
|
| 5698 |
+
},
|
| 5699 |
+
{
|
| 5700 |
+
"epoch": 0.9461988304093567,
|
| 5701 |
+
"grad_norm": 0.4347105920314789,
|
| 5702 |
+
"learning_rate": 1.458858608625957e-06,
|
| 5703 |
+
"loss": 1.7198,
|
| 5704 |
+
"step": 809
|
| 5705 |
+
},
|
| 5706 |
+
{
|
| 5707 |
+
"epoch": 0.9473684210526315,
|
| 5708 |
+
"grad_norm": 0.5537627935409546,
|
| 5709 |
+
"learning_rate": 1.396265834701982e-06,
|
| 5710 |
+
"loss": 1.4791,
|
| 5711 |
+
"step": 810
|
| 5712 |
+
},
|
| 5713 |
+
{
|
| 5714 |
+
"epoch": 0.9485380116959065,
|
| 5715 |
+
"grad_norm": 0.5163156986236572,
|
| 5716 |
+
"learning_rate": 1.335036008472701e-06,
|
| 5717 |
+
"loss": 1.4561,
|
| 5718 |
+
"step": 811
|
| 5719 |
+
},
|
| 5720 |
+
{
|
| 5721 |
+
"epoch": 0.9497076023391813,
|
| 5722 |
+
"grad_norm": 0.30538955330848694,
|
| 5723 |
+
"learning_rate": 1.2751699762858838e-06,
|
| 5724 |
+
"loss": 0.5194,
|
| 5725 |
+
"step": 812
|
| 5726 |
+
},
|
| 5727 |
+
{
|
| 5728 |
+
"epoch": 0.9508771929824561,
|
| 5729 |
+
"grad_norm": 0.8774105906486511,
|
| 5730 |
+
"learning_rate": 1.2166685656382903e-06,
|
| 5731 |
+
"loss": 0.8315,
|
| 5732 |
+
"step": 813
|
| 5733 |
+
},
|
| 5734 |
+
{
|
| 5735 |
+
"epoch": 0.952046783625731,
|
| 5736 |
+
"grad_norm": 0.5743803381919861,
|
| 5737 |
+
"learning_rate": 1.1595325851642137e-06,
|
| 5738 |
+
"loss": 2.1359,
|
| 5739 |
+
"step": 814
|
| 5740 |
+
},
|
| 5741 |
+
{
|
| 5742 |
+
"epoch": 0.9532163742690059,
|
| 5743 |
+
"grad_norm": 0.4251870810985565,
|
| 5744 |
+
"learning_rate": 1.103762824624377e-06,
|
| 5745 |
+
"loss": 2.0652,
|
| 5746 |
+
"step": 815
|
| 5747 |
+
},
|
| 5748 |
+
{
|
| 5749 |
+
"epoch": 0.9543859649122807,
|
| 5750 |
+
"grad_norm": 0.5636005401611328,
|
| 5751 |
+
"learning_rate": 1.0493600548948878e-06,
|
| 5752 |
+
"loss": 1.919,
|
| 5753 |
+
"step": 816
|
| 5754 |
+
},
|
| 5755 |
+
{
|
| 5756 |
+
"epoch": 0.9555555555555556,
|
| 5757 |
+
"grad_norm": 0.39051973819732666,
|
| 5758 |
+
"learning_rate": 9.963250279567239e-07,
|
| 5759 |
+
"loss": 0.4746,
|
| 5760 |
+
"step": 817
|
| 5761 |
+
},
|
| 5762 |
+
{
|
| 5763 |
+
"epoch": 0.9567251461988304,
|
| 5764 |
+
"grad_norm": 0.485362708568573,
|
| 5765 |
+
"learning_rate": 9.446584768852407e-07,
|
| 5766 |
+
"loss": 1.9436,
|
| 5767 |
+
"step": 818
|
| 5768 |
+
},
|
| 5769 |
+
{
|
| 5770 |
+
"epoch": 0.9578947368421052,
|
| 5771 |
+
"grad_norm": 0.6727136373519897,
|
| 5772 |
+
"learning_rate": 8.943611158400478e-07,
|
| 5773 |
+
"loss": 0.9032,
|
| 5774 |
+
"step": 819
|
| 5775 |
+
},
|
| 5776 |
+
{
|
| 5777 |
+
"epoch": 0.9590643274853801,
|
| 5778 |
+
"grad_norm": 0.8409953117370605,
|
| 5779 |
+
"learning_rate": 8.454336400552154e-07,
|
| 5780 |
+
"loss": 1.0934,
|
| 5781 |
+
"step": 820
|
| 5782 |
+
},
|
| 5783 |
+
{
|
| 5784 |
+
"epoch": 0.960233918128655,
|
| 5785 |
+
"grad_norm": 0.45199981331825256,
|
| 5786 |
+
"learning_rate": 7.978767258295494e-07,
|
| 5787 |
+
"loss": 1.5624,
|
| 5788 |
+
"step": 821
|
| 5789 |
+
},
|
| 5790 |
+
{
|
| 5791 |
+
"epoch": 0.9614035087719298,
|
| 5792 |
+
"grad_norm": 0.3910199701786041,
|
| 5793 |
+
"learning_rate": 7.516910305173431e-07,
|
| 5794 |
+
"loss": 1.7667,
|
| 5795 |
+
"step": 822
|
| 5796 |
+
},
|
| 5797 |
+
{
|
| 5798 |
+
"epoch": 0.9625730994152046,
|
| 5799 |
+
"grad_norm": 0.40123024582862854,
|
| 5800 |
+
"learning_rate": 7.068771925192286e-07,
|
| 5801 |
+
"loss": 1.7485,
|
| 5802 |
+
"step": 823
|
| 5803 |
+
},
|
| 5804 |
+
{
|
| 5805 |
+
"epoch": 0.9637426900584796,
|
| 5806 |
+
"grad_norm": 0.40403178334236145,
|
| 5807 |
+
"learning_rate": 6.634358312733957e-07,
|
| 5808 |
+
"loss": 2.3095,
|
| 5809 |
+
"step": 824
|
| 5810 |
+
},
|
| 5811 |
+
{
|
| 5812 |
+
"epoch": 0.9649122807017544,
|
| 5813 |
+
"grad_norm": 0.5259490013122559,
|
| 5814 |
+
"learning_rate": 6.21367547246976e-07,
|
| 5815 |
+
"loss": 1.6458,
|
| 5816 |
+
"step": 825
|
| 5817 |
+
},
|
| 5818 |
+
{
|
| 5819 |
+
"epoch": 0.9660818713450292,
|
| 5820 |
+
"grad_norm": 0.5003288388252258,
|
| 5821 |
+
"learning_rate": 5.806729219278051e-07,
|
| 5822 |
+
"loss": 1.5372,
|
| 5823 |
+
"step": 826
|
| 5824 |
+
},
|
| 5825 |
+
{
|
| 5826 |
+
"epoch": 0.9672514619883041,
|
| 5827 |
+
"grad_norm": 0.4602636694908142,
|
| 5828 |
+
"learning_rate": 5.413525178163292e-07,
|
| 5829 |
+
"loss": 1.7856,
|
| 5830 |
+
"step": 827
|
| 5831 |
+
},
|
| 5832 |
+
{
|
| 5833 |
+
"epoch": 0.968421052631579,
|
| 5834 |
+
"grad_norm": 0.5534040331840515,
|
| 5835 |
+
"learning_rate": 5.034068784178891e-07,
|
| 5836 |
+
"loss": 1.3398,
|
| 5837 |
+
"step": 828
|
| 5838 |
+
},
|
| 5839 |
+
{
|
| 5840 |
+
"epoch": 0.9695906432748538,
|
| 5841 |
+
"grad_norm": 0.6525434255599976,
|
| 5842 |
+
"learning_rate": 4.668365282351372e-07,
|
| 5843 |
+
"loss": 1.4224,
|
| 5844 |
+
"step": 829
|
| 5845 |
+
},
|
| 5846 |
+
{
|
| 5847 |
+
"epoch": 0.9707602339181286,
|
| 5848 |
+
"grad_norm": 0.4619835615158081,
|
| 5849 |
+
"learning_rate": 4.316419727608434e-07,
|
| 5850 |
+
"loss": 1.6593,
|
| 5851 |
+
"step": 830
|
| 5852 |
+
},
|
| 5853 |
+
{
|
| 5854 |
+
"epoch": 0.9719298245614035,
|
| 5855 |
+
"grad_norm": 0.4035587012767792,
|
| 5856 |
+
"learning_rate": 3.978236984708894e-07,
|
| 5857 |
+
"loss": 0.6571,
|
| 5858 |
+
"step": 831
|
| 5859 |
+
},
|
| 5860 |
+
{
|
| 5861 |
+
"epoch": 0.9730994152046784,
|
| 5862 |
+
"grad_norm": 0.4727626442909241,
|
| 5863 |
+
"learning_rate": 3.653821728175522e-07,
|
| 5864 |
+
"loss": 1.9151,
|
| 5865 |
+
"step": 832
|
| 5866 |
+
},
|
| 5867 |
+
{
|
| 5868 |
+
"epoch": 0.9742690058479532,
|
| 5869 |
+
"grad_norm": 0.47662097215652466,
|
| 5870 |
+
"learning_rate": 3.343178442230088e-07,
|
| 5871 |
+
"loss": 1.4586,
|
| 5872 |
+
"step": 833
|
| 5873 |
+
},
|
| 5874 |
+
{
|
| 5875 |
+
"epoch": 0.9754385964912281,
|
| 5876 |
+
"grad_norm": 0.539738118648529,
|
| 5877 |
+
"learning_rate": 3.0463114207317513e-07,
|
| 5878 |
+
"loss": 1.9575,
|
| 5879 |
+
"step": 834
|
| 5880 |
+
},
|
| 5881 |
+
{
|
| 5882 |
+
"epoch": 0.9766081871345029,
|
| 5883 |
+
"grad_norm": 0.45117852091789246,
|
| 5884 |
+
"learning_rate": 2.7632247671177667e-07,
|
| 5885 |
+
"loss": 1.776,
|
| 5886 |
+
"step": 835
|
| 5887 |
+
},
|
| 5888 |
+
{
|
| 5889 |
+
"epoch": 0.9777777777777777,
|
| 5890 |
+
"grad_norm": 0.41608095169067383,
|
| 5891 |
+
"learning_rate": 2.493922394346315e-07,
|
| 5892 |
+
"loss": 0.5535,
|
| 5893 |
+
"step": 836
|
| 5894 |
+
},
|
| 5895 |
+
{
|
| 5896 |
+
"epoch": 0.9789473684210527,
|
| 5897 |
+
"grad_norm": 0.4384686350822449,
|
| 5898 |
+
"learning_rate": 2.2384080248429863e-07,
|
| 5899 |
+
"loss": 1.425,
|
| 5900 |
+
"step": 837
|
| 5901 |
+
},
|
| 5902 |
+
{
|
| 5903 |
+
"epoch": 0.9801169590643275,
|
| 5904 |
+
"grad_norm": 0.6699833273887634,
|
| 5905 |
+
"learning_rate": 1.9966851904487106e-07,
|
| 5906 |
+
"loss": 1.6039,
|
| 5907 |
+
"step": 838
|
| 5908 |
+
},
|
| 5909 |
+
{
|
| 5910 |
+
"epoch": 0.9812865497076023,
|
| 5911 |
+
"grad_norm": 4.3168182373046875,
|
| 5912 |
+
"learning_rate": 1.768757232371576e-07,
|
| 5913 |
+
"loss": 1.8438,
|
| 5914 |
+
"step": 839
|
| 5915 |
+
},
|
| 5916 |
+
{
|
| 5917 |
+
"epoch": 0.9824561403508771,
|
| 5918 |
+
"grad_norm": 0.44814804196357727,
|
| 5919 |
+
"learning_rate": 1.554627301140199e-07,
|
| 5920 |
+
"loss": 1.1396,
|
| 5921 |
+
"step": 840
|
| 5922 |
+
},
|
| 5923 |
+
{
|
| 5924 |
+
"epoch": 0.9836257309941521,
|
| 5925 |
+
"grad_norm": 0.34025856852531433,
|
| 5926 |
+
"learning_rate": 1.354298356560091e-07,
|
| 5927 |
+
"loss": 1.9324,
|
| 5928 |
+
"step": 841
|
| 5929 |
+
},
|
| 5930 |
+
{
|
| 5931 |
+
"epoch": 0.9847953216374269,
|
| 5932 |
+
"grad_norm": 0.49703219532966614,
|
| 5933 |
+
"learning_rate": 1.1677731676733584e-07,
|
| 5934 |
+
"loss": 1.2839,
|
| 5935 |
+
"step": 842
|
| 5936 |
+
},
|
| 5937 |
+
{
|
| 5938 |
+
"epoch": 0.9859649122807017,
|
| 5939 |
+
"grad_norm": 0.4712502956390381,
|
| 5940 |
+
"learning_rate": 9.950543127198453e-08,
|
| 5941 |
+
"loss": 1.8165,
|
| 5942 |
+
"step": 843
|
| 5943 |
+
},
|
| 5944 |
+
{
|
| 5945 |
+
"epoch": 0.9871345029239766,
|
| 5946 |
+
"grad_norm": 0.5401002764701843,
|
| 5947 |
+
"learning_rate": 8.361441791016055e-08,
|
| 5948 |
+
"loss": 1.5518,
|
| 5949 |
+
"step": 844
|
| 5950 |
+
},
|
| 5951 |
+
{
|
| 5952 |
+
"epoch": 0.9883040935672515,
|
| 5953 |
+
"grad_norm": 0.561511218547821,
|
| 5954 |
+
"learning_rate": 6.910449633501514e-08,
|
| 5955 |
+
"loss": 1.5021,
|
| 5956 |
+
"step": 845
|
| 5957 |
+
},
|
| 5958 |
+
{
|
| 5959 |
+
"epoch": 0.9894736842105263,
|
| 5960 |
+
"grad_norm": 0.41412779688835144,
|
| 5961 |
+
"learning_rate": 5.5975867109570036e-08,
|
| 5962 |
+
"loss": 2.2531,
|
| 5963 |
+
"step": 846
|
| 5964 |
+
},
|
| 5965 |
+
{
|
| 5966 |
+
"epoch": 0.9906432748538012,
|
| 5967 |
+
"grad_norm": 0.3571871817111969,
|
| 5968 |
+
"learning_rate": 4.422871170398635e-08,
|
| 5969 |
+
"loss": 1.3078,
|
| 5970 |
+
"step": 847
|
| 5971 |
+
},
|
| 5972 |
+
{
|
| 5973 |
+
"epoch": 0.991812865497076,
|
| 5974 |
+
"grad_norm": 0.6715613007545471,
|
| 5975 |
+
"learning_rate": 3.386319249303327e-08,
|
| 5976 |
+
"loss": 1.2575,
|
| 5977 |
+
"step": 848
|
| 5978 |
+
},
|
| 5979 |
+
{
|
| 5980 |
+
"epoch": 0.9929824561403509,
|
| 5981 |
+
"grad_norm": 0.36819925904273987,
|
| 5982 |
+
"learning_rate": 2.48794527538454e-08,
|
| 5983 |
+
"loss": 1.4645,
|
| 5984 |
+
"step": 849
|
| 5985 |
+
},
|
| 5986 |
+
{
|
| 5987 |
+
"epoch": 0.9941520467836257,
|
| 5988 |
+
"grad_norm": 0.3651365339756012,
|
| 5989 |
+
"learning_rate": 1.727761666394656e-08,
|
| 5990 |
+
"loss": 1.7481,
|
| 5991 |
+
"step": 850
|
| 5992 |
+
},
|
| 5993 |
+
{
|
| 5994 |
+
"epoch": 0.9953216374269006,
|
| 5995 |
+
"grad_norm": 0.34068191051483154,
|
| 5996 |
+
"learning_rate": 1.105778929951784e-08,
|
| 5997 |
+
"loss": 1.8654,
|
| 5998 |
+
"step": 851
|
| 5999 |
+
},
|
| 6000 |
+
{
|
| 6001 |
+
"epoch": 0.9964912280701754,
|
| 6002 |
+
"grad_norm": 0.4798254370689392,
|
| 6003 |
+
"learning_rate": 6.220056633987614e-09,
|
| 6004 |
+
"loss": 1.67,
|
| 6005 |
+
"step": 852
|
| 6006 |
+
},
|
| 6007 |
+
{
|
| 6008 |
+
"epoch": 0.9976608187134502,
|
| 6009 |
+
"grad_norm": 0.2631950378417969,
|
| 6010 |
+
"learning_rate": 2.764485536776995e-09,
|
| 6011 |
+
"loss": 0.6319,
|
| 6012 |
+
"step": 853
|
| 6013 |
+
},
|
| 6014 |
+
{
|
| 6015 |
+
"epoch": 0.9988304093567252,
|
| 6016 |
+
"grad_norm": 0.6361366510391235,
|
| 6017 |
+
"learning_rate": 6.911237724560593e-10,
|
| 6018 |
+
"loss": 0.7551,
|
| 6019 |
+
"step": 854
|
| 6020 |
+
},
|
| 6021 |
+
{
|
| 6022 |
+
"epoch": 1.0,
|
| 6023 |
+
"grad_norm": 0.6016250252723694,
|
| 6024 |
+
"learning_rate": 0.0,
|
| 6025 |
+
"loss": 1.5066,
|
| 6026 |
+
"step": 855
|
| 6027 |
}
|
| 6028 |
],
|
| 6029 |
"logging_steps": 1,
|
|
|
|
| 6038 |
"should_evaluate": false,
|
| 6039 |
"should_log": false,
|
| 6040 |
"should_save": true,
|
| 6041 |
+
"should_training_stop": true
|
| 6042 |
},
|
| 6043 |
"attributes": {}
|
| 6044 |
}
|
| 6045 |
},
|
| 6046 |
+
"total_flos": 1.649405017624412e+17,
|
| 6047 |
"train_batch_size": 2,
|
| 6048 |
"trial_name": null,
|
| 6049 |
"trial_params": null
|