Training in progress, step 17560, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 340808816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2de674690a633438550ec4972ea13f53310b5c6f840aa8aee6b93bf96ca8775a
|
| 3 |
size 340808816
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 173247691
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b6ab33af93f910df52436722052b5633ce791495843a964df9d799e0f2b2b4e
|
| 3 |
size 173247691
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b27a2512b4ad08672faec09eaf6ccb2c90de3ea41ac2f5dc0a7f6200a8bf86ed
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -121808,6 +121808,1126 @@
|
|
| 121808 |
"learning_rate": 1.1253000893848842e-08,
|
| 121809 |
"loss": 0.5825,
|
| 121810 |
"step": 17400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121811 |
}
|
| 121812 |
],
|
| 121813 |
"logging_steps": 1,
|
|
@@ -121822,12 +122942,12 @@
|
|
| 121822 |
"should_evaluate": false,
|
| 121823 |
"should_log": false,
|
| 121824 |
"should_save": true,
|
| 121825 |
-
"should_training_stop":
|
| 121826 |
},
|
| 121827 |
"attributes": {}
|
| 121828 |
}
|
| 121829 |
},
|
| 121830 |
-
"total_flos": 9.
|
| 121831 |
"train_batch_size": 8,
|
| 121832 |
"trial_name": null,
|
| 121833 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 20.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 17560,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 121808 |
"learning_rate": 1.1253000893848842e-08,
|
| 121809 |
"loss": 0.5825,
|
| 121810 |
"step": 17400
|
| 121811 |
+
},
|
| 121812 |
+
{
|
| 121813 |
+
"epoch": 19.81937321937322,
|
| 121814 |
+
"grad_norm": 0.20175950229167938,
|
| 121815 |
+
"learning_rate": 1.1113656515535086e-08,
|
| 121816 |
+
"loss": 0.7478,
|
| 121817 |
+
"step": 17401
|
| 121818 |
+
},
|
| 121819 |
+
{
|
| 121820 |
+
"epoch": 19.82051282051282,
|
| 121821 |
+
"grad_norm": 0.18214459717273712,
|
| 121822 |
+
"learning_rate": 1.09751800699609e-08,
|
| 121823 |
+
"loss": 0.5814,
|
| 121824 |
+
"step": 17402
|
| 121825 |
+
},
|
| 121826 |
+
{
|
| 121827 |
+
"epoch": 19.821652421652423,
|
| 121828 |
+
"grad_norm": 0.181244358420372,
|
| 121829 |
+
"learning_rate": 1.0837571561939097e-08,
|
| 121830 |
+
"loss": 0.8337,
|
| 121831 |
+
"step": 17403
|
| 121832 |
+
},
|
| 121833 |
+
{
|
| 121834 |
+
"epoch": 19.822792022792022,
|
| 121835 |
+
"grad_norm": 0.23323874175548553,
|
| 121836 |
+
"learning_rate": 1.070083099624919e-08,
|
| 121837 |
+
"loss": 0.4065,
|
| 121838 |
+
"step": 17404
|
| 121839 |
+
},
|
| 121840 |
+
{
|
| 121841 |
+
"epoch": 19.823931623931625,
|
| 121842 |
+
"grad_norm": 0.21152248978614807,
|
| 121843 |
+
"learning_rate": 1.0564958377640156e-08,
|
| 121844 |
+
"loss": 0.7171,
|
| 121845 |
+
"step": 17405
|
| 121846 |
+
},
|
| 121847 |
+
{
|
| 121848 |
+
"epoch": 19.825071225071227,
|
| 121849 |
+
"grad_norm": 0.23705808818340302,
|
| 121850 |
+
"learning_rate": 1.0429953710830443e-08,
|
| 121851 |
+
"loss": 0.8134,
|
| 121852 |
+
"step": 17406
|
| 121853 |
+
},
|
| 121854 |
+
{
|
| 121855 |
+
"epoch": 19.826210826210826,
|
| 121856 |
+
"grad_norm": 0.24272708594799042,
|
| 121857 |
+
"learning_rate": 1.0295817000513518e-08,
|
| 121858 |
+
"loss": 0.5927,
|
| 121859 |
+
"step": 17407
|
| 121860 |
+
},
|
| 121861 |
+
{
|
| 121862 |
+
"epoch": 19.827350427350428,
|
| 121863 |
+
"grad_norm": 0.19271287322044373,
|
| 121864 |
+
"learning_rate": 1.016254825134122e-08,
|
| 121865 |
+
"loss": 0.48,
|
| 121866 |
+
"step": 17408
|
| 121867 |
+
},
|
| 121868 |
+
{
|
| 121869 |
+
"epoch": 19.82849002849003,
|
| 121870 |
+
"grad_norm": 0.18881921470165253,
|
| 121871 |
+
"learning_rate": 1.0030147467945949e-08,
|
| 121872 |
+
"loss": 0.5836,
|
| 121873 |
+
"step": 17409
|
| 121874 |
+
},
|
| 121875 |
+
{
|
| 121876 |
+
"epoch": 19.82962962962963,
|
| 121877 |
+
"grad_norm": 0.23984551429748535,
|
| 121878 |
+
"learning_rate": 9.898614654929583e-09,
|
| 121879 |
+
"loss": 0.518,
|
| 121880 |
+
"step": 17410
|
| 121881 |
+
},
|
| 121882 |
+
{
|
| 121883 |
+
"epoch": 19.83076923076923,
|
| 121884 |
+
"grad_norm": 0.2106814980506897,
|
| 121885 |
+
"learning_rate": 9.767949816855137e-09,
|
| 121886 |
+
"loss": 0.44,
|
| 121887 |
+
"step": 17411
|
| 121888 |
+
},
|
| 121889 |
+
{
|
| 121890 |
+
"epoch": 19.831908831908834,
|
| 121891 |
+
"grad_norm": 0.18543528020381927,
|
| 121892 |
+
"learning_rate": 9.638152958263425e-09,
|
| 121893 |
+
"loss": 0.7746,
|
| 121894 |
+
"step": 17412
|
| 121895 |
+
},
|
| 121896 |
+
{
|
| 121897 |
+
"epoch": 19.833048433048432,
|
| 121898 |
+
"grad_norm": 0.18596503138542175,
|
| 121899 |
+
"learning_rate": 9.50922408366195e-09,
|
| 121900 |
+
"loss": 0.6435,
|
| 121901 |
+
"step": 17413
|
| 121902 |
+
},
|
| 121903 |
+
{
|
| 121904 |
+
"epoch": 19.834188034188035,
|
| 121905 |
+
"grad_norm": 0.22651363909244537,
|
| 121906 |
+
"learning_rate": 9.381163197527687e-09,
|
| 121907 |
+
"loss": 0.492,
|
| 121908 |
+
"step": 17414
|
| 121909 |
+
},
|
| 121910 |
+
{
|
| 121911 |
+
"epoch": 19.835327635327637,
|
| 121912 |
+
"grad_norm": 0.2185947149991989,
|
| 121913 |
+
"learning_rate": 9.253970304312632e-09,
|
| 121914 |
+
"loss": 0.615,
|
| 121915 |
+
"step": 17415
|
| 121916 |
+
},
|
| 121917 |
+
{
|
| 121918 |
+
"epoch": 19.836467236467236,
|
| 121919 |
+
"grad_norm": 0.19200536608695984,
|
| 121920 |
+
"learning_rate": 9.127645408432695e-09,
|
| 121921 |
+
"loss": 0.9006,
|
| 121922 |
+
"step": 17416
|
| 121923 |
+
},
|
| 121924 |
+
{
|
| 121925 |
+
"epoch": 19.837606837606838,
|
| 121926 |
+
"grad_norm": 0.19673481583595276,
|
| 121927 |
+
"learning_rate": 9.002188514273257e-09,
|
| 121928 |
+
"loss": 0.4871,
|
| 121929 |
+
"step": 17417
|
| 121930 |
+
},
|
| 121931 |
+
{
|
| 121932 |
+
"epoch": 19.83874643874644,
|
| 121933 |
+
"grad_norm": 0.1683470904827118,
|
| 121934 |
+
"learning_rate": 8.877599626194722e-09,
|
| 121935 |
+
"loss": 0.7764,
|
| 121936 |
+
"step": 17418
|
| 121937 |
+
},
|
| 121938 |
+
{
|
| 121939 |
+
"epoch": 19.83988603988604,
|
| 121940 |
+
"grad_norm": 0.2062290906906128,
|
| 121941 |
+
"learning_rate": 8.753878748521405e-09,
|
| 121942 |
+
"loss": 0.6479,
|
| 121943 |
+
"step": 17419
|
| 121944 |
+
},
|
| 121945 |
+
{
|
| 121946 |
+
"epoch": 19.84102564102564,
|
| 121947 |
+
"grad_norm": 0.16176415979862213,
|
| 121948 |
+
"learning_rate": 8.631025885552647e-09,
|
| 121949 |
+
"loss": 0.7682,
|
| 121950 |
+
"step": 17420
|
| 121951 |
+
},
|
| 121952 |
+
{
|
| 121953 |
+
"epoch": 19.842165242165244,
|
| 121954 |
+
"grad_norm": 0.3066081404685974,
|
| 121955 |
+
"learning_rate": 8.509041041554477e-09,
|
| 121956 |
+
"loss": 0.3119,
|
| 121957 |
+
"step": 17421
|
| 121958 |
+
},
|
| 121959 |
+
{
|
| 121960 |
+
"epoch": 19.843304843304843,
|
| 121961 |
+
"grad_norm": 0.20348377525806427,
|
| 121962 |
+
"learning_rate": 8.387924220765176e-09,
|
| 121963 |
+
"loss": 0.7985,
|
| 121964 |
+
"step": 17422
|
| 121965 |
+
},
|
| 121966 |
+
{
|
| 121967 |
+
"epoch": 19.844444444444445,
|
| 121968 |
+
"grad_norm": 0.18342828750610352,
|
| 121969 |
+
"learning_rate": 8.267675427386933e-09,
|
| 121970 |
+
"loss": 0.6546,
|
| 121971 |
+
"step": 17423
|
| 121972 |
+
},
|
| 121973 |
+
{
|
| 121974 |
+
"epoch": 19.845584045584047,
|
| 121975 |
+
"grad_norm": 0.22837497293949127,
|
| 121976 |
+
"learning_rate": 8.148294665605293e-09,
|
| 121977 |
+
"loss": 0.8583,
|
| 121978 |
+
"step": 17424
|
| 121979 |
+
},
|
| 121980 |
+
{
|
| 121981 |
+
"epoch": 19.846723646723646,
|
| 121982 |
+
"grad_norm": 0.19176903367042542,
|
| 121983 |
+
"learning_rate": 8.029781939558612e-09,
|
| 121984 |
+
"loss": 0.6793,
|
| 121985 |
+
"step": 17425
|
| 121986 |
+
},
|
| 121987 |
+
{
|
| 121988 |
+
"epoch": 19.84786324786325,
|
| 121989 |
+
"grad_norm": 0.20270290970802307,
|
| 121990 |
+
"learning_rate": 7.912137253365814e-09,
|
| 121991 |
+
"loss": 0.7321,
|
| 121992 |
+
"step": 17426
|
| 121993 |
+
},
|
| 121994 |
+
{
|
| 121995 |
+
"epoch": 19.84900284900285,
|
| 121996 |
+
"grad_norm": 0.21365559101104736,
|
| 121997 |
+
"learning_rate": 7.795360611112523e-09,
|
| 121998 |
+
"loss": 0.7893,
|
| 121999 |
+
"step": 17427
|
| 122000 |
+
},
|
| 122001 |
+
{
|
| 122002 |
+
"epoch": 19.85014245014245,
|
| 122003 |
+
"grad_norm": 0.19575366377830505,
|
| 122004 |
+
"learning_rate": 7.679452016853827e-09,
|
| 122005 |
+
"loss": 0.6769,
|
| 122006 |
+
"step": 17428
|
| 122007 |
+
},
|
| 122008 |
+
{
|
| 122009 |
+
"epoch": 19.851282051282052,
|
| 122010 |
+
"grad_norm": 0.16900449991226196,
|
| 122011 |
+
"learning_rate": 7.564411474619837e-09,
|
| 122012 |
+
"loss": 0.7527,
|
| 122013 |
+
"step": 17429
|
| 122014 |
+
},
|
| 122015 |
+
{
|
| 122016 |
+
"epoch": 19.852421652421654,
|
| 122017 |
+
"grad_norm": 0.20102092623710632,
|
| 122018 |
+
"learning_rate": 7.4502389884018035e-09,
|
| 122019 |
+
"loss": 0.6776,
|
| 122020 |
+
"step": 17430
|
| 122021 |
+
},
|
| 122022 |
+
{
|
| 122023 |
+
"epoch": 19.853561253561253,
|
| 122024 |
+
"grad_norm": 0.17000263929367065,
|
| 122025 |
+
"learning_rate": 7.3369345621687735e-09,
|
| 122026 |
+
"loss": 0.7118,
|
| 122027 |
+
"step": 17431
|
| 122028 |
+
},
|
| 122029 |
+
{
|
| 122030 |
+
"epoch": 19.854700854700855,
|
| 122031 |
+
"grad_norm": 0.20500367879867554,
|
| 122032 |
+
"learning_rate": 7.224498199850938e-09,
|
| 122033 |
+
"loss": 0.5854,
|
| 122034 |
+
"step": 17432
|
| 122035 |
+
},
|
| 122036 |
+
{
|
| 122037 |
+
"epoch": 19.855840455840458,
|
| 122038 |
+
"grad_norm": 0.26341789960861206,
|
| 122039 |
+
"learning_rate": 7.1129299053590556e-09,
|
| 122040 |
+
"loss": 0.4035,
|
| 122041 |
+
"step": 17433
|
| 122042 |
+
},
|
| 122043 |
+
{
|
| 122044 |
+
"epoch": 19.856980056980056,
|
| 122045 |
+
"grad_norm": 0.22894629836082458,
|
| 122046 |
+
"learning_rate": 7.002229682565031e-09,
|
| 122047 |
+
"loss": 0.425,
|
| 122048 |
+
"step": 17434
|
| 122049 |
+
},
|
| 122050 |
+
{
|
| 122051 |
+
"epoch": 19.85811965811966,
|
| 122052 |
+
"grad_norm": 0.15743249654769897,
|
| 122053 |
+
"learning_rate": 6.892397535313011e-09,
|
| 122054 |
+
"loss": 0.5116,
|
| 122055 |
+
"step": 17435
|
| 122056 |
+
},
|
| 122057 |
+
{
|
| 122058 |
+
"epoch": 19.85925925925926,
|
| 122059 |
+
"grad_norm": 0.18057456612586975,
|
| 122060 |
+
"learning_rate": 6.783433467422162e-09,
|
| 122061 |
+
"loss": 0.7988,
|
| 122062 |
+
"step": 17436
|
| 122063 |
+
},
|
| 122064 |
+
{
|
| 122065 |
+
"epoch": 19.86039886039886,
|
| 122066 |
+
"grad_norm": 0.18068578839302063,
|
| 122067 |
+
"learning_rate": 6.675337482672794e-09,
|
| 122068 |
+
"loss": 0.7424,
|
| 122069 |
+
"step": 17437
|
| 122070 |
+
},
|
| 122071 |
+
{
|
| 122072 |
+
"epoch": 19.861538461538462,
|
| 122073 |
+
"grad_norm": 0.17379270493984222,
|
| 122074 |
+
"learning_rate": 6.568109584820236e-09,
|
| 122075 |
+
"loss": 0.5549,
|
| 122076 |
+
"step": 17438
|
| 122077 |
+
},
|
| 122078 |
+
{
|
| 122079 |
+
"epoch": 19.862678062678064,
|
| 122080 |
+
"grad_norm": 0.2070305347442627,
|
| 122081 |
+
"learning_rate": 6.461749777592063e-09,
|
| 122082 |
+
"loss": 0.5018,
|
| 122083 |
+
"step": 17439
|
| 122084 |
+
},
|
| 122085 |
+
{
|
| 122086 |
+
"epoch": 19.863817663817663,
|
| 122087 |
+
"grad_norm": 0.18210367858409882,
|
| 122088 |
+
"learning_rate": 6.35625806467699e-09,
|
| 122089 |
+
"loss": 0.8099,
|
| 122090 |
+
"step": 17440
|
| 122091 |
+
},
|
| 122092 |
+
{
|
| 122093 |
+
"epoch": 19.864957264957265,
|
| 122094 |
+
"grad_norm": 0.17534932494163513,
|
| 122095 |
+
"learning_rate": 6.251634449741528e-09,
|
| 122096 |
+
"loss": 0.5797,
|
| 122097 |
+
"step": 17441
|
| 122098 |
+
},
|
| 122099 |
+
{
|
| 122100 |
+
"epoch": 19.866096866096868,
|
| 122101 |
+
"grad_norm": 0.21269935369491577,
|
| 122102 |
+
"learning_rate": 6.147878936421658e-09,
|
| 122103 |
+
"loss": 0.5527,
|
| 122104 |
+
"step": 17442
|
| 122105 |
+
},
|
| 122106 |
+
{
|
| 122107 |
+
"epoch": 19.867236467236467,
|
| 122108 |
+
"grad_norm": 0.20287007093429565,
|
| 122109 |
+
"learning_rate": 6.044991528320054e-09,
|
| 122110 |
+
"loss": 0.5098,
|
| 122111 |
+
"step": 17443
|
| 122112 |
+
},
|
| 122113 |
+
{
|
| 122114 |
+
"epoch": 19.86837606837607,
|
| 122115 |
+
"grad_norm": 0.1705034375190735,
|
| 122116 |
+
"learning_rate": 5.9429722290088586e-09,
|
| 122117 |
+
"loss": 0.7357,
|
| 122118 |
+
"step": 17444
|
| 122119 |
+
},
|
| 122120 |
+
{
|
| 122121 |
+
"epoch": 19.86951566951567,
|
| 122122 |
+
"grad_norm": 0.1602213978767395,
|
| 122123 |
+
"learning_rate": 5.8418210420296825e-09,
|
| 122124 |
+
"loss": 0.9977,
|
| 122125 |
+
"step": 17445
|
| 122126 |
+
},
|
| 122127 |
+
{
|
| 122128 |
+
"epoch": 19.87065527065527,
|
| 122129 |
+
"grad_norm": 0.16496537625789642,
|
| 122130 |
+
"learning_rate": 5.741537970901934e-09,
|
| 122131 |
+
"loss": 0.7655,
|
| 122132 |
+
"step": 17446
|
| 122133 |
+
},
|
| 122134 |
+
{
|
| 122135 |
+
"epoch": 19.871794871794872,
|
| 122136 |
+
"grad_norm": 0.16658329963684082,
|
| 122137 |
+
"learning_rate": 5.642123019103384e-09,
|
| 122138 |
+
"loss": 0.744,
|
| 122139 |
+
"step": 17447
|
| 122140 |
+
},
|
| 122141 |
+
{
|
| 122142 |
+
"epoch": 19.872934472934475,
|
| 122143 |
+
"grad_norm": 0.2549525201320648,
|
| 122144 |
+
"learning_rate": 5.54357619008683e-09,
|
| 122145 |
+
"loss": 0.5534,
|
| 122146 |
+
"step": 17448
|
| 122147 |
+
},
|
| 122148 |
+
{
|
| 122149 |
+
"epoch": 19.874074074074073,
|
| 122150 |
+
"grad_norm": 0.19434592127799988,
|
| 122151 |
+
"learning_rate": 5.445897487280083e-09,
|
| 122152 |
+
"loss": 0.7252,
|
| 122153 |
+
"step": 17449
|
| 122154 |
+
},
|
| 122155 |
+
{
|
| 122156 |
+
"epoch": 19.875213675213676,
|
| 122157 |
+
"grad_norm": 0.18083354830741882,
|
| 122158 |
+
"learning_rate": 5.349086914069323e-09,
|
| 122159 |
+
"loss": 0.7156,
|
| 122160 |
+
"step": 17450
|
| 122161 |
+
},
|
| 122162 |
+
{
|
| 122163 |
+
"epoch": 19.876353276353278,
|
| 122164 |
+
"grad_norm": 0.1755906641483307,
|
| 122165 |
+
"learning_rate": 5.2531444738240785e-09,
|
| 122166 |
+
"loss": 0.7394,
|
| 122167 |
+
"step": 17451
|
| 122168 |
+
},
|
| 122169 |
+
{
|
| 122170 |
+
"epoch": 19.877492877492877,
|
| 122171 |
+
"grad_norm": 0.24183988571166992,
|
| 122172 |
+
"learning_rate": 5.158070169869467e-09,
|
| 122173 |
+
"loss": 0.543,
|
| 122174 |
+
"step": 17452
|
| 122175 |
+
},
|
| 122176 |
+
{
|
| 122177 |
+
"epoch": 19.87863247863248,
|
| 122178 |
+
"grad_norm": 0.18133480846881866,
|
| 122179 |
+
"learning_rate": 5.063864005513952e-09,
|
| 122180 |
+
"loss": 0.5583,
|
| 122181 |
+
"step": 17453
|
| 122182 |
+
},
|
| 122183 |
+
{
|
| 122184 |
+
"epoch": 19.87977207977208,
|
| 122185 |
+
"grad_norm": 0.1737232208251953,
|
| 122186 |
+
"learning_rate": 4.970525984024365e-09,
|
| 122187 |
+
"loss": 0.7472,
|
| 122188 |
+
"step": 17454
|
| 122189 |
+
},
|
| 122190 |
+
{
|
| 122191 |
+
"epoch": 19.88091168091168,
|
| 122192 |
+
"grad_norm": 0.230348140001297,
|
| 122193 |
+
"learning_rate": 4.87805610864811e-09,
|
| 122194 |
+
"loss": 0.6935,
|
| 122195 |
+
"step": 17455
|
| 122196 |
+
},
|
| 122197 |
+
{
|
| 122198 |
+
"epoch": 19.882051282051282,
|
| 122199 |
+
"grad_norm": 0.19094283878803253,
|
| 122200 |
+
"learning_rate": 4.786454382590955e-09,
|
| 122201 |
+
"loss": 0.6521,
|
| 122202 |
+
"step": 17456
|
| 122203 |
+
},
|
| 122204 |
+
{
|
| 122205 |
+
"epoch": 19.883190883190885,
|
| 122206 |
+
"grad_norm": 0.18163184821605682,
|
| 122207 |
+
"learning_rate": 4.695720809039239e-09,
|
| 122208 |
+
"loss": 0.654,
|
| 122209 |
+
"step": 17457
|
| 122210 |
+
},
|
| 122211 |
+
{
|
| 122212 |
+
"epoch": 19.884330484330484,
|
| 122213 |
+
"grad_norm": 0.23837114870548248,
|
| 122214 |
+
"learning_rate": 4.605855391140446e-09,
|
| 122215 |
+
"loss": 0.7393,
|
| 122216 |
+
"step": 17458
|
| 122217 |
+
},
|
| 122218 |
+
{
|
| 122219 |
+
"epoch": 19.885470085470086,
|
| 122220 |
+
"grad_norm": 0.19984039664268494,
|
| 122221 |
+
"learning_rate": 4.5168581320198524e-09,
|
| 122222 |
+
"loss": 0.8007,
|
| 122223 |
+
"step": 17459
|
| 122224 |
+
},
|
| 122225 |
+
{
|
| 122226 |
+
"epoch": 19.886609686609688,
|
| 122227 |
+
"grad_norm": 0.24067912995815277,
|
| 122228 |
+
"learning_rate": 4.428729034763879e-09,
|
| 122229 |
+
"loss": 0.604,
|
| 122230 |
+
"step": 17460
|
| 122231 |
+
},
|
| 122232 |
+
{
|
| 122233 |
+
"epoch": 19.887749287749287,
|
| 122234 |
+
"grad_norm": 0.17550112307071686,
|
| 122235 |
+
"learning_rate": 4.341468102439516e-09,
|
| 122236 |
+
"loss": 0.5195,
|
| 122237 |
+
"step": 17461
|
| 122238 |
+
},
|
| 122239 |
+
{
|
| 122240 |
+
"epoch": 19.88888888888889,
|
| 122241 |
+
"grad_norm": 0.21476757526397705,
|
| 122242 |
+
"learning_rate": 4.255075338072123e-09,
|
| 122243 |
+
"loss": 0.5572,
|
| 122244 |
+
"step": 17462
|
| 122245 |
+
},
|
| 122246 |
+
{
|
| 122247 |
+
"epoch": 19.89002849002849,
|
| 122248 |
+
"grad_norm": 0.22562627494335175,
|
| 122249 |
+
"learning_rate": 4.1695507446648515e-09,
|
| 122250 |
+
"loss": 0.5565,
|
| 122251 |
+
"step": 17463
|
| 122252 |
+
},
|
| 122253 |
+
{
|
| 122254 |
+
"epoch": 19.89116809116809,
|
| 122255 |
+
"grad_norm": 0.18826919794082642,
|
| 122256 |
+
"learning_rate": 4.084894325190325e-09,
|
| 122257 |
+
"loss": 0.6722,
|
| 122258 |
+
"step": 17464
|
| 122259 |
+
},
|
| 122260 |
+
{
|
| 122261 |
+
"epoch": 19.892307692307693,
|
| 122262 |
+
"grad_norm": 0.1579591929912567,
|
| 122263 |
+
"learning_rate": 4.0011060825823065e-09,
|
| 122264 |
+
"loss": 0.7509,
|
| 122265 |
+
"step": 17465
|
| 122266 |
+
},
|
| 122267 |
+
{
|
| 122268 |
+
"epoch": 19.893447293447295,
|
| 122269 |
+
"grad_norm": 0.2192116379737854,
|
| 122270 |
+
"learning_rate": 3.9181860197579075e-09,
|
| 122271 |
+
"loss": 0.7129,
|
| 122272 |
+
"step": 17466
|
| 122273 |
+
},
|
| 122274 |
+
{
|
| 122275 |
+
"epoch": 19.894586894586894,
|
| 122276 |
+
"grad_norm": 0.20730279386043549,
|
| 122277 |
+
"learning_rate": 3.8361341395953824e-09,
|
| 122278 |
+
"loss": 0.592,
|
| 122279 |
+
"step": 17467
|
| 122280 |
+
},
|
| 122281 |
+
{
|
| 122282 |
+
"epoch": 19.895726495726496,
|
| 122283 |
+
"grad_norm": 0.19416391849517822,
|
| 122284 |
+
"learning_rate": 3.754950444942451e-09,
|
| 122285 |
+
"loss": 0.6956,
|
| 122286 |
+
"step": 17468
|
| 122287 |
+
},
|
| 122288 |
+
{
|
| 122289 |
+
"epoch": 19.8968660968661,
|
| 122290 |
+
"grad_norm": 0.19905336201190948,
|
| 122291 |
+
"learning_rate": 3.6746349386190814e-09,
|
| 122292 |
+
"loss": 0.6212,
|
| 122293 |
+
"step": 17469
|
| 122294 |
+
},
|
| 122295 |
+
{
|
| 122296 |
+
"epoch": 19.898005698005697,
|
| 122297 |
+
"grad_norm": 0.18987423181533813,
|
| 122298 |
+
"learning_rate": 3.5951876234147087e-09,
|
| 122299 |
+
"loss": 0.6066,
|
| 122300 |
+
"step": 17470
|
| 122301 |
+
},
|
| 122302 |
+
{
|
| 122303 |
+
"epoch": 19.8991452991453,
|
| 122304 |
+
"grad_norm": 0.17264628410339355,
|
| 122305 |
+
"learning_rate": 3.516608502093788e-09,
|
| 122306 |
+
"loss": 0.7292,
|
| 122307 |
+
"step": 17471
|
| 122308 |
+
},
|
| 122309 |
+
{
|
| 122310 |
+
"epoch": 19.900284900284902,
|
| 122311 |
+
"grad_norm": 0.2468525469303131,
|
| 122312 |
+
"learning_rate": 3.438897577379141e-09,
|
| 122313 |
+
"loss": 0.5559,
|
| 122314 |
+
"step": 17472
|
| 122315 |
+
},
|
| 122316 |
+
{
|
| 122317 |
+
"epoch": 19.9014245014245,
|
| 122318 |
+
"grad_norm": 0.19226570427417755,
|
| 122319 |
+
"learning_rate": 3.3620548519713855e-09,
|
| 122320 |
+
"loss": 0.6404,
|
| 122321 |
+
"step": 17473
|
| 122322 |
+
},
|
| 122323 |
+
{
|
| 122324 |
+
"epoch": 19.902564102564103,
|
| 122325 |
+
"grad_norm": 0.17536082863807678,
|
| 122326 |
+
"learning_rate": 3.2860803285406085e-09,
|
| 122327 |
+
"loss": 0.5615,
|
| 122328 |
+
"step": 17474
|
| 122329 |
+
},
|
| 122330 |
+
{
|
| 122331 |
+
"epoch": 19.903703703703705,
|
| 122332 |
+
"grad_norm": 0.19645725190639496,
|
| 122333 |
+
"learning_rate": 3.2109740097291397e-09,
|
| 122334 |
+
"loss": 0.5813,
|
| 122335 |
+
"step": 17475
|
| 122336 |
+
},
|
| 122337 |
+
{
|
| 122338 |
+
"epoch": 19.904843304843304,
|
| 122339 |
+
"grad_norm": 0.1624937653541565,
|
| 122340 |
+
"learning_rate": 3.1367358981376773e-09,
|
| 122341 |
+
"loss": 0.8953,
|
| 122342 |
+
"step": 17476
|
| 122343 |
+
},
|
| 122344 |
+
{
|
| 122345 |
+
"epoch": 19.905982905982906,
|
| 122346 |
+
"grad_norm": 0.27368196845054626,
|
| 122347 |
+
"learning_rate": 3.063365996350265e-09,
|
| 122348 |
+
"loss": 0.5482,
|
| 122349 |
+
"step": 17477
|
| 122350 |
+
},
|
| 122351 |
+
{
|
| 122352 |
+
"epoch": 19.90712250712251,
|
| 122353 |
+
"grad_norm": 0.16922980546951294,
|
| 122354 |
+
"learning_rate": 2.9908643069148646e-09,
|
| 122355 |
+
"loss": 0.6721,
|
| 122356 |
+
"step": 17478
|
| 122357 |
+
},
|
| 122358 |
+
{
|
| 122359 |
+
"epoch": 19.908262108262107,
|
| 122360 |
+
"grad_norm": 0.22800640761852264,
|
| 122361 |
+
"learning_rate": 2.919230832348907e-09,
|
| 122362 |
+
"loss": 0.4294,
|
| 122363 |
+
"step": 17479
|
| 122364 |
+
},
|
| 122365 |
+
{
|
| 122366 |
+
"epoch": 19.90940170940171,
|
| 122367 |
+
"grad_norm": 0.1852252036333084,
|
| 122368 |
+
"learning_rate": 2.848465575139292e-09,
|
| 122369 |
+
"loss": 0.867,
|
| 122370 |
+
"step": 17480
|
| 122371 |
+
},
|
| 122372 |
+
{
|
| 122373 |
+
"epoch": 19.910541310541312,
|
| 122374 |
+
"grad_norm": 0.21090421080589294,
|
| 122375 |
+
"learning_rate": 2.778568537745163e-09,
|
| 122376 |
+
"loss": 0.6482,
|
| 122377 |
+
"step": 17481
|
| 122378 |
+
},
|
| 122379 |
+
{
|
| 122380 |
+
"epoch": 19.91168091168091,
|
| 122381 |
+
"grad_norm": 0.16362862288951874,
|
| 122382 |
+
"learning_rate": 2.7095397225951337e-09,
|
| 122383 |
+
"loss": 0.7249,
|
| 122384 |
+
"step": 17482
|
| 122385 |
+
},
|
| 122386 |
+
{
|
| 122387 |
+
"epoch": 19.912820512820513,
|
| 122388 |
+
"grad_norm": 0.1504623293876648,
|
| 122389 |
+
"learning_rate": 2.6413791320845095e-09,
|
| 122390 |
+
"loss": 0.6008,
|
| 122391 |
+
"step": 17483
|
| 122392 |
+
},
|
| 122393 |
+
{
|
| 122394 |
+
"epoch": 19.913960113960115,
|
| 122395 |
+
"grad_norm": 0.20504003763198853,
|
| 122396 |
+
"learning_rate": 2.574086768580841e-09,
|
| 122397 |
+
"loss": 0.6468,
|
| 122398 |
+
"step": 17484
|
| 122399 |
+
},
|
| 122400 |
+
{
|
| 122401 |
+
"epoch": 19.915099715099714,
|
| 122402 |
+
"grad_norm": 0.17852190136909485,
|
| 122403 |
+
"learning_rate": 2.507662634423924e-09,
|
| 122404 |
+
"loss": 0.6648,
|
| 122405 |
+
"step": 17485
|
| 122406 |
+
},
|
| 122407 |
+
{
|
| 122408 |
+
"epoch": 19.916239316239317,
|
| 122409 |
+
"grad_norm": 0.23240981996059418,
|
| 122410 |
+
"learning_rate": 2.442106731920246e-09,
|
| 122411 |
+
"loss": 0.6454,
|
| 122412 |
+
"step": 17486
|
| 122413 |
+
},
|
| 122414 |
+
{
|
| 122415 |
+
"epoch": 19.91737891737892,
|
| 122416 |
+
"grad_norm": 0.17049475014209747,
|
| 122417 |
+
"learning_rate": 2.3774190633429894e-09,
|
| 122418 |
+
"loss": 0.6768,
|
| 122419 |
+
"step": 17487
|
| 122420 |
+
},
|
| 122421 |
+
{
|
| 122422 |
+
"epoch": 19.918518518518518,
|
| 122423 |
+
"grad_norm": 0.2072330266237259,
|
| 122424 |
+
"learning_rate": 2.313599630943131e-09,
|
| 122425 |
+
"loss": 0.7361,
|
| 122426 |
+
"step": 17488
|
| 122427 |
+
},
|
| 122428 |
+
{
|
| 122429 |
+
"epoch": 19.91965811965812,
|
| 122430 |
+
"grad_norm": 0.1602887511253357,
|
| 122431 |
+
"learning_rate": 2.250648436935565e-09,
|
| 122432 |
+
"loss": 0.566,
|
| 122433 |
+
"step": 17489
|
| 122434 |
+
},
|
| 122435 |
+
{
|
| 122436 |
+
"epoch": 19.920797720797722,
|
| 122437 |
+
"grad_norm": 0.1839447319507599,
|
| 122438 |
+
"learning_rate": 2.188565483507432e-09,
|
| 122439 |
+
"loss": 0.487,
|
| 122440 |
+
"step": 17490
|
| 122441 |
+
},
|
| 122442 |
+
{
|
| 122443 |
+
"epoch": 19.92193732193732,
|
| 122444 |
+
"grad_norm": 0.19115093350410461,
|
| 122445 |
+
"learning_rate": 2.12735077281534e-09,
|
| 122446 |
+
"loss": 0.6366,
|
| 122447 |
+
"step": 17491
|
| 122448 |
+
},
|
| 122449 |
+
{
|
| 122450 |
+
"epoch": 19.923076923076923,
|
| 122451 |
+
"grad_norm": 0.21729104220867157,
|
| 122452 |
+
"learning_rate": 2.0670043069825894e-09,
|
| 122453 |
+
"loss": 0.4544,
|
| 122454 |
+
"step": 17492
|
| 122455 |
+
},
|
| 122456 |
+
{
|
| 122457 |
+
"epoch": 19.924216524216526,
|
| 122458 |
+
"grad_norm": 0.20541562139987946,
|
| 122459 |
+
"learning_rate": 2.0075260881102787e-09,
|
| 122460 |
+
"loss": 0.6521,
|
| 122461 |
+
"step": 17493
|
| 122462 |
+
},
|
| 122463 |
+
{
|
| 122464 |
+
"epoch": 19.925356125356124,
|
| 122465 |
+
"grad_norm": 0.21780884265899658,
|
| 122466 |
+
"learning_rate": 1.9489161182578708e-09,
|
| 122467 |
+
"loss": 0.7322,
|
| 122468 |
+
"step": 17494
|
| 122469 |
+
},
|
| 122470 |
+
{
|
| 122471 |
+
"epoch": 19.926495726495727,
|
| 122472 |
+
"grad_norm": 0.20379436016082764,
|
| 122473 |
+
"learning_rate": 1.891174399468176e-09,
|
| 122474 |
+
"loss": 0.5706,
|
| 122475 |
+
"step": 17495
|
| 122476 |
+
},
|
| 122477 |
+
{
|
| 122478 |
+
"epoch": 19.92763532763533,
|
| 122479 |
+
"grad_norm": 0.14642983675003052,
|
| 122480 |
+
"learning_rate": 1.8343009337395967e-09,
|
| 122481 |
+
"loss": 0.4325,
|
| 122482 |
+
"step": 17496
|
| 122483 |
+
},
|
| 122484 |
+
{
|
| 122485 |
+
"epoch": 19.928774928774928,
|
| 122486 |
+
"grad_norm": 0.18292292952537537,
|
| 122487 |
+
"learning_rate": 1.77829572305388e-09,
|
| 122488 |
+
"loss": 0.5872,
|
| 122489 |
+
"step": 17497
|
| 122490 |
+
},
|
| 122491 |
+
{
|
| 122492 |
+
"epoch": 19.92991452991453,
|
| 122493 |
+
"grad_norm": 0.17598862946033478,
|
| 122494 |
+
"learning_rate": 1.7231587693511409e-09,
|
| 122495 |
+
"loss": 0.582,
|
| 122496 |
+
"step": 17498
|
| 122497 |
+
},
|
| 122498 |
+
{
|
| 122499 |
+
"epoch": 19.931054131054132,
|
| 122500 |
+
"grad_norm": 0.1596013307571411,
|
| 122501 |
+
"learning_rate": 1.6688900745492898e-09,
|
| 122502 |
+
"loss": 0.8117,
|
| 122503 |
+
"step": 17499
|
| 122504 |
+
},
|
| 122505 |
+
{
|
| 122506 |
+
"epoch": 19.93219373219373,
|
| 122507 |
+
"grad_norm": 0.2092096358537674,
|
| 122508 |
+
"learning_rate": 1.6154896405329302e-09,
|
| 122509 |
+
"loss": 0.7535,
|
| 122510 |
+
"step": 17500
|
| 122511 |
+
},
|
| 122512 |
+
{
|
| 122513 |
+
"epoch": 19.933333333333334,
|
| 122514 |
+
"grad_norm": 0.2274015098810196,
|
| 122515 |
+
"learning_rate": 1.5629574691561344e-09,
|
| 122516 |
+
"loss": 0.6412,
|
| 122517 |
+
"step": 17501
|
| 122518 |
+
},
|
| 122519 |
+
{
|
| 122520 |
+
"epoch": 19.934472934472936,
|
| 122521 |
+
"grad_norm": 0.19499576091766357,
|
| 122522 |
+
"learning_rate": 1.5112935622452196e-09,
|
| 122523 |
+
"loss": 0.6974,
|
| 122524 |
+
"step": 17502
|
| 122525 |
+
},
|
| 122526 |
+
{
|
| 122527 |
+
"epoch": 19.935612535612535,
|
| 122528 |
+
"grad_norm": 0.19396720826625824,
|
| 122529 |
+
"learning_rate": 1.4604979215904203e-09,
|
| 122530 |
+
"loss": 0.7163,
|
| 122531 |
+
"step": 17503
|
| 122532 |
+
},
|
| 122533 |
+
{
|
| 122534 |
+
"epoch": 19.936752136752137,
|
| 122535 |
+
"grad_norm": 0.16685441136360168,
|
| 122536 |
+
"learning_rate": 1.4105705489597664e-09,
|
| 122537 |
+
"loss": 0.7161,
|
| 122538 |
+
"step": 17504
|
| 122539 |
+
},
|
| 122540 |
+
{
|
| 122541 |
+
"epoch": 19.93789173789174,
|
| 122542 |
+
"grad_norm": 0.1716783493757248,
|
| 122543 |
+
"learning_rate": 1.3615114460879819e-09,
|
| 122544 |
+
"loss": 0.533,
|
| 122545 |
+
"step": 17505
|
| 122546 |
+
},
|
| 122547 |
+
{
|
| 122548 |
+
"epoch": 19.939031339031338,
|
| 122549 |
+
"grad_norm": 0.21904678642749786,
|
| 122550 |
+
"learning_rate": 1.3133206146764832e-09,
|
| 122551 |
+
"loss": 0.5761,
|
| 122552 |
+
"step": 17506
|
| 122553 |
+
},
|
| 122554 |
+
{
|
| 122555 |
+
"epoch": 19.94017094017094,
|
| 122556 |
+
"grad_norm": 0.1928315907716751,
|
| 122557 |
+
"learning_rate": 1.2659980563989315e-09,
|
| 122558 |
+
"loss": 0.6561,
|
| 122559 |
+
"step": 17507
|
| 122560 |
+
},
|
| 122561 |
+
{
|
| 122562 |
+
"epoch": 19.941310541310543,
|
| 122563 |
+
"grad_norm": 0.21238145232200623,
|
| 122564 |
+
"learning_rate": 1.2195437729012328e-09,
|
| 122565 |
+
"loss": 0.6436,
|
| 122566 |
+
"step": 17508
|
| 122567 |
+
},
|
| 122568 |
+
{
|
| 122569 |
+
"epoch": 19.94245014245014,
|
| 122570 |
+
"grad_norm": 0.16020573675632477,
|
| 122571 |
+
"learning_rate": 1.1739577657959856e-09,
|
| 122572 |
+
"loss": 0.679,
|
| 122573 |
+
"step": 17509
|
| 122574 |
+
},
|
| 122575 |
+
{
|
| 122576 |
+
"epoch": 19.943589743589744,
|
| 122577 |
+
"grad_norm": 0.17376503348350525,
|
| 122578 |
+
"learning_rate": 1.1292400366652579e-09,
|
| 122579 |
+
"loss": 0.4443,
|
| 122580 |
+
"step": 17510
|
| 122581 |
+
},
|
| 122582 |
+
{
|
| 122583 |
+
"epoch": 19.944729344729346,
|
| 122584 |
+
"grad_norm": 0.19276556372642517,
|
| 122585 |
+
"learning_rate": 1.0853905870633618e-09,
|
| 122586 |
+
"loss": 0.6591,
|
| 122587 |
+
"step": 17511
|
| 122588 |
+
},
|
| 122589 |
+
{
|
| 122590 |
+
"epoch": 19.945868945868945,
|
| 122591 |
+
"grad_norm": 0.2291955053806305,
|
| 122592 |
+
"learning_rate": 1.0424094185140786e-09,
|
| 122593 |
+
"loss": 0.4883,
|
| 122594 |
+
"step": 17512
|
| 122595 |
+
},
|
| 122596 |
+
{
|
| 122597 |
+
"epoch": 19.947008547008547,
|
| 122598 |
+
"grad_norm": 0.18416431546211243,
|
| 122599 |
+
"learning_rate": 1.0002965325078828e-09,
|
| 122600 |
+
"loss": 0.8636,
|
| 122601 |
+
"step": 17513
|
| 122602 |
+
},
|
| 122603 |
+
{
|
| 122604 |
+
"epoch": 19.94814814814815,
|
| 122605 |
+
"grad_norm": 0.1676609218120575,
|
| 122606 |
+
"learning_rate": 9.590519305102685e-10,
|
| 122607 |
+
"loss": 0.789,
|
| 122608 |
+
"step": 17514
|
| 122609 |
+
},
|
| 122610 |
+
{
|
| 122611 |
+
"epoch": 19.94928774928775,
|
| 122612 |
+
"grad_norm": 0.15865011513233185,
|
| 122613 |
+
"learning_rate": 9.186756139534236e-10,
|
| 122614 |
+
"loss": 0.7431,
|
| 122615 |
+
"step": 17515
|
| 122616 |
+
},
|
| 122617 |
+
{
|
| 122618 |
+
"epoch": 19.95042735042735,
|
| 122619 |
+
"grad_norm": 0.16312918066978455,
|
| 122620 |
+
"learning_rate": 8.791675842362291e-10,
|
| 122621 |
+
"loss": 0.4105,
|
| 122622 |
+
"step": 17516
|
| 122623 |
+
},
|
| 122624 |
+
{
|
| 122625 |
+
"epoch": 19.951566951566953,
|
| 122626 |
+
"grad_norm": 0.1837480366230011,
|
| 122627 |
+
"learning_rate": 8.405278427325858e-10,
|
| 122628 |
+
"loss": 0.7236,
|
| 122629 |
+
"step": 17517
|
| 122630 |
+
},
|
| 122631 |
+
{
|
| 122632 |
+
"epoch": 19.95270655270655,
|
| 122633 |
+
"grad_norm": 0.19747784733772278,
|
| 122634 |
+
"learning_rate": 8.027563907886392e-10,
|
| 122635 |
+
"loss": 0.6793,
|
| 122636 |
+
"step": 17518
|
| 122637 |
+
},
|
| 122638 |
+
{
|
| 122639 |
+
"epoch": 19.953846153846154,
|
| 122640 |
+
"grad_norm": 0.1995292603969574,
|
| 122641 |
+
"learning_rate": 7.658532297116772e-10,
|
| 122642 |
+
"loss": 0.6804,
|
| 122643 |
+
"step": 17519
|
| 122644 |
+
},
|
| 122645 |
+
{
|
| 122646 |
+
"epoch": 19.954985754985756,
|
| 122647 |
+
"grad_norm": 0.20492805540561676,
|
| 122648 |
+
"learning_rate": 7.29818360784007e-10,
|
| 122649 |
+
"loss": 0.6711,
|
| 122650 |
+
"step": 17520
|
| 122651 |
+
},
|
| 122652 |
+
{
|
| 122653 |
+
"epoch": 19.956125356125355,
|
| 122654 |
+
"grad_norm": 0.20226095616817474,
|
| 122655 |
+
"learning_rate": 6.946517852574052e-10,
|
| 122656 |
+
"loss": 0.638,
|
| 122657 |
+
"step": 17521
|
| 122658 |
+
},
|
| 122659 |
+
{
|
| 122660 |
+
"epoch": 19.957264957264957,
|
| 122661 |
+
"grad_norm": 0.1664465069770813,
|
| 122662 |
+
"learning_rate": 6.603535043558929e-10,
|
| 122663 |
+
"loss": 0.7497,
|
| 122664 |
+
"step": 17522
|
| 122665 |
+
},
|
| 122666 |
+
{
|
| 122667 |
+
"epoch": 19.95840455840456,
|
| 122668 |
+
"grad_norm": 0.20206011831760406,
|
| 122669 |
+
"learning_rate": 6.269235192674083e-10,
|
| 122670 |
+
"loss": 0.6539,
|
| 122671 |
+
"step": 17523
|
| 122672 |
+
},
|
| 122673 |
+
{
|
| 122674 |
+
"epoch": 19.95954415954416,
|
| 122675 |
+
"grad_norm": 0.20060062408447266,
|
| 122676 |
+
"learning_rate": 5.943618311549104e-10,
|
| 122677 |
+
"loss": 0.5205,
|
| 122678 |
+
"step": 17524
|
| 122679 |
+
},
|
| 122680 |
+
{
|
| 122681 |
+
"epoch": 19.96068376068376,
|
| 122682 |
+
"grad_norm": 0.22517715394496918,
|
| 122683 |
+
"learning_rate": 5.626684411508265e-10,
|
| 122684 |
+
"loss": 0.6764,
|
| 122685 |
+
"step": 17525
|
| 122686 |
+
},
|
| 122687 |
+
{
|
| 122688 |
+
"epoch": 19.961823361823363,
|
| 122689 |
+
"grad_norm": 0.16309784352779388,
|
| 122690 |
+
"learning_rate": 5.31843350351502e-10,
|
| 122691 |
+
"loss": 0.7961,
|
| 122692 |
+
"step": 17526
|
| 122693 |
+
},
|
| 122694 |
+
{
|
| 122695 |
+
"epoch": 19.962962962962962,
|
| 122696 |
+
"grad_norm": 0.20457012951374054,
|
| 122697 |
+
"learning_rate": 5.018865598310773e-10,
|
| 122698 |
+
"loss": 0.5823,
|
| 122699 |
+
"step": 17527
|
| 122700 |
+
},
|
| 122701 |
+
{
|
| 122702 |
+
"epoch": 19.964102564102564,
|
| 122703 |
+
"grad_norm": 0.24622264504432678,
|
| 122704 |
+
"learning_rate": 4.727980706276113e-10,
|
| 122705 |
+
"loss": 0.5505,
|
| 122706 |
+
"step": 17528
|
| 122707 |
+
},
|
| 122708 |
+
{
|
| 122709 |
+
"epoch": 19.965242165242167,
|
| 122710 |
+
"grad_norm": 0.17372025549411774,
|
| 122711 |
+
"learning_rate": 4.445778837541825e-10,
|
| 122712 |
+
"loss": 0.6343,
|
| 122713 |
+
"step": 17529
|
| 122714 |
+
},
|
| 122715 |
+
{
|
| 122716 |
+
"epoch": 19.966381766381765,
|
| 122717 |
+
"grad_norm": 0.1885814517736435,
|
| 122718 |
+
"learning_rate": 4.17226000187787e-10,
|
| 122719 |
+
"loss": 0.8166,
|
| 122720 |
+
"step": 17530
|
| 122721 |
+
},
|
| 122722 |
+
{
|
| 122723 |
+
"epoch": 19.967521367521368,
|
| 122724 |
+
"grad_norm": 0.22691234946250916,
|
| 122725 |
+
"learning_rate": 3.907424208832167e-10,
|
| 122726 |
+
"loss": 0.4473,
|
| 122727 |
+
"step": 17531
|
| 122728 |
+
},
|
| 122729 |
+
{
|
| 122730 |
+
"epoch": 19.96866096866097,
|
| 122731 |
+
"grad_norm": 0.1989099383354187,
|
| 122732 |
+
"learning_rate": 3.651271467536299e-10,
|
| 122733 |
+
"loss": 0.4989,
|
| 122734 |
+
"step": 17532
|
| 122735 |
+
},
|
| 122736 |
+
{
|
| 122737 |
+
"epoch": 19.96980056980057,
|
| 122738 |
+
"grad_norm": 0.2686764597892761,
|
| 122739 |
+
"learning_rate": 3.403801786955318e-10,
|
| 122740 |
+
"loss": 0.5714,
|
| 122741 |
+
"step": 17533
|
| 122742 |
+
},
|
| 122743 |
+
{
|
| 122744 |
+
"epoch": 19.97094017094017,
|
| 122745 |
+
"grad_norm": 0.22707980871200562,
|
| 122746 |
+
"learning_rate": 3.165015175637942e-10,
|
| 122747 |
+
"loss": 0.5101,
|
| 122748 |
+
"step": 17534
|
| 122749 |
+
},
|
| 122750 |
+
{
|
| 122751 |
+
"epoch": 19.972079772079773,
|
| 122752 |
+
"grad_norm": 0.2014019638299942,
|
| 122753 |
+
"learning_rate": 2.9349116419108427e-10,
|
| 122754 |
+
"loss": 0.4804,
|
| 122755 |
+
"step": 17535
|
| 122756 |
+
},
|
| 122757 |
+
{
|
| 122758 |
+
"epoch": 19.973219373219372,
|
| 122759 |
+
"grad_norm": 0.2190103381872177,
|
| 122760 |
+
"learning_rate": 2.71349119373987e-10,
|
| 122761 |
+
"loss": 0.4915,
|
| 122762 |
+
"step": 17536
|
| 122763 |
+
},
|
| 122764 |
+
{
|
| 122765 |
+
"epoch": 19.974358974358974,
|
| 122766 |
+
"grad_norm": 0.17524485290050507,
|
| 122767 |
+
"learning_rate": 2.500753838813319e-10,
|
| 122768 |
+
"loss": 0.6858,
|
| 122769 |
+
"step": 17537
|
| 122770 |
+
},
|
| 122771 |
+
{
|
| 122772 |
+
"epoch": 19.975498575498577,
|
| 122773 |
+
"grad_norm": 0.2067994326353073,
|
| 122774 |
+
"learning_rate": 2.296699584541928e-10,
|
| 122775 |
+
"loss": 0.8085,
|
| 122776 |
+
"step": 17538
|
| 122777 |
+
},
|
| 122778 |
+
{
|
| 122779 |
+
"epoch": 19.976638176638176,
|
| 122780 |
+
"grad_norm": 0.2091679573059082,
|
| 122781 |
+
"learning_rate": 2.1013284380033694e-10,
|
| 122782 |
+
"loss": 0.5121,
|
| 122783 |
+
"step": 17539
|
| 122784 |
+
},
|
| 122785 |
+
{
|
| 122786 |
+
"epoch": 19.977777777777778,
|
| 122787 |
+
"grad_norm": 0.23023058474063873,
|
| 122788 |
+
"learning_rate": 1.914640405970003e-10,
|
| 122789 |
+
"loss": 0.6296,
|
| 122790 |
+
"step": 17540
|
| 122791 |
+
},
|
| 122792 |
+
{
|
| 122793 |
+
"epoch": 19.97891737891738,
|
| 122794 |
+
"grad_norm": 0.17481495440006256,
|
| 122795 |
+
"learning_rate": 1.7366354949643893e-10,
|
| 122796 |
+
"loss": 0.6735,
|
| 122797 |
+
"step": 17541
|
| 122798 |
+
},
|
| 122799 |
+
{
|
| 122800 |
+
"epoch": 19.98005698005698,
|
| 122801 |
+
"grad_norm": 0.1665341705083847,
|
| 122802 |
+
"learning_rate": 1.5673137111205106e-10,
|
| 122803 |
+
"loss": 0.7221,
|
| 122804 |
+
"step": 17542
|
| 122805 |
+
},
|
| 122806 |
+
{
|
| 122807 |
+
"epoch": 19.98119658119658,
|
| 122808 |
+
"grad_norm": 0.17197179794311523,
|
| 122809 |
+
"learning_rate": 1.4066750603503044e-10,
|
| 122810 |
+
"loss": 0.6417,
|
| 122811 |
+
"step": 17543
|
| 122812 |
+
},
|
| 122813 |
+
{
|
| 122814 |
+
"epoch": 19.982336182336184,
|
| 122815 |
+
"grad_norm": 0.18099331855773926,
|
| 122816 |
+
"learning_rate": 1.2547195482048858e-10,
|
| 122817 |
+
"loss": 0.7751,
|
| 122818 |
+
"step": 17544
|
| 122819 |
+
},
|
| 122820 |
+
{
|
| 122821 |
+
"epoch": 19.983475783475782,
|
| 122822 |
+
"grad_norm": 0.18419486284255981,
|
| 122823 |
+
"learning_rate": 1.1114471800133252e-10,
|
| 122824 |
+
"loss": 0.62,
|
| 122825 |
+
"step": 17545
|
| 122826 |
+
},
|
| 122827 |
+
{
|
| 122828 |
+
"epoch": 19.984615384615385,
|
| 122829 |
+
"grad_norm": 0.20028352737426758,
|
| 122830 |
+
"learning_rate": 9.768579607161155e-11,
|
| 122831 |
+
"loss": 0.5974,
|
| 122832 |
+
"step": 17546
|
| 122833 |
+
},
|
| 122834 |
+
{
|
| 122835 |
+
"epoch": 19.985754985754987,
|
| 122836 |
+
"grad_norm": 0.20690953731536865,
|
| 122837 |
+
"learning_rate": 8.50951894976193e-11,
|
| 122838 |
+
"loss": 0.6736,
|
| 122839 |
+
"step": 17547
|
| 122840 |
+
},
|
| 122841 |
+
{
|
| 122842 |
+
"epoch": 19.986894586894586,
|
| 122843 |
+
"grad_norm": 0.20689022541046143,
|
| 122844 |
+
"learning_rate": 7.337289871789388e-11,
|
| 122845 |
+
"loss": 0.6134,
|
| 122846 |
+
"step": 17548
|
| 122847 |
+
},
|
| 122848 |
+
{
|
| 122849 |
+
"epoch": 19.988034188034188,
|
| 122850 |
+
"grad_norm": 0.20695361495018005,
|
| 122851 |
+
"learning_rate": 6.25189241432178e-11,
|
| 122852 |
+
"loss": 0.5089,
|
| 122853 |
+
"step": 17549
|
| 122854 |
+
},
|
| 122855 |
+
{
|
| 122856 |
+
"epoch": 19.98917378917379,
|
| 122857 |
+
"grad_norm": 0.17822995781898499,
|
| 122858 |
+
"learning_rate": 5.2533266145515794e-11,
|
| 122859 |
+
"loss": 0.7472,
|
| 122860 |
+
"step": 17550
|
| 122861 |
+
},
|
| 122862 |
+
{
|
| 122863 |
+
"epoch": 19.99031339031339,
|
| 122864 |
+
"grad_norm": 0.22333787381649017,
|
| 122865 |
+
"learning_rate": 4.341592507173253e-11,
|
| 122866 |
+
"loss": 0.7331,
|
| 122867 |
+
"step": 17551
|
| 122868 |
+
},
|
| 122869 |
+
{
|
| 122870 |
+
"epoch": 19.99145299145299,
|
| 122871 |
+
"grad_norm": 0.18312227725982666,
|
| 122872 |
+
"learning_rate": 3.516690124383271e-11,
|
| 122873 |
+
"loss": 0.7284,
|
| 122874 |
+
"step": 17552
|
| 122875 |
+
},
|
| 122876 |
+
{
|
| 122877 |
+
"epoch": 19.992592592592594,
|
| 122878 |
+
"grad_norm": 0.18905913829803467,
|
| 122879 |
+
"learning_rate": 2.7786194942147624e-11,
|
| 122880 |
+
"loss": 0.4803,
|
| 122881 |
+
"step": 17553
|
| 122882 |
+
},
|
| 122883 |
+
{
|
| 122884 |
+
"epoch": 19.993732193732193,
|
| 122885 |
+
"grad_norm": 0.26452741026878357,
|
| 122886 |
+
"learning_rate": 2.1273806424804143e-11,
|
| 122887 |
+
"loss": 0.5171,
|
| 122888 |
+
"step": 17554
|
| 122889 |
+
},
|
| 122890 |
+
{
|
| 122891 |
+
"epoch": 19.994871794871795,
|
| 122892 |
+
"grad_norm": 0.19459344446659088,
|
| 122893 |
+
"learning_rate": 1.5629735922173537e-11,
|
| 122894 |
+
"loss": 0.6539,
|
| 122895 |
+
"step": 17555
|
| 122896 |
+
},
|
| 122897 |
+
{
|
| 122898 |
+
"epoch": 19.996011396011397,
|
| 122899 |
+
"grad_norm": 0.20268167555332184,
|
| 122900 |
+
"learning_rate": 1.0853983622993725e-11,
|
| 122901 |
+
"loss": 0.7208,
|
| 122902 |
+
"step": 17556
|
| 122903 |
+
},
|
| 122904 |
+
{
|
| 122905 |
+
"epoch": 19.997150997150996,
|
| 122906 |
+
"grad_norm": 0.17209883034229279,
|
| 122907 |
+
"learning_rate": 6.946549699349269e-12,
|
| 122908 |
+
"loss": 0.499,
|
| 122909 |
+
"step": 17557
|
| 122910 |
+
},
|
| 122911 |
+
{
|
| 122912 |
+
"epoch": 19.9982905982906,
|
| 122913 |
+
"grad_norm": 0.21036337316036224,
|
| 122914 |
+
"learning_rate": 3.907434284466937e-12,
|
| 122915 |
+
"loss": 0.4244,
|
| 122916 |
+
"step": 17558
|
| 122917 |
+
},
|
| 122918 |
+
{
|
| 122919 |
+
"epoch": 19.9994301994302,
|
| 122920 |
+
"grad_norm": 0.1915605366230011,
|
| 122921 |
+
"learning_rate": 1.7366374865934732e-12,
|
| 122922 |
+
"loss": 0.7527,
|
| 122923 |
+
"step": 17559
|
| 122924 |
+
},
|
| 122925 |
+
{
|
| 122926 |
+
"epoch": 20.0,
|
| 122927 |
+
"grad_norm": 0.3738774061203003,
|
| 122928 |
+
"learning_rate": 4.3415937511781526e-13,
|
| 122929 |
+
"loss": 0.706,
|
| 122930 |
+
"step": 17560
|
| 122931 |
}
|
| 122932 |
],
|
| 122933 |
"logging_steps": 1,
|
|
|
|
| 122942 |
"should_evaluate": false,
|
| 122943 |
"should_log": false,
|
| 122944 |
"should_save": true,
|
| 122945 |
+
"should_training_stop": true
|
| 122946 |
},
|
| 122947 |
"attributes": {}
|
| 122948 |
}
|
| 122949 |
},
|
| 122950 |
+
"total_flos": 9.817931115291156e+19,
|
| 122951 |
"train_batch_size": 8,
|
| 122952 |
"trial_name": null,
|
| 122953 |
"trial_params": null
|